Diffstat (limited to 'drivers/gpu/drm/i915')
266 files changed, 22824 insertions, 11615 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 72a38f28393f..47e845353ffa 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -207,6 +207,8 @@ config DRM_I915_LOW_LEVEL_TRACEPOINTS This provides the ability to precisely monitor engine utilisation and also analyze the request dependency resolving timeline. + Recommended for driver developers only. + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE @@ -220,6 +222,8 @@ config DRM_I915_DEBUG_VBLANK_EVADE is exceeded, even if there isn't an actual risk of missing the vblank. + Recommended for driver developers only. + If in doubt, say "N". config DRM_I915_DEBUG_RUNTIME_PM @@ -232,4 +236,6 @@ config DRM_I915_DEBUG_RUNTIME_PM runtime PM functionality. This may introduce overhead during driver loading, suspend and resume operations. + Recommended for driver developers only. + If in doubt, say "N" diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4f22cac1c49b..642a5b5a1b81 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_irq.o \ i915_getparam.o \ i915_mitigations.o \ + i915_module.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ @@ -89,7 +90,6 @@ gt-y += \ gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ - gt/intel_context_param.o \ gt/intel_context_sseu.o \ gt/intel_engine_cs.o \ gt/intel_engine_heartbeat.o \ @@ -108,6 +108,7 @@ gt-y += \ gt/intel_gtt.o \ gt/intel_llc.o \ gt/intel_lrc.o \ + gt/intel_migrate.o \ gt/intel_mocs.o \ gt/intel_ppgtt.o \ gt/intel_rc6.o \ @@ -135,7 +136,6 @@ i915-y += $(gt-y) gem-y += \ gem/i915_gem_busy.o \ gem/i915_gem_clflush.o \ - gem/i915_gem_client_blt.o \ gem/i915_gem_context.o \ gem/i915_gem_create.o \ gem/i915_gem_dmabuf.o \ @@ -143,7 +143,6 @@ gem-y += \ gem/i915_gem_execbuffer.o \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ - gem/i915_gem_object_blt.o \ gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ @@ -155,21 +154,24 @@ gem-y += \ gem/i915_gem_stolen.o \ gem/i915_gem_throttle.o \ gem/i915_gem_tiling.o \ + gem/i915_gem_ttm.o \ gem/i915_gem_userptr.o \ gem/i915_gem_wait.o \ gem/i915_gemfs.o i915-y += \ $(gem-y) \ i915_active.o \ + i915_buddy.o \ i915_cmd_parser.o \ i915_gem_evict.o \ i915_gem_gtt.o \ + i915_gem_ww.o \ i915_gem.o \ - i915_globals.o \ i915_query.o \ i915_request.o \ i915_scheduler.o \ i915_trace_points.o \ + i915_ttm_buddy_manager.o \ i915_vma.o \ intel_wopcm.o @@ -184,6 +186,8 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_rc.o \ + gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ @@ -265,6 +269,7 @@ i915-y += \ display/intel_pps.o \ display/intel_qp_tables.o \ display/intel_sdvo.o \ + display/intel_snps_phy.o \ display/intel_tv.o \ display/intel_vdsc.o \ display/intel_vrr.o \ @@ -276,7 +281,9 @@ i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ + gem/selftests/i915_gem_client_blt.o \ gem/selftests/igt_gem_utils.o \ + selftests/intel_scheduler_helpers.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_atomic.o \ diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 9643c45a2209..b1439ba78f67 100644 --- 
a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -912,7 +912,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) zpos = 0; drm_plane_create_zpos_immutable_property(&plane->base, zpos); - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + intel_plane_helper_add(plane); return plane; diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 16812488c5dd..43ec7fcd3f5d 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -729,8 +729,8 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + enum pipe pipe = crtc->pipe; u32 tmp; enum port port; enum transcoder dsi_trans; @@ -1253,15 +1253,36 @@ static void gen11_dsi_pre_enable(struct intel_atomic_state *state, gen11_dsi_set_transcoder_timings(encoder, pipe_config); } +/* + * Wa_1409054076:icl,jsl,ehl + * When pipe A is disabled and MIPI DSI is enabled on pipe B, + * the AMT KVMR feature will incorrectly see pipe A as enabled. + * Set 0x42080 bit 23=1 before enabling DSI on pipe B and leave + * it set while DSI is enabled on pipe B + */ +static void icl_apply_kvmr_pipe_a_wa(struct intel_encoder *encoder, + enum pipe pipe, bool enable) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + IGNORE_KVMR_PIPE_A, + enable ? 
IGNORE_KVMR_PIPE_A : 0); +} static void gen11_dsi_enable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_crtc *crtc = to_intel_crtc(conn_state->crtc); drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder); + /* Wa_1409054076:icl,jsl,ehl */ + icl_apply_kvmr_pipe_a_wa(encoder, crtc->pipe, true); + /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); @@ -1415,6 +1436,7 @@ static void gen11_dsi_disable(struct intel_atomic_state *state, const struct drm_connector_state *old_conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_crtc *crtc = to_intel_crtc(old_conn_state->crtc); /* step1: turn off backlight */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); @@ -1423,6 +1445,9 @@ static void gen11_dsi_disable(struct intel_atomic_state *state, /* step2d,e: disable transcoder and wait */ gen11_dsi_disable_transcoder(encoder); + /* Wa_1409054076:icl,jsl,ehl */ + icl_apply_kvmr_pipe_a_wa(encoder, crtc->pipe, false); + /* step2f,g: powerdown panel */ gen11_dsi_powerdown_panel(encoder); @@ -1548,6 +1573,22 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, pipe_config->mode_flags |= I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE; } +static void gen11_dsi_sync_state(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum pipe pipe = intel_crtc->pipe; + + /* wa verify 1409054076:icl,jsl,ehl */ + if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B && + !(intel_de_read(dev_priv, CHICKEN_PAR1_1) & IGNORE_KVMR_PIPE_A)) + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] BIOS left IGNORE_KVMR_PIPE_A cleared with pipe B enabled\n", + encoder->base.base.id, + encoder->base.name); +} + static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { @@ -1966,6 +2007,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->post_disable = gen11_dsi_post_disable; encoder->port = port; encoder->get_config = gen11_dsi_get_config; + encoder->sync_state = gen11_dsi_sync_state; encoder->update_pipe = intel_panel_update_backlight; encoder->compute_config = gen11_dsi_compute_config; encoder->get_hw_state = gen11_dsi_get_hw_state; diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index 833d0c1be4f1..7cfe91fc05f2 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -19,6 +19,12 @@ static const guid_t intel_dsm_guid = GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f, 0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c); +#define INTEL_DSM_FN_GET_BIOS_DATA_FUNCS_SUPPORTED 0 /* No args */ + +static const guid_t intel_dsm_guid2 = + GUID_INIT(0x3e5b41c6, 0xeb1d, 0x4260, + 0x9d, 0x15, 0xc7, 0x1f, 0xba, 0xda, 0xe4, 0x14); + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -176,6 +182,19 @@ void intel_unregister_dsm_handler(void) { } +void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + acpi_handle dhandle; + + dhandle = ACPI_HANDLE(&pdev->dev); + if (!dhandle) + return; + + acpi_evaluate_dsm(dhandle, &intel_dsm_guid2, INTEL_DSM_REVISION_ID, + INTEL_DSM_FN_GET_BIOS_DATA_FUNCS_SUPPORTED, NULL); 
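The intel_dsm_get_bios_data_funcs_supported() helper added just above evaluates _DSM sub-function 0, which per the ACPI spec returns a bitmask of the sub-functions the firmware implements for that GUID and revision. As a minimal sketch only (not part of this patch), a caller wanting to gate a later sub-function on that bitmask could use the generic kernel helper acpi_check_dsm(), which performs the same function-0 query internally; sub-function number 5 here is a made-up placeholder:

	union acpi_object *obj;

	/* Hypothetical follow-up: evaluate sub-function 5 only if the
	 * firmware advertises it in the function-0 bitmask.
	 */
	if (acpi_check_dsm(dhandle, &intel_dsm_guid2, INTEL_DSM_REVISION_ID, BIT(5))) {
		obj = acpi_evaluate_dsm(dhandle, &intel_dsm_guid2,
					INTEL_DSM_REVISION_ID, 5, NULL);
		ACPI_FREE(obj);	/* result unused in this sketch, just freed */
	}

dhandle and intel_dsm_guid2 are the local variable and GUID from the function above; the helpers used are the kernel's standard ACPI _DSM API.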
+} + /* * ACPI Specification, Revision 5.0, Appendix B.3.2 _DOD (Enumerate All Devices * Attached to the Display Adapter). diff --git a/drivers/gpu/drm/i915/display/intel_acpi.h b/drivers/gpu/drm/i915/display/intel_acpi.h index e8b068661d22..9f197401c313 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.h +++ b/drivers/gpu/drm/i915/display/intel_acpi.h @@ -11,11 +11,14 @@ struct drm_i915_private; #ifdef CONFIG_ACPI void intel_register_dsm_handler(void); void intel_unregister_dsm_handler(void); +void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915); void intel_acpi_device_id_update(struct drm_i915_private *i915); #else static inline void intel_register_dsm_handler(void) { return; } static inline void intel_unregister_dsm_handler(void) { return; } static inline +void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) { return; } +static inline void intel_acpi_device_id_update(struct drm_i915_private *i915) { return; } #endif /* CONFIG_ACPI */ diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 36f52a1d7552..47234d898549 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -601,7 +601,12 @@ int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, return 0; } -const struct drm_plane_helper_funcs intel_plane_helper_funcs = { +static const struct drm_plane_helper_funcs intel_plane_helper_funcs = { .prepare_fb = intel_prepare_plane_fb, .cleanup_fb = intel_cleanup_plane_fb, }; + +void intel_plane_helper_add(struct intel_plane *plane) +{ + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); +} diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index dc4d05e75e1c..62e5a2a77fd4 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -17,8 +17,6 @@ struct intel_crtc_state; struct intel_plane; struct intel_plane_state; -extern const struct drm_plane_helper_funcs intel_plane_helper_funcs; - unsigned int intel_adjusted_rate(const struct drm_rect *src, const struct drm_rect *dst, unsigned int rate); @@ -65,5 +63,6 @@ int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, bool can_position); void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); +void intel_plane_helper_add(struct intel_plane *plane); #endif /* __INTEL_ATOMIC_PLANE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 5f4f316b3ab5..532237588511 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -1001,7 +1001,7 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) /* Catch potential impedance mismatches before they occur! 
*/ BUILD_BUG_ON(sizeof(intel_wakeref_t) > sizeof(unsigned long)); - ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO_PLAYBACK); if (dev_priv->audio_power_refcount++ == 0) { if (DISPLAY_VER(dev_priv) >= 9) { @@ -1034,7 +1034,7 @@ static void i915_audio_component_put_power(struct device *kdev, if (IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, false); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie); + intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO_PLAYBACK, cookie); } static void i915_audio_component_codec_wake_override(struct device *kdev, diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index aa667fa71158..e86e6ed2d3bf 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1871,12 +1871,12 @@ intel_bios_encoder_supports_edp(const struct intel_bios_encoder_data *devdata) static bool is_port_valid(struct drm_i915_private *i915, enum port port) { /* - * On some ICL/CNL SKUs port F is not present, but broken VBTs mark + * On some ICL SKUs port F is not present, but broken VBTs mark * the port as present. Only try to initialize port F for the * SKUs that may actually have it. */ - if (port == PORT_F && (IS_ICELAKE(i915) || IS_CANNONLAKE(i915))) - return IS_ICL_WITH_PORT_F(i915) || IS_CNL_WITH_PORT_F(i915); + if (port == PORT_F && IS_ICELAKE(i915)) + return IS_ICL_WITH_PORT_F(i915); return true; } @@ -1998,7 +1998,7 @@ static void parse_ddi_port(struct drm_i915_private *i915, "Port %c VBT HDMI boost level: %d\n", port_name(port), hdmi_boost_level); - /* DP max link rate for CNL+ */ + /* DP max link rate for GLK+ */ if (i915->vbt.version >= 216) { if (i915->vbt.version >= 230) info->dp_max_link_rate = parse_bdb_230_dp_max_link_rate(child->dp_max_link_rate); diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index bfb398f0432e..e91e0e0191fb 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -17,12 +17,53 @@ struct intel_qgv_point { u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd; }; +struct intel_psf_gv_point { + u8 clk; /* clock in multiples of 16.6666 MHz */ +}; + struct intel_qgv_info { struct intel_qgv_point points[I915_NUM_QGV_POINTS]; + struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS]; u8 num_points; + u8 num_psf_points; u8 t_bl; }; +static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, + struct intel_qgv_point *sp, + int point) +{ + u32 dclk_ratio, dclk_reference; + u32 val; + + val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC); + dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val); + if (val & DG1_QCLK_REFERENCE) + dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */ + else + dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */ + sp->dclk = dclk_ratio * dclk_reference; + + val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); + if (val & DG1_GEAR_TYPE) + sp->dclk *= 2; + + if (sp->dclk == 0) + return -EINVAL; + + val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR); + sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val); + sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val); + + val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH); + sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val); + sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val); + + sp->t_rc = 
sp->t_rp + sp->t_ras; + + return 0; +} + static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, struct intel_qgv_point *sp, int point) @@ -49,6 +90,28 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, return 0; } +static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv, + struct intel_psf_gv_point *points) +{ + u32 val = 0; + int ret; + int i; + + ret = sandybridge_pcode_read(dev_priv, + ICL_PCODE_MEM_SUBSYSYSTEM_INFO | + ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, + &val, NULL); + if (ret) + return ret; + + for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) { + points[i].clk = val & 0xff; + val >>= 8; + } + + return 0; +} + int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, u32 points_mask) { @@ -62,7 +125,7 @@ int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, 1); if (ret < 0) { - drm_err(&dev_priv->drm, "Failed to disable qgv points (%d)\n", ret); + drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask); return ret; } @@ -76,6 +139,7 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, int i, ret; qi->num_points = dram_info->num_qgv_points; + qi->num_psf_points = dram_info->num_psf_gv_points; if (DISPLAY_VER(dev_priv) == 12) switch (dram_info->type) { @@ -99,7 +163,11 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, for (i = 0; i < qi->num_points; i++) { struct intel_qgv_point *sp = &qi->points[i]; - ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i); + if (IS_DG1(dev_priv)) + ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i); + else + ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i); + if (ret) return ret; @@ -109,6 +177,19 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, sp->t_rcd, sp->t_rc); } + if (qi->num_psf_points > 0) { + ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points); + if (ret) { + drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n"); + qi->num_psf_points = 0; + } + + for (i = 0; i < qi->num_psf_points; i++) + drm_dbg_kms(&dev_priv->drm, + "PSF GV %d: CLK=%d \n", + i, qi->psf_points[i].clk); + } + return 0; } @@ -118,6 +199,16 @@ static int icl_calc_bw(int dclk, int num, int den) return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6); } +static int adl_calc_psf_bw(int clk) +{ + /* + * clk is multiples of 16.666MHz (100/6) + * According to BSpec PSF GV bandwidth is + * calculated as BW = 64 * clk * 16.666Mhz + */ + return DIV_ROUND_CLOSEST(64 * clk * 100, 6); +} + static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) { u16 dclk = 0; @@ -194,6 +285,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; bi->num_qgv_points = qi.num_points; + bi->num_psf_gv_points = qi.num_psf_points; for (j = 0; j < qi.num_points; j++) { const struct intel_qgv_point *sp = &qi.points[j]; @@ -217,6 +309,16 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel i, j, bi->num_planes, bi->deratedbw[j]); } + for (j = 0; j < qi.num_psf_points; j++) { + const struct intel_psf_gv_point *sp = &qi.psf_points[j]; + + bi->psf_bw[j] = adl_calc_psf_bw(sp->clk); + + drm_dbg_kms(&dev_priv->drm, + "BW%d / PSF GV %d: num_planes=%d bw=%u\n", + i, j, bi->num_planes, bi->psf_bw[j]); + } + if (bi->num_planes == 1) break; } @@ -234,6 +336,26 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct 
intel return 0; } +static void dg2_get_bw_info(struct drm_i915_private *i915) +{ + struct intel_bw_info *bi = &i915->max_bw[0]; + + /* + * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth + * that doesn't depend on the number of planes enabled. Create a + * single dummy QGV point to reflect that. DG2-G10 platforms have a + * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s. + */ + bi->num_planes = 1; + bi->num_qgv_points = 1; + if (IS_DG2_G11(i915)) + bi->deratedbw[0] = 38000; + else + bi->deratedbw[0] = 50000; + + i915->sagv_status = I915_SAGV_NOT_CONTROLLED; +} + static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, int num_planes, int qgv_point) { @@ -262,12 +384,23 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, return 0; } +static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv, + int psf_gv_point) +{ + const struct intel_bw_info *bi = + &dev_priv->max_bw[0]; + + return bi->psf_bw[psf_gv_point]; +} + void intel_bw_init_hw(struct drm_i915_private *dev_priv) { if (!HAS_DISPLAY(dev_priv)) return; - if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + dg2_get_bw_info(dev_priv); + else if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) icl_get_bw_info(dev_priv, &adls_sa_info); else if (IS_ROCKETLAKE(dev_priv)) icl_get_bw_info(dev_priv, &rkl_sa_info); @@ -534,12 +667,24 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) u32 allowed_points = 0; unsigned int max_bw_point = 0, max_bw = 0; unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points; - u32 mask = (1 << num_qgv_points) - 1; + unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points; + u32 mask = 0; /* FIXME earlier gens need some checks too */ if (DISPLAY_VER(dev_priv) < 11) return 0; + /* + * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects + * it with failure if we try masking any unadvertised points. + * So need to operate only with those returned from PCode. + */ + if (num_qgv_points > 0) + mask |= REG_GENMASK(num_qgv_points - 1, 0); + + if (num_psf_gv_points > 0) + mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT; + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { unsigned int old_data_rate = @@ -602,23 +747,44 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) max_bw = max_data_rate; } if (max_data_rate >= data_rate) - allowed_points |= BIT(i); + allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i)); + drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n", i, max_data_rate, data_rate); } + for (i = 0; i < num_psf_gv_points; i++) { + unsigned int max_data_rate = adl_psf_bw(dev_priv, i); + + if (max_data_rate >= data_rate) + allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i)); + + drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d" + " required %d\n", + i, max_data_rate, data_rate); + } + /* * BSpec states that we always should have at least one allowed point * left, so if we couldn't - simply reject the configuration for obvious * reasons. 
*/ - if (allowed_points == 0) { + if ((allowed_points & ADLS_QGV_PT_MASK) == 0) { drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory" " bandwidth %d for display configuration(%d active planes).\n", data_rate, num_active_planes); return -EINVAL; } + if (num_psf_gv_points > 0) { + if ((allowed_points & ADLS_PSF_PT_MASK) == 0) { + drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory" + " bandwidth %d for display configuration(%d active planes).\n", + data_rate, num_active_planes); + return -EINVAL; + } + } + /* * Leave only single point with highest bandwidth, if * we can't enable SAGV due to the increased memory latency it may diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 613ffcc68eba..34fa4130d5c4 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1195,17 +1195,6 @@ static const struct intel_cdclk_vals glk_cdclk_table[] = { {} }; -static const struct intel_cdclk_vals cnl_cdclk_table[] = { - { .refclk = 19200, .cdclk = 168000, .divider = 4, .ratio = 35 }, - { .refclk = 19200, .cdclk = 336000, .divider = 2, .ratio = 35 }, - { .refclk = 19200, .cdclk = 528000, .divider = 2, .ratio = 55 }, - - { .refclk = 24000, .cdclk = 168000, .divider = 4, .ratio = 28 }, - { .refclk = 24000, .cdclk = 336000, .divider = 2, .ratio = 28 }, - { .refclk = 24000, .cdclk = 528000, .divider = 2, .ratio = 44 }, - {} -}; - static const struct intel_cdclk_vals icl_cdclk_table[] = { { .refclk = 19200, .cdclk = 172800, .divider = 2, .ratio = 18 }, { .refclk = 19200, .cdclk = 192000, .divider = 2, .ratio = 20 }, @@ -1290,6 +1279,16 @@ static const struct intel_cdclk_vals adlp_cdclk_table[] = { {} }; +static const struct intel_cdclk_vals dg2_cdclk_table[] = { + { .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio = 9 }, + { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, + { .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 }, + { .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 }, + { .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 }, + { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 }, + {} +}; + static int bxt_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) { const struct intel_cdclk_vals *table = dev_priv->cdclk.table; @@ -1329,16 +1328,6 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } -static u8 cnl_calc_voltage_level(int cdclk) -{ - if (cdclk > 336000) - return 2; - else if (cdclk > 168000) - return 1; - else - return 0; -} - static u8 icl_calc_voltage_level(int cdclk) { if (cdclk > 556800) @@ -1373,15 +1362,6 @@ static u8 tgl_calc_voltage_level(int cdclk) return 0; } -static void cnl_readout_refclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_config *cdclk_config) -{ - if (intel_de_read(dev_priv, SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) - cdclk_config->ref = 24000; - else - cdclk_config->ref = 19200; -} - static void icl_readout_refclk(struct drm_i915_private *dev_priv, struct intel_cdclk_config *cdclk_config) { @@ -1408,10 +1388,10 @@ static void bxt_de_pll_readout(struct drm_i915_private *dev_priv, { u32 val, ratio; - if (DISPLAY_VER(dev_priv) >= 11) + if (IS_DG2(dev_priv)) + cdclk_config->ref = 38400; + else if (DISPLAY_VER(dev_priv) >= 11) icl_readout_refclk(dev_priv, cdclk_config); - else if (IS_CANNONLAKE(dev_priv)) - cnl_readout_refclk(dev_priv, cdclk_config); else cdclk_config->ref = 19200; @@ -1427,11 +1407,11 @@ static void 
bxt_de_pll_readout(struct drm_i915_private *dev_priv, } /* - * CNL+ have the ratio directly in the PLL enable register, gen9lp had - * it in a separate PLL control register. + * DISPLAY_VER >= 11 have the ratio directly in the PLL enable register, + * gen9lp had it in a separate PLL control register. */ - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) - ratio = val & CNL_CDCLK_PLL_RATIO_MASK; + if (DISPLAY_VER(dev_priv) >= 11) + ratio = val & ICL_CDCLK_PLL_RATIO_MASK; else ratio = intel_de_read(dev_priv, BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK; @@ -1518,7 +1498,7 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) +static void icl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { intel_de_rmw(dev_priv, BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE, 0); @@ -1530,12 +1510,12 @@ static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) dev_priv->cdclk.hw.vco = 0; } -static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) +static void icl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) { int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); u32 val; - val = CNL_CDCLK_PLL_RATIO(ratio); + val = ICL_CDCLK_PLL_RATIO(ratio); intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, val); val |= BXT_DE_PLL_PLL_ENABLE; @@ -1548,18 +1528,13 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static bool has_cdclk_crawl(struct drm_i915_private *i915) -{ - return INTEL_INFO(i915)->has_cdclk_crawl; -} - static void adlp_cdclk_pll_crawl(struct drm_i915_private *dev_priv, int vco) { int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); u32 val; /* Write PLL ratio without disabling */ - val = CNL_CDCLK_PLL_RATIO(ratio) | BXT_DE_PLL_PLL_ENABLE; + val = ICL_CDCLK_PLL_RATIO(ratio) | BXT_DE_PLL_PLL_ENABLE; intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, val); /* Submit freq change request */ @@ -1628,7 +1603,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int ret; /* Inform power controller of upcoming frequency change. 
*/ - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, @@ -1649,16 +1624,16 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, return; } - if (has_cdclk_crawl(dev_priv) && dev_priv->cdclk.hw.vco > 0 && vco > 0) { + if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->cdclk.hw.vco > 0 && vco > 0) { if (dev_priv->cdclk.hw.vco != vco) adlp_cdclk_pll_crawl(dev_priv, vco); - } else if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { + } else if (DISPLAY_VER(dev_priv) >= 11) { if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); + icl_cdclk_pll_disable(dev_priv); if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); + icl_cdclk_pll_enable(dev_priv, vco); } else { if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) @@ -1684,7 +1659,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, if (pipe != INVALID_PIPE) intel_wait_for_vblank(dev_priv, pipe); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { + if (DISPLAY_VER(dev_priv) >= 11) { ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_config->voltage_level); } else { @@ -1709,7 +1684,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, intel_update_cdclk(dev_priv); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) /* * Can't read out the voltage level :( * Let's just assume everything is as expected. @@ -1857,7 +1832,7 @@ static bool intel_cdclk_can_crawl(struct drm_i915_private *dev_priv, { int a_div, b_div; - if (!has_cdclk_crawl(dev_priv)) + if (!HAS_CDCLK_CRAWL(dev_priv)) return false; /* @@ -2118,7 +2093,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) crtc_state->port_clock >= 540000 && crtc_state->lane_count == 4) { if (DISPLAY_VER(dev_priv) == 10) { - /* Display WA #1145: glk,cnl */ + /* Display WA #1145: glk */ min_cdclk = max(316800, min_cdclk); } else if (DISPLAY_VER(dev_priv) == 9 || IS_BROADWELL(dev_priv)) { /* Display WA #1144: skl,bxt */ @@ -2239,7 +2214,7 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) /* * Account for port clock min voltage level requirements. - * This only really does something on CNL+ but can be + * This only really does something on DISPLA_VER >= 11 but can be * called on earlier platforms as well. 
* * Note that this functions assumes that 0 is @@ -2653,8 +2628,6 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 648000; else dev_priv->max_cdclk_freq = 652800; - } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 528000; } else if (IS_GEMINILAKE(dev_priv)) { dev_priv->max_cdclk_freq = 316800; } else if (IS_BROXTON(dev_priv)) { @@ -2878,13 +2851,19 @@ u32 intel_read_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (IS_ALDERLAKE_P(dev_priv)) { + if (IS_DG2(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = tgl_calc_voltage_level; + dev_priv->cdclk.table = dg2_cdclk_table; + } else if (IS_ALDERLAKE_P(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = tgl_calc_voltage_level; - /* Wa_22011320316:adlp[a0] */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + /* Wa_22011320316:adl-p[a0] */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) dev_priv->cdclk.table = adlp_a_step_cdclk_table; else dev_priv->cdclk.table = adlp_cdclk_table; @@ -2912,12 +2891,6 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = icl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; - } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; - dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; - dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; - dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bxt_set_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index dab892d2251b..afcb4bf3826c 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -305,13 +305,12 @@ static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state) ilk_csc_postoff_limited_range); } else if (crtc_state->csc_enable) { /* - * On GLK+ both pipe CSC and degamma LUT are controlled + * On GLK both pipe CSC and degamma LUT are controlled * by csc_enable. Hence for the cases where the degama * LUT is needed but CSC is not we need to load an * identity matrix. 
*/ - drm_WARN_ON(&dev_priv->drm, !IS_CANNONLAKE(dev_priv) && - !IS_GEMINILAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, !IS_GEMINILAKE(dev_priv)); ilk_update_pipe_csc(crtc, ilk_csc_off_zero, ilk_csc_coeff_identity, diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 487c54cd5982..bacdf8a16bcb 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -23,9 +23,9 @@ enum { PROCMON_1_05V_DOT_1, }; -static const struct cnl_procmon { +static const struct icl_procmon { u32 dw1, dw9, dw10; -} cnl_procmon_values[] = { +} icl_procmon_values[] = { [PROCMON_0_85V_DOT_0] = { .dw1 = 0x00000000, .dw9 = 0x62AB67BB, .dw10 = 0x51914F96, }, [PROCMON_0_95V_DOT_0] = @@ -38,15 +38,10 @@ static const struct cnl_procmon { { .dw1 = 0x00440000, .dw9 = 0x9A00AB25, .dw10 = 0x8AE38FF1, }, }; -/* - * CNL has just one set of registers, while gen11 has a set for each combo PHY. - * The CNL registers are equivalent to the gen11 PHY A registers, that's why we - * call the ICL macros even though the function has CNL on its name. - */ -static const struct cnl_procmon * -cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) +static const struct icl_procmon * +icl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { - const struct cnl_procmon *procmon; + const struct icl_procmon *procmon; u32 val; val = intel_de_read(dev_priv, ICL_PORT_COMP_DW3(phy)); @@ -55,32 +50,32 @@ cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) MISSING_CASE(val); fallthrough; case VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0: - procmon = &cnl_procmon_values[PROCMON_0_85V_DOT_0]; + procmon = &icl_procmon_values[PROCMON_0_85V_DOT_0]; break; case VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_0: - procmon = &cnl_procmon_values[PROCMON_0_95V_DOT_0]; + procmon = &icl_procmon_values[PROCMON_0_95V_DOT_0]; break; case VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_1: - procmon = &cnl_procmon_values[PROCMON_0_95V_DOT_1]; + procmon = &icl_procmon_values[PROCMON_0_95V_DOT_1]; break; case VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_0: - procmon = &cnl_procmon_values[PROCMON_1_05V_DOT_0]; + procmon = &icl_procmon_values[PROCMON_1_05V_DOT_0]; break; case VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_1: - procmon = &cnl_procmon_values[PROCMON_1_05V_DOT_1]; + procmon = &icl_procmon_values[PROCMON_1_05V_DOT_1]; break; } return procmon; } -static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv, +static void icl_set_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { - const struct cnl_procmon *procmon; + const struct icl_procmon *procmon; u32 val; - procmon = cnl_get_procmon_ref_values(dev_priv, phy); + procmon = icl_get_procmon_ref_values(dev_priv, phy); val = intel_de_read(dev_priv, ICL_PORT_COMP_DW1(phy)); val &= ~((0xff << 16) | 0xff); @@ -109,13 +104,13 @@ static bool check_phy_reg(struct drm_i915_private *dev_priv, return true; } -static bool cnl_verify_procmon_ref_values(struct drm_i915_private *dev_priv, +static bool icl_verify_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { - const struct cnl_procmon *procmon; + const struct icl_procmon *procmon; bool ret; - procmon = cnl_get_procmon_ref_values(dev_priv, phy); + procmon = icl_get_procmon_ref_values(dev_priv, phy); ret = check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW1(phy), (0xff << 16) | 0xff, procmon->dw1); @@ -127,61 +122,6 @@ static bool cnl_verify_procmon_ref_values(struct drm_i915_private 
*dev_priv, return ret; } -static bool cnl_combo_phy_enabled(struct drm_i915_private *dev_priv) -{ - return !(intel_de_read(dev_priv, CHICKEN_MISC_2) & CNL_COMP_PWR_DOWN) && - (intel_de_read(dev_priv, CNL_PORT_COMP_DW0) & COMP_INIT); -} - -static bool cnl_combo_phy_verify_state(struct drm_i915_private *dev_priv) -{ - enum phy phy = PHY_A; - bool ret; - - if (!cnl_combo_phy_enabled(dev_priv)) - return false; - - ret = cnl_verify_procmon_ref_values(dev_priv, phy); - - ret &= check_phy_reg(dev_priv, phy, CNL_PORT_CL1CM_DW5, - CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE); - - return ret; -} - -static void cnl_combo_phys_init(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = intel_de_read(dev_priv, CHICKEN_MISC_2); - val &= ~CNL_COMP_PWR_DOWN; - intel_de_write(dev_priv, CHICKEN_MISC_2, val); - - /* Dummy PORT_A to get the correct CNL register from the ICL macro */ - cnl_set_procmon_ref_values(dev_priv, PHY_A); - - val = intel_de_read(dev_priv, CNL_PORT_COMP_DW0); - val |= COMP_INIT; - intel_de_write(dev_priv, CNL_PORT_COMP_DW0, val); - - val = intel_de_read(dev_priv, CNL_PORT_CL1CM_DW5); - val |= CL_POWER_DOWN_ENABLE; - intel_de_write(dev_priv, CNL_PORT_CL1CM_DW5, val); -} - -static void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv) -{ - u32 val; - - if (!cnl_combo_phy_verify_state(dev_priv)) - drm_warn(&dev_priv->drm, - "Combo PHY HW state changed unexpectedly.\n"); - - val = intel_de_read(dev_priv, CHICKEN_MISC_2); - val |= CNL_COMP_PWR_DOWN; - intel_de_write(dev_priv, CHICKEN_MISC_2, val); -} - static bool has_phy_misc(struct drm_i915_private *i915, enum phy phy) { /* @@ -291,7 +231,7 @@ static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, DCC_MODE_SELECT_CONTINUOSLY); } - ret &= cnl_verify_procmon_ref_values(dev_priv, phy); + ret &= icl_verify_procmon_ref_values(dev_priv, phy); if (phy_is_master(dev_priv, phy)) { ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy), @@ -415,7 +355,7 @@ skip_phy_misc: intel_de_write(dev_priv, ICL_PORT_PCS_DW1_GRP(phy), val); } - cnl_set_procmon_ref_values(dev_priv, phy); + icl_set_procmon_ref_values(dev_priv, phy); if (phy_is_master(dev_priv, phy)) { val = intel_de_read(dev_priv, ICL_PORT_COMP_DW8(phy)); @@ -474,16 +414,10 @@ skip_phy_misc: void intel_combo_phy_init(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11) - icl_combo_phys_init(i915); - else if (IS_CANNONLAKE(i915)) - cnl_combo_phys_init(i915); + icl_combo_phys_init(i915); } void intel_combo_phy_uninit(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11) - icl_combo_phys_uninit(i915); - else if (IS_CANNONLAKE(i915)) - cnl_combo_phys_uninit(i915); + icl_combo_phys_uninit(i915); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 648f1c0d3d39..408f82b0dc7d 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -38,6 +38,7 @@ #include "intel_crt.h" #include "intel_crtc.h" #include "intel_ddi.h" +#include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_fdi.h" @@ -1081,6 +1082,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.enable_clock = hsw_ddi_enable_clock; crt->base.disable_clock = hsw_ddi_disable_clock; crt->base.is_clock_enabled = hsw_ddi_is_clock_enabled; + + intel_ddi_buf_trans_init(&crt->base); } else { if (HAS_PCH_SPLIT(dev_priv)) { crt->base.compute_config = pch_crt_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c 
b/drivers/gpu/drm/i915/display/intel_crtc.c index 95ff1707b4bd..254e67141a77 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -163,12 +163,12 @@ static void intel_crtc_free(struct intel_crtc *crtc) kfree(crtc); } -static void intel_crtc_destroy(struct drm_crtc *crtc) +static void intel_crtc_destroy(struct drm_crtc *_crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(_crtc); - drm_crtc_cleanup(crtc); - kfree(intel_crtc); + drm_crtc_cleanup(&crtc->base); + kfree(crtc); } static int intel_crtc_late_register(struct drm_crtc *crtc) @@ -335,7 +335,7 @@ int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) dev_priv->plane_to_crtc_mapping[i9xx_plane] = crtc; } - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) drm_crtc_create_scaling_filter_property(&crtc->base, BIT(DRM_SCALING_FILTER_DEFAULT) | BIT(DRM_SCALING_FILTER_NEAREST_NEIGHBOR)); diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 966e020331fb..c7618fef0143 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -383,7 +383,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (plane_state->hw.rotation & DRM_MODE_ROTATE_180) cntl |= MCURSOR_ROTATE_180; - /* Wa_22012358565:adlp */ + /* Wa_22012358565:adl-p */ if (DISPLAY_VER(dev_priv) == 13) cntl |= MCURSOR_ARB_SLOTS(1); @@ -629,12 +629,16 @@ intel_legacy_cursor_update(struct drm_plane *_plane, /* * When crtc is inactive or there is a modeset pending, - * wait for it to complete in the slowpath + * wait for it to complete in the slowpath. + * PSR2 selective fetch also requires the slow path as + * PSR2 plane and transcoder registers can only be updated during + * vblank. * * FIXME bigjoiner fastpath would be good */ if (!crtc_state->hw.active || intel_crtc_needs_modeset(crtc_state) || - crtc_state->update_pipe || crtc_state->bigjoiner) + crtc_state->update_pipe || crtc_state->bigjoiner || + crtc_state->enable_psr2_sel_fetch) goto slow; /* @@ -801,7 +805,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) >= 12) drm_plane_enable_fb_damage_clips(&cursor->base); - drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); + intel_plane_helper_add(cursor); return cursor; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 00dade49665b..9903a78df896 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -51,6 +51,7 @@ #include "intel_panel.h" #include "intel_pps.h" #include "intel_psr.h" +#include "intel_snps_phy.h" #include "intel_sprite.h" #include "intel_tc.h" #include "intel_vdsc.h" @@ -95,24 +96,18 @@ static int intel_ddi_hdmi_level(struct intel_encoder *encoder, * values in advance. This function programs the correct values for * DP/eDP/FDI use cases. 
*/ -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +void hsw_prepare_dp_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int i, n_entries; enum port port = encoder->port; - const struct ddi_buf_trans *ddi_translations; - - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) - ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, - &n_entries); - else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - ddi_translations = intel_ddi_get_buf_trans_edp(encoder, - &n_entries); - else - ddi_translations = intel_ddi_get_buf_trans_dp(encoder, - &n_entries); + const struct intel_ddi_buf_trans *ddi_translations; + + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); + if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) + return; /* If we're boosting the current, set bit 31 of trans1 */ if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv) && @@ -121,9 +116,9 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, for (i = 0; i < n_entries; i++) { intel_de_write(dev_priv, DDI_BUF_TRANS_LO(port, i), - ddi_translations[i].trans1 | iboost_bit); + ddi_translations->entries[i].hsw.trans1 | iboost_bit); intel_de_write(dev_priv, DDI_BUF_TRANS_HI(port, i), - ddi_translations[i].trans2); + ddi_translations->entries[i].hsw.trans2); } } @@ -132,17 +127,17 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, * values in advance. This function programs the correct values for * HDMI/DVI use cases. */ -static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, - int level) +static void hsw_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int n_entries; enum port port = encoder->port; - const struct ddi_buf_trans *ddi_translations; - - ddi_translations = intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); + const struct intel_ddi_buf_trans *ddi_translations; + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -155,9 +150,9 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, /* Entry 9 is for HDMI: */ intel_de_write(dev_priv, DDI_BUF_TRANS_LO(port, 9), - ddi_translations[level].trans1 | iboost_bit); + ddi_translations->entries[level].hsw.trans1 | iboost_bit); intel_de_write(dev_priv, DDI_BUF_TRANS_HI(port, 9), - ddi_translations[level].trans2); + ddi_translations->entries[level].hsw.trans2); } void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, @@ -177,14 +172,18 @@ void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, static void intel_wait_ddi_buf_active(struct drm_i915_private *dev_priv, enum port port) { + int ret; + /* Wait > 518 usecs for DDI_BUF_CTL to be non idle */ if (DISPLAY_VER(dev_priv) < 10) { usleep_range(518, 1000); return; } - if (wait_for_us(!(intel_de_read(dev_priv, DDI_BUF_CTL(port)) & - DDI_BUF_IS_IDLE), 500)) + ret = _wait_for(!(intel_de_read(dev_priv, DDI_BUF_CTL(port)) & + DDI_BUF_IS_IDLE), IS_DG2(dev_priv) ? 
1200 : 500, 10, 10); + + if (ret) drm_err(&dev_priv->drm, "Timeout waiting for DDI BUF %c to get active\n", port_name(port)); } @@ -828,7 +827,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, static enum intel_display_power_domain intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port) { - /* CNL+ HW requires corresponding AUX IOs to be powered up for PSR with + /* ICL+ HW requires corresponding AUX IOs to be powered up for PSR with * DC states enabled at the same time, while for driver initiated AUX * transfers we need the same AUX IOs to be powered but with DC states * disabled. Accordingly use the AUX power domain here which leaves DC @@ -948,22 +947,16 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, iboost = intel_bios_encoder_dp_boost_level(encoder->devdata); if (iboost == 0) { - const struct ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; int n_entries; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - ddi_translations = intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - ddi_translations = intel_ddi_get_buf_trans_edp(encoder, &n_entries); - else - ddi_translations = intel_ddi_get_buf_trans_dp(encoder, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) level = n_entries - 1; - iboost = ddi_translations[level].i_boost; + iboost = ddi_translations->entries[level].hsw.i_boost; } /* Make sure that the requested I_boost is valid */ @@ -983,21 +976,21 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct bxt_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; enum port port = encoder->port; int n_entries; - ddi_translations = bxt_get_buf_trans(encoder, crtc_state, &n_entries); + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) level = n_entries - 1; bxt_ddi_phy_set_signal_level(dev_priv, port, - ddi_translations[level].margin, - ddi_translations[level].scale, - ddi_translations[level].enable, - ddi_translations[level].deemphasis); + ddi_translations->entries[level].bxt.margin, + ddi_translations->entries[level].bxt.scale, + ddi_translations->entries[level].bxt.enable, + ddi_translations->entries[level].bxt.deemphasis); } static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, @@ -1005,36 +998,9 @@ static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; - enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; - if (DISPLAY_VER(dev_priv) >= 12) { - if (intel_phy_is_combo(dev_priv, phy)) - tgl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_ALDERLAKE_P(dev_priv)) - adlp_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - else - tgl_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - } else if (DISPLAY_VER(dev_priv) == 11) { - if (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE)) - jsl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) - 
ehl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (intel_phy_is_combo(dev_priv, phy)) - icl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else - icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_CANNONLAKE(dev_priv)) { - cnl_get_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - bxt_get_buf_trans(encoder, crtc_state, &n_entries); - } else { - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - intel_ddi_get_buf_trans_edp(encoder, &n_entries); - else - intel_ddi_get_buf_trans_dp(encoder, &n_entries); - } + encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON(&dev_priv->drm, n_entries < 1)) n_entries = 1; @@ -1056,146 +1022,17 @@ static u8 intel_ddi_dp_preemph_max(struct intel_dp *intel_dp) return DP_TRAIN_PRE_EMPH_LEVEL_3; } -static void cnl_ddi_vswing_program(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int level) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct cnl_ddi_buf_trans *ddi_translations; - enum port port = encoder->port; - int n_entries, ln; - u32 val; - - ddi_translations = cnl_get_buf_trans(encoder, crtc_state, &n_entries); - - if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) - return; - if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) - level = n_entries - 1; - - /* Set PORT_TX_DW5 Scaling Mode Sel to 010b. */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val &= ~SCALING_MODE_SEL_MASK; - val |= SCALING_MODE_SEL(2); - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); - - /* Program PORT_TX_DW2 */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW2_LN0(port)); - val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | - RCOMP_SCALAR_MASK); - val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); - val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); - /* Rcomp scalar is fixed as 0x98 for every table entry */ - val |= RCOMP_SCALAR(0x98); - intel_de_write(dev_priv, CNL_PORT_TX_DW2_GRP(port), val); - - /* Program PORT_TX_DW4 */ - /* We cannot write to GRP. It would overrite individual loadgen */ - for (ln = 0; ln < 4; ln++) { - val = intel_de_read(dev_priv, CNL_PORT_TX_DW4_LN(ln, port)); - val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | - CURSOR_COEFF_MASK); - val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); - val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); - val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); - intel_de_write(dev_priv, CNL_PORT_TX_DW4_LN(ln, port), val); - } - - /* Program PORT_TX_DW5 */ - /* All DW5 values are fixed for every table entry */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val &= ~RTERM_SELECT_MASK; - val |= RTERM_SELECT(6); - val |= TAP3_DISABLE; - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); - - /* Program PORT_TX_DW7 */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW7_LN0(port)); - val &= ~N_SCALAR_MASK; - val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); - intel_de_write(dev_priv, CNL_PORT_TX_DW7_GRP(port), val); -} - -static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int level) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; - int width, rate, ln; - u32 val; - - width = crtc_state->lane_count; - rate = crtc_state->port_clock; - - /* - * 1. 
If port type is eDP or DP, - * set PORT_PCS_DW1 cmnkeeper_enable to 1b, - * else clear to 0b. - */ - val = intel_de_read(dev_priv, CNL_PORT_PCS_DW1_LN0(port)); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - val &= ~COMMON_KEEPER_EN; - else - val |= COMMON_KEEPER_EN; - intel_de_write(dev_priv, CNL_PORT_PCS_DW1_GRP(port), val); - - /* 2. Program loadgen select */ - /* - * Program PORT_TX_DW4_LN depending on Bit rate and used lanes - * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) - * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) - * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) - */ - for (ln = 0; ln <= 3; ln++) { - val = intel_de_read(dev_priv, CNL_PORT_TX_DW4_LN(ln, port)); - val &= ~LOADGEN_SELECT; - - if ((rate <= 600000 && width == 4 && ln >= 1) || - (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { - val |= LOADGEN_SELECT; - } - intel_de_write(dev_priv, CNL_PORT_TX_DW4_LN(ln, port), val); - } - - /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ - val = intel_de_read(dev_priv, CNL_PORT_CL1CM_DW5); - val |= SUS_CLOCK_CONFIG; - intel_de_write(dev_priv, CNL_PORT_CL1CM_DW5, val); - - /* 4. Clear training enable to change swing values */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val &= ~TX_TRAINING_EN; - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); - - /* 5. Program swing and de-emphasis */ - cnl_ddi_vswing_program(encoder, crtc_state, level); - - /* 6. Set training enable to trigger update */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val |= TX_TRAINING_EN; - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); -} - static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct cnl_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; enum phy phy = intel_port_to_phy(dev_priv, encoder->port); int n_entries, ln; u32 val; - if (DISPLAY_VER(dev_priv) >= 12) - ddi_translations = tgl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE)) - ddi_translations = jsl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) - ddi_translations = ehl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else - ddi_translations = icl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -1223,8 +1060,8 @@ static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, val = intel_de_read(dev_priv, ICL_PORT_TX_DW2_LN0(phy)); val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); - val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); - val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); + val |= SWING_SEL_UPPER(ddi_translations->entries[level].icl.dw2_swing_sel); + val |= SWING_SEL_LOWER(ddi_translations->entries[level].icl.dw2_swing_sel); /* Program Rcomp scalar for every table entry */ val |= RCOMP_SCALAR(0x98); intel_de_write(dev_priv, ICL_PORT_TX_DW2_GRP(phy), val); @@ -1235,16 +1072,16 @@ static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, val = intel_de_read(dev_priv, ICL_PORT_TX_DW4_LN(ln, phy)); val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK); - val |= 
POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); - val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); - val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); + val |= POST_CURSOR_1(ddi_translations->entries[level].icl.dw4_post_cursor_1); + val |= POST_CURSOR_2(ddi_translations->entries[level].icl.dw4_post_cursor_2); + val |= CURSOR_COEFF(ddi_translations->entries[level].icl.dw4_cursor_coeff); intel_de_write(dev_priv, ICL_PORT_TX_DW4_LN(ln, phy), val); } /* Program PORT_TX_DW7 */ val = intel_de_read(dev_priv, ICL_PORT_TX_DW7_LN0(phy)); val &= ~N_SCALAR_MASK; - val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); + val |= N_SCALAR(ddi_translations->entries[level].icl.dw7_n_scalar); intel_de_write(dev_priv, ICL_PORT_TX_DW7_GRP(phy), val); } @@ -1315,15 +1152,14 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); - const struct icl_mg_phy_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; int n_entries, ln; u32 val; if (enc_to_dig_port(encoder)->tc_mode == TC_PORT_TBT_ALT) return; - ddi_translations = icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -1345,13 +1181,13 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val = intel_de_read(dev_priv, MG_TX1_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( - ddi_translations[level].cri_txdeemph_override_17_12); + ddi_translations->entries[level].mg.cri_txdeemph_override_17_12); intel_de_write(dev_priv, MG_TX1_SWINGCTRL(ln, tc_port), val); val = intel_de_read(dev_priv, MG_TX2_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( - ddi_translations[level].cri_txdeemph_override_17_12); + ddi_translations->entries[level].mg.cri_txdeemph_override_17_12); intel_de_write(dev_priv, MG_TX2_SWINGCTRL(ln, tc_port), val); } @@ -1361,9 +1197,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( - ddi_translations[level].cri_txdeemph_override_5_0) | + ddi_translations->entries[level].mg.cri_txdeemph_override_5_0) | CRI_TXDEEMPH_OVERRIDE_11_6( - ddi_translations[level].cri_txdeemph_override_11_6) | + ddi_translations->entries[level].mg.cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; intel_de_write(dev_priv, MG_TX1_DRVCTRL(ln, tc_port), val); @@ -1371,9 +1207,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( - ddi_translations[level].cri_txdeemph_override_5_0) | + ddi_translations->entries[level].mg.cri_txdeemph_override_5_0) | CRI_TXDEEMPH_OVERRIDE_11_6( - ddi_translations[level].cri_txdeemph_override_11_6) | + ddi_translations->entries[level].mg.cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; intel_de_write(dev_priv, MG_TX2_DRVCTRL(ln, tc_port), val); @@ -1453,18 +1289,14 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port 
tc_port = intel_port_to_tc(dev_priv, encoder->port); - const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; u32 val, dpcnt_mask, dpcnt_val; int n_entries, ln; if (enc_to_dig_port(encoder)->tc_mode == TC_PORT_TBT_ALT) return; - if (IS_ALDERLAKE_P(dev_priv)) - ddi_translations = adlp_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - else - ddi_translations = tgl_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -1473,9 +1305,9 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, dpcnt_mask = (DKL_TX_PRESHOOT_COEFF_MASK | DKL_TX_DE_EMPAHSIS_COEFF_MASK | DKL_TX_VSWING_CONTROL_MASK); - dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations[level].dkl_vswing_control); - dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations[level].dkl_de_emphasis_control); - dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations[level].dkl_preshoot_control); + dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations->entries[level].dkl.dkl_vswing_control); + dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations->entries[level].dkl.dkl_de_emphasis_control); + dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations->entries[level].dkl.dkl_preshoot_control); for (ln = 0; ln < 2; ln++) { intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), @@ -1549,33 +1381,33 @@ static int intel_ddi_dp_level(struct intel_dp *intel_dp) } static void -tgl_set_signal_levels(struct intel_dp *intel_dp, +dg2_set_signal_levels(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - tgl_ddi_vswing_sequence(encoder, crtc_state, level); + intel_snps_phy_ddi_vswing_sequence(encoder, level); } static void -icl_set_signal_levels(struct intel_dp *intel_dp, +tgl_set_signal_levels(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - icl_ddi_vswing_sequence(encoder, crtc_state, level); + tgl_ddi_vswing_sequence(encoder, crtc_state, level); } static void -cnl_set_signal_levels(struct intel_dp *intel_dp, +icl_set_signal_levels(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - cnl_ddi_vswing_sequence(encoder, crtc_state, level); + icl_ddi_vswing_sequence(encoder, crtc_state, level); } static void @@ -1613,7 +1445,7 @@ hsw_set_signal_levels(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); } -static void _cnl_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg, +static void _icl_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_sel_mask, u32 clk_sel, u32 clk_off) { mutex_lock(&i915->dpll.lock); @@ -1629,7 +1461,7 @@ static void _cnl_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg, mutex_unlock(&i915->dpll.lock); } -static void _cnl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg, +static void _icl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_off) { mutex_lock(&i915->dpll.lock); @@ -1639,14 +1471,14 @@ static void _cnl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg 
mutex_unlock(&i915->dpll.lock); } -static bool _cnl_ddi_is_clock_enabled(struct drm_i915_private *i915, i915_reg_t reg, +static bool _icl_ddi_is_clock_enabled(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_off) { return !(intel_de_read(i915, reg) & clk_off); } static struct intel_shared_dpll * -_cnl_ddi_get_pll(struct drm_i915_private *i915, i915_reg_t reg, +_icl_ddi_get_pll(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_sel_mask, u32 clk_sel_shift) { enum intel_dpll_id id; @@ -1666,7 +1498,7 @@ static void adls_ddi_enable_clock(struct intel_encoder *encoder, if (drm_WARN_ON(&i915->drm, !pll)) return; - _cnl_ddi_enable_clock(i915, ADLS_DPCLKA_CFGCR(phy), + _icl_ddi_enable_clock(i915, ADLS_DPCLKA_CFGCR(phy), ADLS_DPCLKA_CFGCR_DDI_CLK_SEL_MASK(phy), pll->info->id << ADLS_DPCLKA_CFGCR_DDI_SHIFT(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1677,7 +1509,7 @@ static void adls_ddi_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, ADLS_DPCLKA_CFGCR(phy), + _icl_ddi_disable_clock(i915, ADLS_DPCLKA_CFGCR(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1686,7 +1518,7 @@ static bool adls_ddi_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, ADLS_DPCLKA_CFGCR(phy), + return _icl_ddi_is_clock_enabled(i915, ADLS_DPCLKA_CFGCR(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1695,7 +1527,7 @@ static struct intel_shared_dpll *adls_ddi_get_pll(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_get_pll(i915, ADLS_DPCLKA_CFGCR(phy), + return _icl_ddi_get_pll(i915, ADLS_DPCLKA_CFGCR(phy), ADLS_DPCLKA_CFGCR_DDI_CLK_SEL_MASK(phy), ADLS_DPCLKA_CFGCR_DDI_SHIFT(phy)); } @@ -1710,7 +1542,7 @@ static void rkl_ddi_enable_clock(struct intel_encoder *encoder, if (drm_WARN_ON(&i915->drm, !pll)) return; - _cnl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), RKL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy), RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1721,7 +1553,7 @@ static void rkl_ddi_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1730,7 +1562,7 @@ static bool rkl_ddi_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1739,7 +1571,7 @@ static struct intel_shared_dpll *rkl_ddi_get_pll(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); } @@ -1763,7 +1595,7 @@ static void dg1_ddi_enable_clock(struct intel_encoder *encoder, 
(pll->info->id >= DPLL_ID_DG1_DPLL2 && phy < PHY_C))) return; - _cnl_ddi_enable_clock(i915, DG1_DPCLKA_CFGCR0(phy), + _icl_ddi_enable_clock(i915, DG1_DPCLKA_CFGCR0(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy), DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1774,7 +1606,7 @@ static void dg1_ddi_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, DG1_DPCLKA_CFGCR0(phy), + _icl_ddi_disable_clock(i915, DG1_DPCLKA_CFGCR0(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1783,7 +1615,7 @@ static bool dg1_ddi_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, DG1_DPCLKA_CFGCR0(phy), + return _icl_ddi_is_clock_enabled(i915, DG1_DPCLKA_CFGCR0(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1820,7 +1652,7 @@ static void icl_ddi_combo_enable_clock(struct intel_encoder *encoder, if (drm_WARN_ON(&i915->drm, !pll)) return; - _cnl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1831,7 +1663,7 @@ static void icl_ddi_combo_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1840,7 +1672,7 @@ static bool icl_ddi_combo_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1849,7 +1681,7 @@ struct intel_shared_dpll *icl_ddi_combo_get_pll(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); } @@ -1982,50 +1814,6 @@ static struct intel_shared_dpll *icl_ddi_tc_get_pll(struct intel_encoder *encode return intel_get_shared_dpll_by_id(i915, id); } -static void cnl_ddi_enable_clock(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - const struct intel_shared_dpll *pll = crtc_state->shared_dpll; - enum port port = encoder->port; - - if (drm_WARN_ON(&i915->drm, !pll)) - return; - - _cnl_ddi_enable_clock(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port), - DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port), - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); -} - -static void cnl_ddi_disable_clock(struct intel_encoder *encoder) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum port port = encoder->port; - - _cnl_ddi_disable_clock(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); -} - -static bool cnl_ddi_is_clock_enabled(struct intel_encoder *encoder) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum 
port port = encoder->port; - - return _cnl_ddi_is_clock_enabled(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); -} - -static struct intel_shared_dpll *cnl_ddi_get_pll(struct intel_encoder *encoder) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum port port = encoder->port; - - return _cnl_ddi_get_pll(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port), - DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)); -} - static struct intel_shared_dpll *bxt_ddi_get_pll(struct intel_encoder *encoder) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); @@ -2249,7 +2037,7 @@ void intel_ddi_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) ddi_clk_needed = false; } - if (ddi_clk_needed || !encoder->disable_clock || + if (ddi_clk_needed || !encoder->is_clock_enabled || !encoder->is_clock_enabled(encoder)) return; @@ -2534,6 +2322,116 @@ static void intel_ddi_mso_configure(const struct intel_crtc_state *crtc_state) OVERLAP_PIXELS_MASK, dss1); } +static void dg2_ddi_pre_enable_dp(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); + + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count); + + /* + * 1. Enable Power Wells + * + * This was handled at the beginning of intel_atomic_commit_tail(), + * before we called down into this function. + */ + + /* 2. Enable Panel Power if PPS is required */ + intel_pps_on(intel_dp); + + /* + * 3. Enable the port PLL. + */ + intel_ddi_enable_clock(encoder, crtc_state); + + /* 4. Enable IO power */ + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + dig_port->ddi_io_wakeref = intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); + + /* + * 5. The rest of the below are substeps under the bspec's "Enable and + * Train Display Port" step. Note that steps that are specific to + * MST will be handled by intel_mst_pre_enable_dp() before/after it + * calls into this function. Also intel_mst_pre_enable_dp() only calls + * us when active_mst_links==0, so any steps designated for "single + * stream or multi-stream master transcoder" can just be performed + * unconditionally here. + */ + + /* + * 5.a Configure Transcoder Clock Select to direct the Port clock to the + * Transcoder. + */ + intel_ddi_enable_pipe_clock(encoder, crtc_state); + + /* 5.b Not relevant to i915 for now */ + + /* + * 5.c Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST + * Transport Select + */ + intel_ddi_config_transcoder_func(encoder, crtc_state); + + /* + * 5.d Configure & enable DP_TP_CTL with link training pattern 1 + * selected + * + * This will be handled by the intel_dp_start_link_train() farther + * down this function. + */ + + /* 5.e Configure voltage swing and related IO settings */ + intel_snps_phy_ddi_vswing_sequence(encoder, level); + + /* + * 5.f Configure and enable DDI_BUF_CTL + * 5.g Wait for DDI_BUF_CTL DDI Idle Status = 0b (Not Idle), timeout + * after 1200 us. + * + * We only configure what the register value will be here. Actual + * enabling happens during link training farther down. 
+ */ + intel_ddi_init_dp_buf_reg(encoder, crtc_state); + + if (!is_mst) + intel_dp_set_power(intel_dp, DP_SET_POWER_D0); + + intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + /* + * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit + * in the FEC_CONFIGURATION register to 1 before initiating link + * training + */ + intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + + /* + * 5.h Follow DisplayPort specification training sequence (see notes for + * failure handling) + * 5.i If DisplayPort multi-stream - Set DP_TP_CTL link training to Idle + * Pattern, wait for 5 idle patterns (DP_TP_STATUS Min_Idles_Sent) + * (timeout after 800 us) + */ + intel_dp_start_link_train(intel_dp, crtc_state); + + /* 5.j Set DP_TP_CTL link training to Normal */ + if (!is_trans_port_sync_mode(crtc_state)) + intel_dp_stop_link_train(intel_dp, crtc_state); + + /* 5.k Configure and enable FEC if needed */ + intel_ddi_enable_fec(encoder, crtc_state); + intel_dsc_enable(encoder, crtc_state); +} + static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -2714,12 +2612,10 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, if (DISPLAY_VER(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else - intel_prepare_dp_ddi_buffers(encoder, crtc_state); + hsw_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_power_up_lanes(encoder, crtc_state); @@ -2751,7 +2647,9 @@ static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (DISPLAY_VER(dev_priv) >= 12) + if (IS_DG2(dev_priv)) + dg2_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); + else if (DISPLAY_VER(dev_priv) >= 12) tgl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); else hsw_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); @@ -2827,6 +2725,7 @@ static void intel_ddi_pre_enable(struct intel_atomic_state *state, conn_state); /* FIXME precompute everything properly */ + /* FIXME how do we turn infoframes off again? 
*/ if (dig_port->lspcon.active && dig_port->dp.has_hdmi_sink) dig_port->set_infoframes(encoder, crtc_state->has_infoframe, @@ -3157,16 +3056,16 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, "[CONNECTOR:%d:%s] Failed to configure sink scrambling/TMDS bit clock ratio\n", connector->base.id, connector->name); - if (DISPLAY_VER(dev_priv) >= 12) + if (IS_DG2(dev_priv)) + intel_snps_phy_ddi_vswing_sequence(encoder, U32_MAX); + else if (DISPLAY_VER(dev_priv) >= 12) tgl_ddi_vswing_sequence(encoder, crtc_state, level); else if (DISPLAY_VER(dev_priv) == 11) icl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else - intel_prepare_hdmi_ddi_buffers(encoder, level); + hsw_prepare_hdmi_ddi_buffers(encoder, crtc_state, level); if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); @@ -3260,12 +3159,6 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state, intel_dp->link_trained = false; - if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder, - old_crtc_state, old_conn_state); - - intel_edp_drrs_disable(intel_dp, old_crtc_state); - intel_psr_disable(intel_dp, old_crtc_state); intel_edp_backlight_off(old_conn_state); /* Disable the decompression in DP Sink */ intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state, @@ -3283,10 +3176,6 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct drm_connector *connector = old_conn_state->connector; - if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder, - old_crtc_state, old_conn_state); - if (!intel_hdmi_handle_sink_scrambling(encoder, connector, false, false)) drm_dbg_kms(&i915->drm, @@ -3294,6 +3183,25 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, connector->base.id, connector->name); } +static void intel_pre_disable_ddi(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct intel_dp *intel_dp; + + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder, old_crtc_state, + old_conn_state); + + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + return; + + intel_dp = enc_to_intel_dp(encoder); + intel_edp_drrs_disable(intel_dp, old_crtc_state); + intel_psr_disable(intel_dp, old_crtc_state); +} + static void intel_disable_ddi(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -3510,7 +3418,7 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, if (cpu_transcoder == TRANSCODER_EDP) return false; - if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) + if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO_MMIO)) return false; return intel_de_read(dev_priv, HSW_AUD_PIN_ELD_CP_VLD) & @@ -3526,8 +3434,6 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, crtc_state->min_voltage_level = 3; else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000) crtc_state->min_voltage_level = 1; - else if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 2; } static enum transcoder 
bdw_transcoder_master_readout(struct drm_i915_private *dev_priv, @@ -3594,7 +3500,7 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; struct intel_digital_port *dig_port = enc_to_dig_port(encoder); u32 temp, flags = 0; @@ -3657,7 +3563,7 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - intel_dp_get_m_n(intel_crtc, pipe_config); + intel_dp_get_m_n(crtc, pipe_config); if (DISPLAY_VER(dev_priv) >= 11) { i915_reg_t dp_tp_ctl = dp_tp_ctl_reg(encoder, pipe_config); @@ -3687,7 +3593,7 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, pipe_config->mst_master_transcoder = REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); - intel_dp_get_m_n(intel_crtc, pipe_config); + intel_dp_get_m_n(crtc, pipe_config); pipe_config->infoframes.enable |= intel_hdmi_infoframes_enabled(encoder, pipe_config); @@ -3801,6 +3707,15 @@ void intel_ddi_get_clock(struct intel_encoder *encoder, &crtc_state->dpll_hw_state); } +static void dg2_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_mpllb_readout_hw_state(encoder, &crtc_state->mpllb_state); + crtc_state->port_clock = intel_mpllb_calc_port_clock(encoder, &crtc_state->mpllb_state); + + intel_ddi_get_config(encoder, crtc_state); +} + static void adls_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { @@ -3868,13 +3783,6 @@ static void icl_ddi_tc_get_config(struct intel_encoder *encoder, intel_ddi_get_config(encoder, crtc_state); } -static void cnl_ddi_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) -{ - intel_ddi_get_clock(encoder, crtc_state, cnl_ddi_get_pll(encoder)); - intel_ddi_get_config(encoder, crtc_state); -} - static void bxt_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { @@ -4121,12 +4029,12 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) dig_port->dp.set_link_train = intel_ddi_set_link_train; dig_port->dp.set_idle_link_train = intel_ddi_set_idle_link_train; - if (DISPLAY_VER(dev_priv) >= 12) + if (IS_DG2(dev_priv)) + dig_port->dp.set_signal_levels = dg2_set_signal_levels; + else if (DISPLAY_VER(dev_priv) >= 12) dig_port->dp.set_signal_levels = tgl_set_signal_levels; else if (DISPLAY_VER(dev_priv) >= 11) dig_port->dp.set_signal_levels = icl_set_signal_levels; - else if (IS_CANNONLAKE(dev_priv)) - dig_port->dp.set_signal_levels = cnl_set_signal_levels; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dig_port->dp.set_signal_levels = bxt_set_signal_levels; else @@ -4373,15 +4281,6 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return true; - /* Cannonlake: Most of SKUs don't support DDI_E, and the only - * one who does also have a full A/E split called - * DDI_F what makes DDI_E useless. However for this - * case let's trust VBT info. 
- */ - if (IS_CANNONLAKE(dev_priv) && - !intel_bios_is_port_present(dev_priv, PORT_E)) - return true; - return false; } @@ -4486,15 +4385,6 @@ static enum hpd_pin ehl_hpd_pin(struct drm_i915_private *dev_priv, return HPD_PORT_A + port - PORT_A; } -static enum hpd_pin cnl_hpd_pin(struct drm_i915_private *dev_priv, - enum port port) -{ - if (port == PORT_F) - return HPD_PORT_E; - - return HPD_PORT_A + port - PORT_A; -} - static enum hpd_pin skl_hpd_pin(struct drm_i915_private *dev_priv, enum port port) { if (HAS_PCH_TGP(dev_priv)) @@ -4513,6 +4403,36 @@ static bool intel_ddi_is_tc(struct drm_i915_private *i915, enum port port) return false; } +static void intel_ddi_encoder_suspend(struct intel_encoder *encoder) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + intel_dp_encoder_suspend(encoder); + + if (!intel_phy_is_tc(i915, phy)) + return; + + intel_tc_port_disconnect_phy(dig_port); +} + +static void intel_ddi_encoder_shutdown(struct intel_encoder *encoder) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + intel_dp_encoder_shutdown(encoder); + + if (!intel_phy_is_tc(i915, phy)) + return; + + intel_tc_port_disconnect_phy(dig_port); +} + #define port_tc_name(port) ((port) - PORT_TC1 + '1') #define tc_port_name(tc_port) ((tc_port) - TC_PORT_1 + '1') @@ -4616,14 +4536,15 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->enable = intel_enable_ddi; encoder->pre_pll_enable = intel_ddi_pre_pll_enable; encoder->pre_enable = intel_ddi_pre_enable; + encoder->pre_disable = intel_pre_disable_ddi; encoder->disable = intel_disable_ddi; encoder->post_disable = intel_ddi_post_disable; encoder->update_pipe = intel_ddi_update_pipe; encoder->get_hw_state = intel_ddi_get_hw_state; encoder->sync_state = intel_ddi_sync_state; encoder->initial_fastset_check = intel_ddi_initial_fastset_check; - encoder->suspend = intel_dp_encoder_suspend; - encoder->shutdown = intel_dp_encoder_shutdown; + encoder->suspend = intel_ddi_encoder_suspend; + encoder->shutdown = intel_ddi_encoder_shutdown; encoder->get_power_domains = intel_ddi_get_power_domains; encoder->type = INTEL_OUTPUT_DDI; @@ -4632,7 +4553,11 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->cloneable = 0; encoder->pipe_mask = ~0; - if (IS_ALDERLAKE_S(dev_priv)) { + if (IS_DG2(dev_priv)) { + encoder->enable_clock = intel_mpllb_enable; + encoder->disable_clock = intel_mpllb_disable; + encoder->get_config = dg2_ddi_get_config; + } else if (IS_ALDERLAKE_S(dev_priv)) { encoder->enable_clock = adls_ddi_enable_clock; encoder->disable_clock = adls_ddi_disable_clock; encoder->is_clock_enabled = adls_ddi_is_clock_enabled; @@ -4671,11 +4596,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->is_clock_enabled = icl_ddi_combo_is_clock_enabled; encoder->get_config = icl_ddi_combo_get_config; } - } else if (IS_CANNONLAKE(dev_priv)) { - encoder->enable_clock = cnl_ddi_enable_clock; - encoder->disable_clock = cnl_ddi_disable_clock; - encoder->is_clock_enabled = cnl_ddi_is_clock_enabled; - encoder->get_config = cnl_ddi_get_config; } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* BXT/GLK have 
fixed PLL->port mapping */ encoder->get_config = bxt_ddi_get_config; @@ -4691,6 +4611,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->get_config = hsw_ddi_get_config; } + intel_ddi_buf_trans_init(encoder); + if (DISPLAY_VER(dev_priv) >= 13) encoder->hpd_pin = xelpd_hpd_pin(dev_priv, port); else if (IS_DG1(dev_priv)) @@ -4703,8 +4625,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->hpd_pin = ehl_hpd_pin(dev_priv, port); else if (DISPLAY_VER(dev_priv) == 11) encoder->hpd_pin = icl_hpd_pin(dev_priv, port); - else if (IS_CANNONLAKE(dev_priv)) - encoder->hpd_pin = cnl_hpd_pin(dev_priv, port); else if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) encoder->hpd_pin = skl_hpd_pin(dev_priv, port); else diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 59c6b01d4199..7d448485d887 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -40,8 +40,8 @@ bool hsw_ddi_is_clock_enabled(struct intel_encoder *encoder); void hsw_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state); struct intel_shared_dpll *icl_ddi_combo_get_pll(struct intel_encoder *encoder); -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); +void hsw_prepare_dp_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, enum port port); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 8bfd00f49f2a..ba2c08f1a797 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -13,1095 +13,1160 @@ * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well */ -static const struct ddi_buf_trans hsw_ddi_translations_dp[] = { - { 0x00FFFFFF, 0x0006000E, 0x0 }, - { 0x00D75FFF, 0x0005000A, 0x0 }, - { 0x00C30FFF, 0x00040006, 0x0 }, - { 0x80AAAFFF, 0x000B0000, 0x0 }, - { 0x00FFFFFF, 0x0005000A, 0x0 }, - { 0x00D75FFF, 0x000C0004, 0x0 }, - { 0x80C30FFF, 0x000B0000, 0x0 }, - { 0x00FFFFFF, 0x00040006, 0x0 }, - { 0x80D75FFF, 0x000B0000, 0x0 }, -}; - -static const struct ddi_buf_trans hsw_ddi_translations_fdi[] = { - { 0x00FFFFFF, 0x0007000E, 0x0 }, - { 0x00D75FFF, 0x000F000A, 0x0 }, - { 0x00C30FFF, 0x00060006, 0x0 }, - { 0x00AAAFFF, 0x001E0000, 0x0 }, - { 0x00FFFFFF, 0x000F000A, 0x0 }, - { 0x00D75FFF, 0x00160004, 0x0 }, - { 0x00C30FFF, 0x001E0000, 0x0 }, - { 0x00FFFFFF, 0x00060006, 0x0 }, - { 0x00D75FFF, 0x001E0000, 0x0 }, -}; - -static const struct ddi_buf_trans hsw_ddi_translations_hdmi[] = { - /* Idx NT mV d T mV d db */ - { 0x00FFFFFF, 0x0006000E, 0x0 },/* 0: 400 400 0 */ - { 0x00E79FFF, 0x000E000C, 0x0 },/* 1: 400 500 2 */ - { 0x00D75FFF, 0x0005000A, 0x0 },/* 2: 400 600 3.5 */ - { 0x00FFFFFF, 0x0005000A, 0x0 },/* 3: 600 600 0 */ - { 0x00E79FFF, 0x001D0007, 0x0 },/* 4: 600 750 2 */ - { 0x00D75FFF, 0x000C0004, 0x0 },/* 5: 600 900 3.5 */ - { 0x00FFFFFF, 0x00040006, 0x0 },/* 6: 800 800 0 */ - { 0x80E79FFF, 0x00030002, 0x0 },/* 7: 800 1000 2 */ - { 0x00FFFFFF, 0x00140005, 0x0 },/* 8: 850 850 0 */ - { 0x00FFFFFF, 0x000C0004, 0x0 },/* 9: 900 900 0 */ - { 0x00FFFFFF, 0x001C0003, 0x0 },/* 10: 950 950 0 */ - { 0x80FFFFFF, 0x00030002, 0x0 },/* 11: 
1000 1000 0 */ -}; - -static const struct ddi_buf_trans bdw_ddi_translations_edp[] = { - { 0x00FFFFFF, 0x00000012, 0x0 }, - { 0x00EBAFFF, 0x00020011, 0x0 }, - { 0x00C71FFF, 0x0006000F, 0x0 }, - { 0x00AAAFFF, 0x000E000A, 0x0 }, - { 0x00FFFFFF, 0x00020011, 0x0 }, - { 0x00DB6FFF, 0x0005000F, 0x0 }, - { 0x00BEEFFF, 0x000A000C, 0x0 }, - { 0x00FFFFFF, 0x0005000F, 0x0 }, - { 0x00DB6FFF, 0x000A000C, 0x0 }, -}; - -static const struct ddi_buf_trans bdw_ddi_translations_dp[] = { - { 0x00FFFFFF, 0x0007000E, 0x0 }, - { 0x00D75FFF, 0x000E000A, 0x0 }, - { 0x00BEFFFF, 0x00140006, 0x0 }, - { 0x80B2CFFF, 0x001B0002, 0x0 }, - { 0x00FFFFFF, 0x000E000A, 0x0 }, - { 0x00DB6FFF, 0x00160005, 0x0 }, - { 0x80C71FFF, 0x001A0002, 0x0 }, - { 0x00F7DFFF, 0x00180004, 0x0 }, - { 0x80D75FFF, 0x001B0002, 0x0 }, -}; - -static const struct ddi_buf_trans bdw_ddi_translations_fdi[] = { - { 0x00FFFFFF, 0x0001000E, 0x0 }, - { 0x00D75FFF, 0x0004000A, 0x0 }, - { 0x00C30FFF, 0x00070006, 0x0 }, - { 0x00AAAFFF, 0x000C0000, 0x0 }, - { 0x00FFFFFF, 0x0004000A, 0x0 }, - { 0x00D75FFF, 0x00090004, 0x0 }, - { 0x00C30FFF, 0x000C0000, 0x0 }, - { 0x00FFFFFF, 0x00070006, 0x0 }, - { 0x00D75FFF, 0x000C0000, 0x0 }, -}; - -static const struct ddi_buf_trans bdw_ddi_translations_hdmi[] = { - /* Idx NT mV d T mV df db */ - { 0x00FFFFFF, 0x0007000E, 0x0 },/* 0: 400 400 0 */ - { 0x00D75FFF, 0x000E000A, 0x0 },/* 1: 400 600 3.5 */ - { 0x00BEFFFF, 0x00140006, 0x0 },/* 2: 400 800 6 */ - { 0x00FFFFFF, 0x0009000D, 0x0 },/* 3: 450 450 0 */ - { 0x00FFFFFF, 0x000E000A, 0x0 },/* 4: 600 600 0 */ - { 0x00D7FFFF, 0x00140006, 0x0 },/* 5: 600 800 2.5 */ - { 0x80CB2FFF, 0x001B0002, 0x0 },/* 6: 600 1000 4.5 */ - { 0x00FFFFFF, 0x00140006, 0x0 },/* 7: 800 800 0 */ - { 0x80E79FFF, 0x001B0002, 0x0 },/* 8: 800 1000 2 */ - { 0x80FFFFFF, 0x001B0002, 0x0 },/* 9: 1000 1000 0 */ +static const union intel_ddi_buf_trans_entry _hsw_ddi_translations_dp[] = { + { .hsw = { 0x00FFFFFF, 0x0006000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x0005000A, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x00040006, 0x0 } }, + { .hsw = { 0x80AAAFFF, 0x000B0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x0005000A, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000C0004, 0x0 } }, + { .hsw = { 0x80C30FFF, 0x000B0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00040006, 0x0 } }, + { .hsw = { 0x80D75FFF, 0x000B0000, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans hsw_ddi_translations_dp = { + .entries = _hsw_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_hsw_ddi_translations_dp), +}; + +static const union intel_ddi_buf_trans_entry _hsw_ddi_translations_fdi[] = { + { .hsw = { 0x00FFFFFF, 0x0007000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000F000A, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x00060006, 0x0 } }, + { .hsw = { 0x00AAAFFF, 0x001E0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x000F000A, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x00160004, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x001E0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00060006, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x001E0000, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans hsw_ddi_translations_fdi = { + .entries = _hsw_ddi_translations_fdi, + .num_entries = ARRAY_SIZE(_hsw_ddi_translations_fdi), +}; + +static const union intel_ddi_buf_trans_entry _hsw_ddi_translations_hdmi[] = { + /* Idx NT mV d T mV d db */ + { .hsw = { 0x00FFFFFF, 0x0006000E, 0x0 } }, /* 0: 400 400 0 */ + { .hsw = { 0x00E79FFF, 0x000E000C, 0x0 } }, /* 1: 400 500 2 */ + { .hsw = { 0x00D75FFF, 0x0005000A, 0x0 } }, /* 2: 400 600 3.5 */ + { .hsw = { 0x00FFFFFF, 0x0005000A, 0x0 } }, /* 3: 600 600 0 */ + { .hsw = { 0x00E79FFF, 
0x001D0007, 0x0 } }, /* 4: 600 750 2 */ + { .hsw = { 0x00D75FFF, 0x000C0004, 0x0 } }, /* 5: 600 900 3.5 */ + { .hsw = { 0x00FFFFFF, 0x00040006, 0x0 } }, /* 6: 800 800 0 */ + { .hsw = { 0x80E79FFF, 0x00030002, 0x0 } }, /* 7: 800 1000 2 */ + { .hsw = { 0x00FFFFFF, 0x00140005, 0x0 } }, /* 8: 850 850 0 */ + { .hsw = { 0x00FFFFFF, 0x000C0004, 0x0 } }, /* 9: 900 900 0 */ + { .hsw = { 0x00FFFFFF, 0x001C0003, 0x0 } }, /* 10: 950 950 0 */ + { .hsw = { 0x80FFFFFF, 0x00030002, 0x0 } }, /* 11: 1000 1000 0 */ +}; + +static const struct intel_ddi_buf_trans hsw_ddi_translations_hdmi = { + .entries = _hsw_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_hsw_ddi_translations_hdmi), + .hdmi_default_entry = 6, +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_edp[] = { + { .hsw = { 0x00FFFFFF, 0x00000012, 0x0 } }, + { .hsw = { 0x00EBAFFF, 0x00020011, 0x0 } }, + { .hsw = { 0x00C71FFF, 0x0006000F, 0x0 } }, + { .hsw = { 0x00AAAFFF, 0x000E000A, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00020011, 0x0 } }, + { .hsw = { 0x00DB6FFF, 0x0005000F, 0x0 } }, + { .hsw = { 0x00BEEFFF, 0x000A000C, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x0005000F, 0x0 } }, + { .hsw = { 0x00DB6FFF, 0x000A000C, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans bdw_ddi_translations_edp = { + .entries = _bdw_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_edp), +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_dp[] = { + { .hsw = { 0x00FFFFFF, 0x0007000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000E000A, 0x0 } }, + { .hsw = { 0x00BEFFFF, 0x00140006, 0x0 } }, + { .hsw = { 0x80B2CFFF, 0x001B0002, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x000E000A, 0x0 } }, + { .hsw = { 0x00DB6FFF, 0x00160005, 0x0 } }, + { .hsw = { 0x80C71FFF, 0x001A0002, 0x0 } }, + { .hsw = { 0x00F7DFFF, 0x00180004, 0x0 } }, + { .hsw = { 0x80D75FFF, 0x001B0002, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans bdw_ddi_translations_dp = { + .entries = _bdw_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_dp), +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_fdi[] = { + { .hsw = { 0x00FFFFFF, 0x0001000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x0004000A, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x00070006, 0x0 } }, + { .hsw = { 0x00AAAFFF, 0x000C0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x0004000A, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x00090004, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x000C0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00070006, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000C0000, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans bdw_ddi_translations_fdi = { + .entries = _bdw_ddi_translations_fdi, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_fdi), +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_hdmi[] = { + /* Idx NT mV d T mV df db */ + { .hsw = { 0x00FFFFFF, 0x0007000E, 0x0 } }, /* 0: 400 400 0 */ + { .hsw = { 0x00D75FFF, 0x000E000A, 0x0 } }, /* 1: 400 600 3.5 */ + { .hsw = { 0x00BEFFFF, 0x00140006, 0x0 } }, /* 2: 400 800 6 */ + { .hsw = { 0x00FFFFFF, 0x0009000D, 0x0 } }, /* 3: 450 450 0 */ + { .hsw = { 0x00FFFFFF, 0x000E000A, 0x0 } }, /* 4: 600 600 0 */ + { .hsw = { 0x00D7FFFF, 0x00140006, 0x0 } }, /* 5: 600 800 2.5 */ + { .hsw = { 0x80CB2FFF, 0x001B0002, 0x0 } }, /* 6: 600 1000 4.5 */ + { .hsw = { 0x00FFFFFF, 0x00140006, 0x0 } }, /* 7: 800 800 0 */ + { .hsw = { 0x80E79FFF, 0x001B0002, 0x0 } }, /* 8: 800 1000 2 */ + { .hsw = { 0x80FFFFFF, 0x001B0002, 0x0 } }, /* 9: 1000 1000 0 */ +}; + +static const struct intel_ddi_buf_trans 
bdw_ddi_translations_hdmi = { + .entries = _bdw_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_hdmi), + .hdmi_default_entry = 7, }; /* Skylake H and S */ -static const struct ddi_buf_trans skl_ddi_translations_dp[] = { - { 0x00002016, 0x000000A0, 0x0 }, - { 0x00005012, 0x0000009B, 0x0 }, - { 0x00007011, 0x00000088, 0x0 }, - { 0x80009010, 0x000000C0, 0x1 }, - { 0x00002016, 0x0000009B, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000C0, 0x1 }, - { 0x00002016, 0x000000DF, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _skl_ddi_translations_dp[] = { + { .hsw = { 0x00002016, 0x000000A0, 0x0 } }, + { .hsw = { 0x00005012, 0x0000009B, 0x0 } }, + { .hsw = { 0x00007011, 0x00000088, 0x0 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x0000009B, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x000000DF, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans skl_ddi_translations_dp = { + .entries = _skl_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_skl_ddi_translations_dp), }; /* Skylake U */ -static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { - { 0x0000201B, 0x000000A2, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x1 }, - { 0x80009010, 0x000000C0, 0x1 }, - { 0x0000201B, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, - { 0x80007011, 0x000000C0, 0x1 }, - { 0x00002016, 0x00000088, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _skl_u_ddi_translations_dp[] = { + { .hsw = { 0x0000201B, 0x000000A2, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x1 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x1 } }, + { .hsw = { 0x0000201B, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x00000088, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans skl_u_ddi_translations_dp = { + .entries = _skl_u_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_skl_u_ddi_translations_dp), }; /* Skylake Y */ -static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { - { 0x00000018, 0x000000A2, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x3 }, - { 0x80009010, 0x000000C0, 0x3 }, - { 0x00000018, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, - { 0x80007011, 0x000000C0, 0x3 }, - { 0x00000018, 0x00000088, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, +static const union intel_ddi_buf_trans_entry _skl_y_ddi_translations_dp[] = { + { .hsw = { 0x00000018, 0x000000A2, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x3 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x3 } }, + { .hsw = { 0x00000018, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x3 } }, + { .hsw = { 0x00000018, 0x00000088, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans skl_y_ddi_translations_dp = { + .entries = _skl_y_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_skl_y_ddi_translations_dp), }; /* Kabylake H and S */ -static const struct ddi_buf_trans kbl_ddi_translations_dp[] = { - { 0x00002016, 0x000000A0, 0x0 }, - { 0x00005012, 0x0000009B, 0x0 }, - { 0x00007011, 0x00000088, 0x0 }, 
- { 0x80009010, 0x000000C0, 0x1 }, - { 0x00002016, 0x0000009B, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000C0, 0x1 }, - { 0x00002016, 0x00000097, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _kbl_ddi_translations_dp[] = { + { .hsw = { 0x00002016, 0x000000A0, 0x0 } }, + { .hsw = { 0x00005012, 0x0000009B, 0x0 } }, + { .hsw = { 0x00007011, 0x00000088, 0x0 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x0000009B, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x00000097, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans kbl_ddi_translations_dp = { + .entries = _kbl_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_kbl_ddi_translations_dp), }; /* Kabylake U */ -static const struct ddi_buf_trans kbl_u_ddi_translations_dp[] = { - { 0x0000201B, 0x000000A1, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x3 }, - { 0x80009010, 0x000000C0, 0x3 }, - { 0x0000201B, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, - { 0x80007011, 0x000000C0, 0x3 }, - { 0x00002016, 0x0000004F, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, +static const union intel_ddi_buf_trans_entry _kbl_u_ddi_translations_dp[] = { + { .hsw = { 0x0000201B, 0x000000A1, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x3 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x3 } }, + { .hsw = { 0x0000201B, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x3 } }, + { .hsw = { 0x00002016, 0x0000004F, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans kbl_u_ddi_translations_dp = { + .entries = _kbl_u_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_kbl_u_ddi_translations_dp), }; /* Kabylake Y */ -static const struct ddi_buf_trans kbl_y_ddi_translations_dp[] = { - { 0x00001017, 0x000000A1, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x3 }, - { 0x8000800F, 0x000000C0, 0x3 }, - { 0x00001017, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, - { 0x80007011, 0x000000C0, 0x3 }, - { 0x00001017, 0x0000004C, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, +static const union intel_ddi_buf_trans_entry _kbl_y_ddi_translations_dp[] = { + { .hsw = { 0x00001017, 0x000000A1, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x3 } }, + { .hsw = { 0x8000800F, 0x000000C0, 0x3 } }, + { .hsw = { 0x00001017, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x3 } }, + { .hsw = { 0x00001017, 0x0000004C, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans kbl_y_ddi_translations_dp = { + .entries = _kbl_y_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_kbl_y_ddi_translations_dp), }; /* * Skylake/Kabylake H and S * eDP 1.4 low vswing translation parameters */ -static const struct ddi_buf_trans skl_ddi_translations_edp[] = { - { 0x00000018, 0x000000A8, 0x0 }, - { 0x00004013, 0x000000A9, 0x0 }, - { 0x00007011, 0x000000A2, 0x0 }, - { 0x00009010, 0x0000009C, 0x0 }, - { 0x00000018, 0x000000A9, 0x0 }, - { 0x00006013, 0x000000A2, 0x0 }, - { 0x00007011, 0x000000A6, 0x0 }, - { 0x00000018, 0x000000AB, 0x0 }, - { 0x00007013, 0x0000009F, 0x0 }, - { 0x00000018, 0x000000DF, 0x0 }, +static const union intel_ddi_buf_trans_entry 
_skl_ddi_translations_edp[] = { + { .hsw = { 0x00000018, 0x000000A8, 0x0 } }, + { .hsw = { 0x00004013, 0x000000A9, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A2, 0x0 } }, + { .hsw = { 0x00009010, 0x0000009C, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A9, 0x0 } }, + { .hsw = { 0x00006013, 0x000000A2, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A6, 0x0 } }, + { .hsw = { 0x00000018, 0x000000AB, 0x0 } }, + { .hsw = { 0x00007013, 0x0000009F, 0x0 } }, + { .hsw = { 0x00000018, 0x000000DF, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans skl_ddi_translations_edp = { + .entries = _skl_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_skl_ddi_translations_edp), }; /* * Skylake/Kabylake U * eDP 1.4 low vswing translation parameters */ -static const struct ddi_buf_trans skl_u_ddi_translations_edp[] = { - { 0x00000018, 0x000000A8, 0x0 }, - { 0x00004013, 0x000000A9, 0x0 }, - { 0x00007011, 0x000000A2, 0x0 }, - { 0x00009010, 0x0000009C, 0x0 }, - { 0x00000018, 0x000000A9, 0x0 }, - { 0x00006013, 0x000000A2, 0x0 }, - { 0x00007011, 0x000000A6, 0x0 }, - { 0x00002016, 0x000000AB, 0x0 }, - { 0x00005013, 0x0000009F, 0x0 }, - { 0x00000018, 0x000000DF, 0x0 }, +static const union intel_ddi_buf_trans_entry _skl_u_ddi_translations_edp[] = { + { .hsw = { 0x00000018, 0x000000A8, 0x0 } }, + { .hsw = { 0x00004013, 0x000000A9, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A2, 0x0 } }, + { .hsw = { 0x00009010, 0x0000009C, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A9, 0x0 } }, + { .hsw = { 0x00006013, 0x000000A2, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A6, 0x0 } }, + { .hsw = { 0x00002016, 0x000000AB, 0x0 } }, + { .hsw = { 0x00005013, 0x0000009F, 0x0 } }, + { .hsw = { 0x00000018, 0x000000DF, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans skl_u_ddi_translations_edp = { + .entries = _skl_u_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_skl_u_ddi_translations_edp), }; /* * Skylake/Kabylake Y * eDP 1.4 low vswing translation parameters */ -static const struct ddi_buf_trans skl_y_ddi_translations_edp[] = { - { 0x00000018, 0x000000A8, 0x0 }, - { 0x00004013, 0x000000AB, 0x0 }, - { 0x00007011, 0x000000A4, 0x0 }, - { 0x00009010, 0x000000DF, 0x0 }, - { 0x00000018, 0x000000AA, 0x0 }, - { 0x00006013, 0x000000A4, 0x0 }, - { 0x00007011, 0x0000009D, 0x0 }, - { 0x00000018, 0x000000A0, 0x0 }, - { 0x00006012, 0x000000DF, 0x0 }, - { 0x00000018, 0x0000008A, 0x0 }, +static const union intel_ddi_buf_trans_entry _skl_y_ddi_translations_edp[] = { + { .hsw = { 0x00000018, 0x000000A8, 0x0 } }, + { .hsw = { 0x00004013, 0x000000AB, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A4, 0x0 } }, + { .hsw = { 0x00009010, 0x000000DF, 0x0 } }, + { .hsw = { 0x00000018, 0x000000AA, 0x0 } }, + { .hsw = { 0x00006013, 0x000000A4, 0x0 } }, + { .hsw = { 0x00007011, 0x0000009D, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A0, 0x0 } }, + { .hsw = { 0x00006012, 0x000000DF, 0x0 } }, + { .hsw = { 0x00000018, 0x0000008A, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans skl_y_ddi_translations_edp = { + .entries = _skl_y_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_skl_y_ddi_translations_edp), }; /* Skylake/Kabylake U, H and S */ -static const struct ddi_buf_trans skl_ddi_translations_hdmi[] = { - { 0x00000018, 0x000000AC, 0x0 }, - { 0x00005012, 0x0000009D, 0x0 }, - { 0x00007011, 0x00000088, 0x0 }, - { 0x00000018, 0x000000A1, 0x0 }, - { 0x00000018, 0x00000098, 0x0 }, - { 0x00004013, 0x00000088, 0x0 }, - { 0x80006012, 0x000000CD, 0x1 }, - { 0x00000018, 0x000000DF, 0x0 }, - { 0x80003015, 0x000000CD, 0x1 }, /* Default */ - { 0x80003015, 0x000000C0, 0x1 }, - 
{ 0x80000018, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _skl_ddi_translations_hdmi[] = { + { .hsw = { 0x00000018, 0x000000AC, 0x0 } }, + { .hsw = { 0x00005012, 0x0000009D, 0x0 } }, + { .hsw = { 0x00007011, 0x00000088, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A1, 0x0 } }, + { .hsw = { 0x00000018, 0x00000098, 0x0 } }, + { .hsw = { 0x00004013, 0x00000088, 0x0 } }, + { .hsw = { 0x80006012, 0x000000CD, 0x1 } }, + { .hsw = { 0x00000018, 0x000000DF, 0x0 } }, + { .hsw = { 0x80003015, 0x000000CD, 0x1 } }, /* Default */ + { .hsw = { 0x80003015, 0x000000C0, 0x1 } }, + { .hsw = { 0x80000018, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans skl_ddi_translations_hdmi = { + .entries = _skl_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_skl_ddi_translations_hdmi), + .hdmi_default_entry = 8, }; /* Skylake/Kabylake Y */ -static const struct ddi_buf_trans skl_y_ddi_translations_hdmi[] = { - { 0x00000018, 0x000000A1, 0x0 }, - { 0x00005012, 0x000000DF, 0x0 }, - { 0x80007011, 0x000000CB, 0x3 }, - { 0x00000018, 0x000000A4, 0x0 }, - { 0x00000018, 0x0000009D, 0x0 }, - { 0x00004013, 0x00000080, 0x0 }, - { 0x80006013, 0x000000C0, 0x3 }, - { 0x00000018, 0x0000008A, 0x0 }, - { 0x80003015, 0x000000C0, 0x3 }, /* Default */ - { 0x80003015, 0x000000C0, 0x3 }, - { 0x80000018, 0x000000C0, 0x3 }, -}; - - -static const struct bxt_ddi_buf_trans bxt_ddi_translations_dp[] = { - /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ - { 78, 0x9A, 0, 85, }, /* 1: 400 3.5 */ - { 104, 0x9A, 0, 64, }, /* 2: 400 6 */ - { 154, 0x9A, 0, 43, }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ - { 116, 0x9A, 0, 85, }, /* 5: 600 3.5 */ - { 154, 0x9A, 0, 64, }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ - { 154, 0x9A, 0, 85, }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ -}; - -static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { +static const union intel_ddi_buf_trans_entry _skl_y_ddi_translations_hdmi[] = { + { .hsw = { 0x00000018, 0x000000A1, 0x0 } }, + { .hsw = { 0x00005012, 0x000000DF, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CB, 0x3 } }, + { .hsw = { 0x00000018, 0x000000A4, 0x0 } }, + { .hsw = { 0x00000018, 0x0000009D, 0x0 } }, + { .hsw = { 0x00004013, 0x00000080, 0x0 } }, + { .hsw = { 0x80006013, 0x000000C0, 0x3 } }, + { .hsw = { 0x00000018, 0x0000008A, 0x0 } }, + { .hsw = { 0x80003015, 0x000000C0, 0x3 } }, /* Default */ + { .hsw = { 0x80003015, 0x000000C0, 0x3 } }, + { .hsw = { 0x80000018, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans skl_y_ddi_translations_hdmi = { + .entries = _skl_y_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_skl_y_ddi_translations_hdmi), + .hdmi_default_entry = 8, +}; + +static const union intel_ddi_buf_trans_entry _bxt_ddi_translations_dp[] = { + /* Idx NT mV diff db */ + { .bxt = { 52, 0x9A, 0, 128, } }, /* 0: 400 0 */ + { .bxt = { 78, 0x9A, 0, 85, } }, /* 1: 400 3.5 */ + { .bxt = { 104, 0x9A, 0, 64, } }, /* 2: 400 6 */ + { .bxt = { 154, 0x9A, 0, 43, } }, /* 3: 400 9.5 */ + { .bxt = { 77, 0x9A, 0, 128, } }, /* 4: 600 0 */ + { .bxt = { 116, 0x9A, 0, 85, } }, /* 5: 600 3.5 */ + { .bxt = { 154, 0x9A, 0, 64, } }, /* 6: 600 6 */ + { .bxt = { 102, 0x9A, 0, 128, } }, /* 7: 800 0 */ + { .bxt = { 154, 0x9A, 0, 85, } }, /* 8: 800 3.5 */ + { .bxt = { 154, 0x9A, 1, 128, } }, /* 9: 1200 0 */ +}; + +static const struct intel_ddi_buf_trans bxt_ddi_translations_dp = { + .entries = _bxt_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_bxt_ddi_translations_dp), +}; + +static const union 
intel_ddi_buf_trans_entry _bxt_ddi_translations_edp[] = { /* Idx NT mV diff db */ - { 26, 0, 0, 128, }, /* 0: 200 0 */ - { 38, 0, 0, 112, }, /* 1: 200 1.5 */ - { 48, 0, 0, 96, }, /* 2: 200 4 */ - { 54, 0, 0, 69, }, /* 3: 200 6 */ - { 32, 0, 0, 128, }, /* 4: 250 0 */ - { 48, 0, 0, 104, }, /* 5: 250 1.5 */ - { 54, 0, 0, 85, }, /* 6: 250 4 */ - { 43, 0, 0, 128, }, /* 7: 300 0 */ - { 54, 0, 0, 101, }, /* 8: 300 1.5 */ - { 48, 0, 0, 128, }, /* 9: 300 0 */ + { .bxt = { 26, 0, 0, 128, } }, /* 0: 200 0 */ + { .bxt = { 38, 0, 0, 112, } }, /* 1: 200 1.5 */ + { .bxt = { 48, 0, 0, 96, } }, /* 2: 200 4 */ + { .bxt = { 54, 0, 0, 69, } }, /* 3: 200 6 */ + { .bxt = { 32, 0, 0, 128, } }, /* 4: 250 0 */ + { .bxt = { 48, 0, 0, 104, } }, /* 5: 250 1.5 */ + { .bxt = { 54, 0, 0, 85, } }, /* 6: 250 4 */ + { .bxt = { 43, 0, 0, 128, } }, /* 7: 300 0 */ + { .bxt = { 54, 0, 0, 101, } }, /* 8: 300 1.5 */ + { .bxt = { 48, 0, 0, 128, } }, /* 9: 300 0 */ +}; + +static const struct intel_ddi_buf_trans bxt_ddi_translations_edp = { + .entries = _bxt_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_bxt_ddi_translations_edp), }; /* BSpec has 2 recommended values - entries 0 and 8. * Using the entry with higher vswing. */ -static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { - /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ - { 52, 0x9A, 0, 85, }, /* 1: 400 3.5 */ - { 52, 0x9A, 0, 64, }, /* 2: 400 6 */ - { 42, 0x9A, 0, 43, }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ - { 77, 0x9A, 0, 85, }, /* 5: 600 3.5 */ - { 77, 0x9A, 0, 64, }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ - { 102, 0x9A, 0, 85, }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ -}; - -/* Voltage Swing Programming for VccIO 0.85V for DP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_0_85V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5D, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x6A, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ - { 0xB, 0x7A, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ - { 0x6, 0x7C, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ - { 0xA, 0x69, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xB, 0x7A, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7C, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ - { 0xB, 0x7D, 0x3C, 0x00, 0x03 }, /* 650 725 0.9 */ - { 0x6, 0x7C, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ - { 0x6, 0x7B, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.85V for HDMI */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_0_85V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ - { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ - { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.85V for eDP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_85V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x66, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ - { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ - { 0xA, 0x66, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ - { 0xB, 0x70, 0x3C, 0x00, 0x03 }, /* 460 600 2.3 */ - { 0xC, 0x75, 0x3C, 0x00, 0x03 }, /* 537 700 2.3 */ - { 0x2, 0x7F, 0x3F, 
0x00, 0x00 }, /* 400 400 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for DP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_0_95V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5D, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x6A, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ - { 0xB, 0x7A, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ - { 0x6, 0x7C, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ - { 0xA, 0x69, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xB, 0x7A, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7C, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ - { 0xB, 0x7D, 0x3C, 0x00, 0x03 }, /* 650 725 0.9 */ - { 0x6, 0x7C, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ - { 0x6, 0x7B, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for HDMI */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_0_95V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5C, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ - { 0xB, 0x69, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ - { 0x5, 0x76, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ - { 0xA, 0x5E, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x69, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ - { 0xB, 0x79, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ - { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 600 1000 4.4 */ - { 0x5, 0x76, 0x3F, 0x00, 0x00 }, /* 800 800 0.0 */ - { 0x6, 0x7D, 0x39, 0x00, 0x06 }, /* 800 1000 1.9 */ - { 0x6, 0x7F, 0x39, 0x00, 0x06 }, /* 850 1050 1.8 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for eDP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_95V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x61, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ - { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ - { 0xA, 0x61, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ - { 0xB, 0x68, 0x39, 0x00, 0x06 }, /* 460 600 2.3 */ - { 0xC, 0x6E, 0x39, 0x00, 0x06 }, /* 537 700 2.3 */ - { 0x4, 0x7F, 0x3A, 0x00, 0x05 }, /* 460 600 2.3 */ - { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for DP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_1_05V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x58, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ - { 0xB, 0x64, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ - { 0x5, 0x70, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 400 1050 8.4 */ - { 0xB, 0x64, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ - { 0x5, 0x73, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 550 1050 5.6 */ - { 0x5, 0x76, 0x3E, 0x00, 0x01 }, /* 850 900 0.5 */ - { 0x6, 0x7F, 0x36, 0x00, 0x09 }, /* 750 1050 2.9 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for HDMI */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_1_05V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x58, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ - { 0xB, 0x64, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ - { 0x5, 0x70, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ - { 0xA, 0x5B, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x64, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ - { 0x5, 0x73, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ - { 0x6, 0x7C, 0x32, 0x00, 0x0D }, /* 600 1000 4.4 */ - { 0x5, 0x70, 0x3F, 0x00, 0x00 }, /* 800 800 0.0 */ - { 0x6, 0x7C, 0x39, 0x00, 0x06 }, /* 800 1000 1.9 */ - { 0x6, 0x7F, 0x39, 0x00, 0x06 }, 
/* 850 1050 1.8 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for eDP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5E, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ - { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ - { 0xA, 0x5E, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ - { 0xB, 0x64, 0x39, 0x00, 0x06 }, /* 460 600 2.3 */ - { 0xE, 0x6A, 0x39, 0x00, 0x06 }, /* 537 700 2.3 */ - { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ +static const union intel_ddi_buf_trans_entry _bxt_ddi_translations_hdmi[] = { + /* Idx NT mV diff db */ + { .bxt = { 52, 0x9A, 0, 128, } }, /* 0: 400 0 */ + { .bxt = { 52, 0x9A, 0, 85, } }, /* 1: 400 3.5 */ + { .bxt = { 52, 0x9A, 0, 64, } }, /* 2: 400 6 */ + { .bxt = { 42, 0x9A, 0, 43, } }, /* 3: 400 9.5 */ + { .bxt = { 77, 0x9A, 0, 128, } }, /* 4: 600 0 */ + { .bxt = { 77, 0x9A, 0, 85, } }, /* 5: 600 3.5 */ + { .bxt = { 77, 0x9A, 0, 64, } }, /* 6: 600 6 */ + { .bxt = { 102, 0x9A, 0, 128, } }, /* 7: 800 0 */ + { .bxt = { 102, 0x9A, 0, 85, } }, /* 8: 800 3.5 */ + { .bxt = { 154, 0x9A, 1, 128, } }, /* 9: 1200 0 */ +}; + +static const struct intel_ddi_buf_trans bxt_ddi_translations_hdmi = { + .entries = _bxt_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_bxt_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_bxt_ddi_translations_hdmi) - 1, }; /* icl_combo_phy_ddi_translations */ -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr2[] = { - /* NT mV Trans mV db */ - { 0x0, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 200 250 1.9 */ - { 0x1, 0x7F, 0x33, 0x00, 0x0C }, /* 200 300 3.5 */ - { 0x9, 0x7F, 0x31, 0x00, 0x0E }, /* 200 350 4.9 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 250 300 1.6 */ - { 0x9, 0x7F, 0x35, 0x00, 0x0A }, /* 250 350 2.9 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ - { 0x9, 0x7F, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ -}; - -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr3[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ 
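A minimal, self-contained sketch of the wrapper pattern these hunks introduce; the type and field names below are illustrative stand-ins, not the driver's real definitions. Each raw entry array stays file-private, and a small descriptor carries the array pointer, its length, and the HDMI default index, so callers no longer open-code ARRAY_SIZE() per table:

    /* Stand-in for the kernel's ARRAY_SIZE() macro. */
    #define DEMO_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    /* Illustrative entry and descriptor shapes (assumed, simplified). */
    struct demo_hsw_entry { unsigned int trans1, trans2, i_boost; };

    union demo_buf_trans_entry {
            struct demo_hsw_entry hsw;
    };

    struct demo_buf_trans {
            const union demo_buf_trans_entry *entries;
            int num_entries;
            int hdmi_default_entry;
    };

    static const union demo_buf_trans_entry _demo_hdmi[] = {
            { .hsw = { 0x00000018, 0x000000AC, 0x0 } },
            { .hsw = { 0x80003015, 0x000000CD, 0x1 } }, /* default */
    };

    static const struct demo_buf_trans demo_hdmi = {
            .entries = _demo_hdmi,
            .num_entries = DEMO_ARRAY_SIZE(_demo_hdmi),
            .hdmi_default_entry = 1,
    };

    /* A caller picks the HDMI default level without knowing the array size. */
    static unsigned int demo_hdmi_default_trans1(const struct demo_buf_trans *t)
    {
            return t->entries[t->hdmi_default_entry].hsw.trans1;
    }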
-}; - -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi[] = { - /* NT mV Trans mV db */ - { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ - { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ - { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 ALS */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ -}; - -static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_dp[] = { - /* NT mV Trans mV db */ - { 0xA, 0x33, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x47, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ - { 0xC, 0x64, 0x34, 0x00, 0x0B }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 350 900 8.2 */ - { 0xA, 0x46, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x64, 0x38, 0x00, 0x07 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ - { 0xC, 0x61, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x38, 0x00, 0x07 }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr[] = { - /* NT mV Trans mV db */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 200 250 1.9 */ - { 0x1, 0x7F, 0x33, 0x00, 0x0C }, /* 200 300 3.5 */ - { 0xA, 0x35, 0x36, 0x00, 0x09 }, /* 200 350 4.9 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 250 300 1.6 */ - { 0xA, 0x35, 0x35, 0x00, 0x0A }, /* 250 350 2.9 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ - { 0xA, 0x35, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ -}; - -static const struct cnl_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr2[] = { - /* NT mV Trans mV db */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 250 1.9 */ - { 0x1, 0x7F, 0x3D, 0x00, 0x02 }, /* 200 300 3.5 */ - { 0xA, 0x35, 0x38, 0x00, 0x07 }, /* 200 350 4.9 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 300 1.6 */ - { 0xA, 0x35, 0x3A, 0x00, 0x05 }, /* 250 350 2.9 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ - { 0xA, 0x35, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ -}; - -static const struct cnl_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_rbr_hbr[] = { - /* NT mV Trans mV db */ - { 0xA, 0x32, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x48, 0x35, 0x00, 0x0A }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 350 900 8.2 */ - { 0xA, 0x43, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x60, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ - { 0xC, 0x60, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { - /* NT mV Trans mV db */ - { 0xA, 0x32, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x48, 0x35, 0x00, 0x0A }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 350 900 8.2 */ - { 0xA, 0x43, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x60, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 500 
900 5.1 */ - { 0xC, 0x58, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_rbr_hbr[] = { - /* Voltage swing pre-emphasis */ - { 0x18, 0x00, 0x00 }, /* 0 0 */ - { 0x1D, 0x00, 0x05 }, /* 0 1 */ - { 0x24, 0x00, 0x0C }, /* 0 2 */ - { 0x2B, 0x00, 0x14 }, /* 0 3 */ - { 0x21, 0x00, 0x00 }, /* 1 0 */ - { 0x2B, 0x00, 0x08 }, /* 1 1 */ - { 0x30, 0x00, 0x0F }, /* 1 2 */ - { 0x31, 0x00, 0x03 }, /* 2 0 */ - { 0x34, 0x00, 0x0B }, /* 2 1 */ - { 0x3F, 0x00, 0x00 }, /* 3 0 */ -}; - -static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_hbr2_hbr3[] = { - /* Voltage swing pre-emphasis */ - { 0x18, 0x00, 0x00 }, /* 0 0 */ - { 0x1D, 0x00, 0x05 }, /* 0 1 */ - { 0x24, 0x00, 0x0C }, /* 0 2 */ - { 0x2B, 0x00, 0x14 }, /* 0 3 */ - { 0x26, 0x00, 0x00 }, /* 1 0 */ - { 0x2C, 0x00, 0x07 }, /* 1 1 */ - { 0x33, 0x00, 0x0C }, /* 1 2 */ - { 0x2E, 0x00, 0x00 }, /* 2 0 */ - { 0x36, 0x00, 0x09 }, /* 2 1 */ - { 0x3F, 0x00, 0x00 }, /* 3 0 */ -}; - -static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_hdmi[] = { - /* HDMI Preset VS Pre-emph */ - { 0x1A, 0x0, 0x0 }, /* 1 400mV 0dB */ - { 0x20, 0x0, 0x0 }, /* 2 500mV 0dB */ - { 0x29, 0x0, 0x0 }, /* 3 650mV 0dB */ - { 0x32, 0x0, 0x0 }, /* 4 800mV 0dB */ - { 0x3F, 0x0, 0x0 }, /* 5 1000mV 0dB */ - { 0x3A, 0x0, 0x5 }, /* 6 Full -1.5 dB */ - { 0x39, 0x0, 0x6 }, /* 7 Full -1.8 dB */ - { 0x38, 0x0, 0x7 }, /* 8 Full -2 dB */ - { 0x37, 0x0, 0x8 }, /* 9 Full -2.5 dB */ - { 0x36, 0x0, 0x9 }, /* 10 Full -3 dB */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x18 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans_hbr2[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { - /* HDMI Preset VS Pre-emph */ - { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ - { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ - { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ - { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ - { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ - { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ - { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ - { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ - { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ - { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ -}; - -static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr[] = { - /* NT mV Trans mV db */ - { 0xA, 0x32, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - 
{ 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7D, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr2[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x63, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x61, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7B, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans tgl_uy_combo_phy_ddi_translations_dp_hbr2[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ - { 0xC, 0x60, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ - { 0xC, 0x7F, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ - { 0xC, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x6F, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ - { 0x6, 0x60, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ +static const union intel_ddi_buf_trans_entry _icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x6C, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3 = { + .entries = _icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3), +}; + +static const union intel_ddi_buf_trans_entry _icl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x0, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x38, 0x00, 0x07 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x33, 0x00, 0x0C } }, /* 200 300 3.5 */ + { .icl = { 0x9, 0x7F, 0x31, 0x00, 0x0E } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x38, 0x00, 0x07 } }, /* 250 300 1.6 */ + { .icl = { 0x9, 0x7F, 0x35, 0x00, 0x0A } }, /* 250 350 2.9 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0x9, 0x7F, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0x9, 0x7F, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans 
icl_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _icl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_edp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _icl_combo_phy_ddi_translations_hdmi[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x60, 0x3F, 0x00, 0x00 } }, /* 450 450 0.0 */ + { .icl = { 0xB, 0x73, 0x36, 0x00, 0x09 } }, /* 450 650 3.2 */ + { .icl = { 0x6, 0x7F, 0x31, 0x00, 0x0E } }, /* 450 850 5.5 */ + { .icl = { 0xB, 0x73, 0x3F, 0x00, 0x00 } }, /* 650 650 0.0 ALS */ + { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 650 850 2.3 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 850 850 0.0 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 850 3.0 */ +}; + +static const struct intel_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi = { + .entries = _icl_combo_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_icl_combo_phy_ddi_translations_hdmi) - 1, +}; + +static const union intel_ddi_buf_trans_entry _ehl_combo_phy_ddi_translations_dp[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x33, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x47, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x64, 0x34, 0x00, 0x0B } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x46, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x64, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x61, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans ehl_combo_phy_ddi_translations_dp = { + .entries = _ehl_combo_phy_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_ehl_combo_phy_ddi_translations_dp), +}; + +static const union intel_ddi_buf_trans_entry _ehl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x3D, 0x00, 0x02 } }, /* 200 300 3.5 */ + { .icl = { 0xA, 0x35, 0x39, 0x00, 0x06 } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x3C, 0x00, 0x03 } }, /* 250 300 1.6 */ + { .icl = { 0xA, 0x35, 0x39, 0x00, 0x06 } }, /* 250 350 2.9 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans ehl_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _ehl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_ehl_combo_phy_ddi_translations_edp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _jsl_combo_phy_ddi_translations_edp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x38, 0x00, 0x07 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x33, 0x00, 0x0C } }, /* 200 300 3.5 */ + { .icl = { 0xA, 0x35, 0x36, 0x00, 0x09 } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x38, 0x00, 0x07 } }, /* 250 300 1.6 */ + { .icl = { 0xA, 0x35, 0x35, 0x00, 0x0A } }, /* 250 350 2.9 */ + { .icl = { 
0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr = { + .entries = _jsl_combo_phy_ddi_translations_edp_hbr, + .num_entries = ARRAY_SIZE(_jsl_combo_phy_ddi_translations_edp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _jsl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x3D, 0x00, 0x02 } }, /* 200 300 3.5 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 300 1.6 */ + { .icl = { 0xA, 0x35, 0x3A, 0x00, 0x05 } }, /* 250 350 2.9 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _jsl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_jsl_combo_phy_ddi_translations_edp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _dg1_combo_phy_ddi_translations_dp_rbr_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x32, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x48, 0x35, 0x00, 0x0A } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x43, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x60, 0x36, 0x00, 0x09 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x60, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_rbr_hbr = { + .entries = _dg1_combo_phy_ddi_translations_dp_rbr_hbr, + .num_entries = ARRAY_SIZE(_dg1_combo_phy_ddi_translations_dp_rbr_hbr), +}; + +static const union intel_ddi_buf_trans_entry _dg1_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x32, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x48, 0x35, 0x00, 0x0A } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x43, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x60, 0x36, 0x00, 0x09 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x58, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _dg1_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_dg1_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; + +static const union intel_ddi_buf_trans_entry _icl_mg_phy_ddi_translations_rbr_hbr[] = { + /* Voltage swing pre-emphasis */ + { .mg = { 0x18, 0x00, 0x00 } }, /* 0 0 */ + { .mg = { 0x1D, 
0x00, 0x05 } }, /* 0 1 */ + { .mg = { 0x24, 0x00, 0x0C } }, /* 0 2 */ + { .mg = { 0x2B, 0x00, 0x14 } }, /* 0 3 */ + { .mg = { 0x21, 0x00, 0x00 } }, /* 1 0 */ + { .mg = { 0x2B, 0x00, 0x08 } }, /* 1 1 */ + { .mg = { 0x30, 0x00, 0x0F } }, /* 1 2 */ + { .mg = { 0x31, 0x00, 0x03 } }, /* 2 0 */ + { .mg = { 0x34, 0x00, 0x0B } }, /* 2 1 */ + { .mg = { 0x3F, 0x00, 0x00 } }, /* 3 0 */ +}; + +static const struct intel_ddi_buf_trans icl_mg_phy_ddi_translations_rbr_hbr = { + .entries = _icl_mg_phy_ddi_translations_rbr_hbr, + .num_entries = ARRAY_SIZE(_icl_mg_phy_ddi_translations_rbr_hbr), +}; + +static const union intel_ddi_buf_trans_entry _icl_mg_phy_ddi_translations_hbr2_hbr3[] = { + /* Voltage swing pre-emphasis */ + { .mg = { 0x18, 0x00, 0x00 } }, /* 0 0 */ + { .mg = { 0x1D, 0x00, 0x05 } }, /* 0 1 */ + { .mg = { 0x24, 0x00, 0x0C } }, /* 0 2 */ + { .mg = { 0x2B, 0x00, 0x14 } }, /* 0 3 */ + { .mg = { 0x26, 0x00, 0x00 } }, /* 1 0 */ + { .mg = { 0x2C, 0x00, 0x07 } }, /* 1 1 */ + { .mg = { 0x33, 0x00, 0x0C } }, /* 1 2 */ + { .mg = { 0x2E, 0x00, 0x00 } }, /* 2 0 */ + { .mg = { 0x36, 0x00, 0x09 } }, /* 2 1 */ + { .mg = { 0x3F, 0x00, 0x00 } }, /* 3 0 */ +}; + +static const struct intel_ddi_buf_trans icl_mg_phy_ddi_translations_hbr2_hbr3 = { + .entries = _icl_mg_phy_ddi_translations_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_icl_mg_phy_ddi_translations_hbr2_hbr3), +}; + +static const union intel_ddi_buf_trans_entry _icl_mg_phy_ddi_translations_hdmi[] = { + /* HDMI Preset VS Pre-emph */ + { .mg = { 0x1A, 0x0, 0x0 } }, /* 1 400mV 0dB */ + { .mg = { 0x20, 0x0, 0x0 } }, /* 2 500mV 0dB */ + { .mg = { 0x29, 0x0, 0x0 } }, /* 3 650mV 0dB */ + { .mg = { 0x32, 0x0, 0x0 } }, /* 4 800mV 0dB */ + { .mg = { 0x3F, 0x0, 0x0 } }, /* 5 1000mV 0dB */ + { .mg = { 0x3A, 0x0, 0x5 } }, /* 6 Full -1.5 dB */ + { .mg = { 0x39, 0x0, 0x6 } }, /* 7 Full -1.8 dB */ + { .mg = { 0x38, 0x0, 0x7 } }, /* 8 Full -2 dB */ + { .mg = { 0x37, 0x0, 0x8 } }, /* 9 Full -2.5 dB */ + { .mg = { 0x36, 0x0, 0x9 } }, /* 10 Full -3 dB */ +}; + +static const struct intel_ddi_buf_trans icl_mg_phy_ddi_translations_hdmi = { + .entries = _icl_mg_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_icl_mg_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_icl_mg_phy_ddi_translations_hdmi) - 1, +}; + +static const union intel_ddi_buf_trans_entry _tgl_dkl_phy_ddi_translations_dp_hbr[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x00 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x05 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0B } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x18 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x08 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x0B } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB HDMI default */ +}; + +static const struct intel_ddi_buf_trans tgl_dkl_phy_ddi_translations_dp_hbr = { + .entries = _tgl_dkl_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_dp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _tgl_dkl_phy_ddi_translations_dp_hbr2[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x00 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x05 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0B } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x19 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 
0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x08 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x0B } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB HDMI default */ +}; + +static const struct intel_ddi_buf_trans tgl_dkl_phy_ddi_translations_dp_hbr2 = { + .entries = _tgl_dkl_phy_ddi_translations_dp_hbr2, + .num_entries = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_dp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _tgl_dkl_phy_ddi_translations_hdmi[] = { + /* HDMI Preset VS Pre-emph */ + { .dkl = { 0x7, 0x0, 0x0 } }, /* 1 400mV 0dB */ + { .dkl = { 0x6, 0x0, 0x0 } }, /* 2 500mV 0dB */ + { .dkl = { 0x4, 0x0, 0x0 } }, /* 3 650mV 0dB */ + { .dkl = { 0x2, 0x0, 0x0 } }, /* 4 800mV 0dB */ + { .dkl = { 0x0, 0x0, 0x0 } }, /* 5 1000mV 0dB */ + { .dkl = { 0x0, 0x0, 0x5 } }, /* 6 Full -1.5 dB */ + { .dkl = { 0x0, 0x0, 0x6 } }, /* 7 Full -1.8 dB */ + { .dkl = { 0x0, 0x0, 0x7 } }, /* 8 Full -2 dB */ + { .dkl = { 0x0, 0x0, 0x8 } }, /* 9 Full -2.5 dB */ + { .dkl = { 0x0, 0x0, 0xA } }, /* 10 Full -3 dB */ +}; + +static const struct intel_ddi_buf_trans tgl_dkl_phy_ddi_translations_hdmi = { + .entries = _tgl_dkl_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_hdmi) - 1, +}; + +static const union intel_ddi_buf_trans_entry _tgl_combo_phy_ddi_translations_dp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x32, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7D, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x6C, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr = { + .entries = _tgl_combo_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_tgl_combo_phy_ddi_translations_dp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _tgl_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x63, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x61, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7B, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr2 = { + .entries = _tgl_combo_phy_ddi_translations_dp_hbr2, + .num_entries = ARRAY_SIZE(_tgl_combo_phy_ddi_translations_dp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _tgl_uy_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 
0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x60, 0x32, 0x00, 0x0D } }, /* 350 700 6.0 */ + { .icl = { 0xC, 0x7F, 0x2D, 0x00, 0x12 } }, /* 350 900 8.2 */ + { .icl = { 0xC, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x6F, 0x36, 0x00, 0x09 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7D, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0x6, 0x60, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x34, 0x00, 0x0B } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans tgl_uy_combo_phy_ddi_translations_dp_hbr2 = { + .entries = _tgl_uy_combo_phy_ddi_translations_dp_hbr2, + .num_entries = ARRAY_SIZE(_tgl_uy_combo_phy_ddi_translations_dp_hbr2), }; /* * Cloned the HOBL entry to comply with the voltage and pre-emphasis entries * that DisplayPort specification requires */ -static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_edp_hbr2_hobl[] = { - /* VS pre-emp */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 0 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 1 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 2 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 3 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 0 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 1 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 2 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 0 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 1 */ -}; - -static const struct cnl_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr[] = { - /* NT mV Trans mV db */ - { 0xA, 0x2F, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7D, 0x2A, 0x00, 0x15 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6E, 0x3E, 0x00, 0x01 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x50, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ - { 0xC, 0x61, 0x33, 0x00, 0x0C }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2E, 0x00, 0x11 }, /* 350 900 8.2 */ - { 0xA, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x5F, 0x38, 0x00, 0x07 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x5F, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7E, 0x36, 0x00, 0x09 }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans adlp_dkl_phy_dp_ddi_trans_hbr[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x01 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x06 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x17 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans adlp_dkl_phy_dp_ddi_trans_hbr2_hbr3[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x04 }, /* 
0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0A }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x18 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x06 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x09 }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB */ -}; - -bool is_hobl_buf_trans(const struct cnl_ddi_buf_trans *table) -{ - return table == tgl_combo_phy_ddi_translations_edp_hbr2_hobl; -} +static const union intel_ddi_buf_trans_entry _tgl_combo_phy_ddi_translations_edp_hbr2_hobl[] = { + /* VS pre-emp */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 0 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 1 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 2 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 3 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1 0 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1 1 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1 2 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 2 0 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 2 1 */ +}; -static const struct ddi_buf_trans * -bdw_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans tgl_combo_phy_ddi_translations_edp_hbr2_hobl = { + .entries = _tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + .num_entries = ARRAY_SIZE(_tgl_combo_phy_ddi_translations_edp_hbr2_hobl), +}; - if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - return bdw_ddi_translations_edp; - } else { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - return bdw_ddi_translations_dp; - } -} +static const union intel_ddi_buf_trans_entry _rkl_combo_phy_ddi_translations_dp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x2F, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7D, 0x2A, 0x00, 0x15 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x6E, 0x3E, 0x00, 0x01 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static const struct ddi_buf_trans * -skl_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr = { + .entries = _rkl_combo_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_rkl_combo_phy_ddi_translations_dp_hbr), +}; - if (IS_SKL_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_y_ddi_translations_dp); - return skl_y_ddi_translations_dp; - } else if (IS_SKL_ULT(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_u_ddi_translations_dp); - return skl_u_ddi_translations_dp; - } else { - *n_entries = ARRAY_SIZE(skl_ddi_translations_dp); - return skl_ddi_translations_dp; - } -} +static const union intel_ddi_buf_trans_entry _rkl_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x50, 0x38, 0x00, 0x07 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x61, 0x33, 0x00, 
0x0C } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2E, 0x00, 0x11 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x5F, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x5F, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7E, 0x36, 0x00, 0x09 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static const struct ddi_buf_trans * -kbl_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _rkl_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_rkl_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; - if (IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv) || - IS_CML_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(kbl_y_ddi_translations_dp); - return kbl_y_ddi_translations_dp; - } else if (IS_KBL_ULT(dev_priv) || - IS_CFL_ULT(dev_priv) || - IS_CML_ULT(dev_priv)) { - *n_entries = ARRAY_SIZE(kbl_u_ddi_translations_dp); - return kbl_u_ddi_translations_dp; - } else { - *n_entries = ARRAY_SIZE(kbl_ddi_translations_dp); - return kbl_ddi_translations_dp; - } -} +static const union intel_ddi_buf_trans_entry _adls_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x31, 0x00, 0x0E } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x63, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x73, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x58, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static const struct ddi_buf_trans * -skl_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans adls_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _adls_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_adls_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; - if (dev_priv->vbt.edp.low_vswing) { - if (IS_SKL_ULX(dev_priv) || - IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv) || - IS_CML_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_y_ddi_translations_edp); - return skl_y_ddi_translations_edp; - } else if (IS_SKL_ULT(dev_priv) || - IS_KBL_ULT(dev_priv) || - IS_CFL_ULT(dev_priv) || - IS_CML_ULT(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_u_ddi_translations_edp); - return skl_u_ddi_translations_edp; - } else { - *n_entries = ARRAY_SIZE(skl_ddi_translations_edp); - return skl_ddi_translations_edp; - } - } +static const union intel_ddi_buf_trans_entry _adls_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x9, 0x73, 0x3D, 0x00, 0x02 } }, /* 200 200 0.0 */ + { .icl = { 0x9, 0x7A, 0x3C, 0x00, 0x03 } }, /* 200 250 1.9 */ + { .icl = { 0x9, 0x7F, 0x3B, 0x00, 0x04 } }, /* 200 300 3.5 */ + { .icl = { 0x4, 0x6C, 0x33, 0x00, 0x0C } }, /* 200 350 4.9 */ + { .icl = { 0x2, 0x73, 0x3A, 0x00, 0x05 } }, /* 250 250 0.0 */ + { .icl = { 0x2, 0x7C, 0x38, 0x00, 0x07 } }, /* 
250 300 1.6 */ + { .icl = { 0x4, 0x5A, 0x36, 0x00, 0x09 } }, /* 250 350 2.9 */ + { .icl = { 0x4, 0x57, 0x3D, 0x00, 0x02 } }, /* 300 300 0.0 */ + { .icl = { 0x4, 0x65, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0x4, 0x6C, 0x3A, 0x00, 0x05 } }, /* 350 350 0.0 */ +}; - if (IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || - IS_COMETLAKE(dev_priv)) - return kbl_get_buf_trans_dp(encoder, n_entries); - else - return skl_get_buf_trans_dp(encoder, n_entries); -} +static const struct intel_ddi_buf_trans adls_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _adls_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_adls_combo_phy_ddi_translations_edp_hbr2), +}; -static const struct ddi_buf_trans * -skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) -{ - if (IS_SKL_ULX(dev_priv) || - IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv) || - IS_CML_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_y_ddi_translations_hdmi); - return skl_y_ddi_translations_hdmi; - } else { - *n_entries = ARRAY_SIZE(skl_ddi_translations_hdmi); - return skl_ddi_translations_hdmi; - } -} +static const union intel_ddi_buf_trans_entry _adls_combo_phy_ddi_translations_edp_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x31, 0x00, 0x0E } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x63, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x73, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x58, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static int skl_buf_trans_num_entries(enum port port, int n_entries) -{ - /* Only DDIA and DDIE can select the 10th register with DP */ - if (port == PORT_A || port == PORT_E) - return min(n_entries, 10); - else - return min(n_entries, 9); -} +static const struct intel_ddi_buf_trans adls_combo_phy_ddi_translations_edp_hbr3 = { + .entries = _adls_combo_phy_ddi_translations_edp_hbr3, + .num_entries = ARRAY_SIZE(_adls_combo_phy_ddi_translations_edp_hbr3), +}; -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const union intel_ddi_buf_trans_entry _adlp_combo_phy_ddi_translations_hdmi[] = { + /* NT mV Trans mV db */ + { .icl = { 0x6, 0x60, 0x3F, 0x00, 0x00 } }, /* 400 400 0.0 */ + { .icl = { 0x6, 0x68, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xA, 0x73, 0x3F, 0x00, 0x00 } }, /* 650 650 0.0 ALS */ + { .icl = { 0xA, 0x78, 0x3F, 0x00, 0x00 } }, /* 800 800 0.0 */ + { .icl = { 0xB, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1000 1000 0.0 Re-timer */ + { .icl = { 0xB, 0x7F, 0x3B, 0x00, 0x04 } }, /* Full Red -1.5 */ + { .icl = { 0xB, 0x7F, 0x39, 0x00, 0x06 } }, /* Full Red -1.8 */ + { .icl = { 0xB, 0x7F, 0x37, 0x00, 0x08 } }, /* Full Red -2.0 CRLS */ + { .icl = { 0xB, 0x7F, 0x35, 0x00, 0x0A } }, /* Full Red -2.5 */ + { .icl = { 0xB, 0x7F, 0x33, 0x00, 0x0C } }, /* Full Red -3.0 */ +}; - if (IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || - IS_COMETLAKE(dev_priv)) { - const struct ddi_buf_trans *ddi_translations = - kbl_get_buf_trans_dp(encoder, n_entries); - *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); 
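For reference, the DDIA/DDIE clamp applied by skl_buf_trans_num_entries() in these hunks (kept unchanged by the conversion) can be read in isolation as the sketch below; the enum and min() helper are illustrative stand-ins for the kernel's PORT_A/PORT_E and min():

    /* Only DDIA and DDIE can select the 10th register with DP. */
    enum demo_port { DEMO_PORT_A, DEMO_PORT_B, DEMO_PORT_C, DEMO_PORT_D, DEMO_PORT_E };

    static int demo_min(int a, int b)
    {
            return a < b ? a : b;
    }

    static int demo_skl_buf_trans_num_entries(enum demo_port port, int n_entries)
    {
            if (port == DEMO_PORT_A || port == DEMO_PORT_E)
                    return demo_min(n_entries, 10);
            else
                    return demo_min(n_entries, 9);
    }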
- return ddi_translations; - } else if (IS_SKYLAKE(dev_priv)) { - const struct ddi_buf_trans *ddi_translations = - skl_get_buf_trans_dp(encoder, n_entries); - *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); - return ddi_translations; - } else if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - return bdw_ddi_translations_dp; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_hdmi = { + .entries = _adlp_combo_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_hdmi) - 1, +}; - *n_entries = 0; - return NULL; -} +static const union intel_ddi_buf_trans_entry _adlp_combo_phy_ddi_translations_dp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x31, 0x00, 0x0E } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x73, 0x3E, 0x00, 0x01 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_dp_hbr = { + .entries = _adlp_combo_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_dp_hbr), +}; - if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { - const struct ddi_buf_trans *ddi_translations = - skl_get_buf_trans_edp(encoder, n_entries); - *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); - return ddi_translations; - } else if (IS_BROADWELL(dev_priv)) { - return bdw_get_buf_trans_edp(encoder, n_entries); - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } +static const union intel_ddi_buf_trans_entry _adlp_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x63, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; - *n_entries = 0; - return NULL; -} +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _adlp_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_fdi(struct 
drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_fdi); - return bdw_ddi_translations_fdi; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; - } +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_edp_hbr3 = { + .entries = _icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3), +}; + +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_edp_up_to_hbr2 = { + .entries = _icl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_edp_hbr2), +}; - *n_entries = 0; - return NULL; +static const union intel_ddi_buf_trans_entry _adlp_dkl_phy_ddi_translations_dp_hbr[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x01 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x06 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0B } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x17 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x08 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x0B } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB */ +}; + +static const struct intel_ddi_buf_trans adlp_dkl_phy_ddi_translations_dp_hbr = { + .entries = _adlp_dkl_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_adlp_dkl_phy_ddi_translations_dp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x00 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x04 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0A } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x18 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x06 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x09 } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB */ +}; + +static const struct intel_ddi_buf_trans adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3), +}; + +bool is_hobl_buf_trans(const struct intel_ddi_buf_trans *table) +{ + return table == &tgl_combo_phy_ddi_translations_edp_hbr2_hobl; } -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, - int *n_entries) +static const struct intel_ddi_buf_trans * +intel_get_buf_trans(const struct intel_ddi_buf_trans *ddi_translations, int *num_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + *num_entries = ddi_translations->num_entries; + return ddi_translations; +} - if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { - return skl_get_buf_trans_hdmi(dev_priv, n_entries); - } else if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - return bdw_ddi_translations_hdmi; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - return hsw_ddi_translations_hdmi; - } +static const struct intel_ddi_buf_trans * 
+hsw_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) + return intel_get_buf_trans(&hsw_ddi_translations_fdi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&hsw_ddi_translations_hdmi, n_entries); + else + return intel_get_buf_trans(&hsw_ddi_translations_dp, n_entries); +} - *n_entries = 0; - return NULL; +static const struct intel_ddi_buf_trans * +bdw_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) + return intel_get_buf_trans(&bdw_ddi_translations_fdi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&bdw_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return intel_get_buf_trans(&bdw_ddi_translations_edp, n_entries); + else + return intel_get_buf_trans(&bdw_ddi_translations_dp, n_entries); } -static const struct bxt_ddi_buf_trans * -bxt_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) +static int skl_buf_trans_num_entries(enum port port, int n_entries) { - *n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); - return bxt_ddi_translations_dp; + /* Only DDIA and DDIE can select the 10th register with DP */ + if (port == PORT_A || port == PORT_E) + return min(n_entries, 10); + else + return min(n_entries, 9); } -static const struct bxt_ddi_buf_trans * -bxt_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +_skl_get_buf_trans_dp(struct intel_encoder *encoder, + const struct intel_ddi_buf_trans *ddi_translations, + int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + ddi_translations = intel_get_buf_trans(ddi_translations, n_entries); + *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); + return ddi_translations; +} - if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); - return bxt_ddi_translations_edp; - } +static const struct intel_ddi_buf_trans * +skl_y_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); - return bxt_get_buf_trans_dp(encoder, n_entries); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_y_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_y_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &skl_y_ddi_translations_dp, n_entries); } -static const struct bxt_ddi_buf_trans * -bxt_get_buf_trans_hdmi(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +skl_u_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); - return bxt_ddi_translations_hdmi; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, 
INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_u_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &skl_u_ddi_translations_dp, n_entries); } -const struct bxt_ddi_buf_trans * -bxt_get_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +skl_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return bxt_get_buf_trans_hdmi(encoder, n_entries); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return bxt_get_buf_trans_edp(encoder, n_entries); - return bxt_get_buf_trans_dp(encoder, n_entries); + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &skl_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_hdmi(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +kbl_y_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_85V); - return cnl_ddi_translations_hdmi_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_95V); - return cnl_ddi_translations_hdmi_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); - return cnl_ddi_translations_hdmi_1_05V; - } else { - *n_entries = 1; /* shut up gcc */ - MISSING_CASE(voltage); - } - return NULL; -} + struct drm_i915_private *i915 = to_i915(encoder->base.dev); -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_85V); - return cnl_ddi_translations_dp_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_95V); - return cnl_ddi_translations_dp_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); - return cnl_ddi_translations_dp_1_05V; - } else { - *n_entries = 1; /* shut up gcc */ - MISSING_CASE(voltage); - } - return NULL; + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_y_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_y_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &kbl_y_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +kbl_u_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int 
*n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - if (dev_priv->vbt.edp.low_vswing) { - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); - return cnl_ddi_translations_edp_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); - return cnl_ddi_translations_edp_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); - return cnl_ddi_translations_edp_1_05V; - } else { - *n_entries = 1; /* shut up gcc */ - MISSING_CASE(voltage); - } - return NULL; - } else { - return cnl_get_buf_trans_dp(encoder, n_entries); - } + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_u_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &kbl_u_ddi_translations_dp, n_entries); } -const struct cnl_ddi_buf_trans * -cnl_get_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +kbl_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return cnl_get_buf_trans_hdmi(encoder, n_entries); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return cnl_get_buf_trans_edp(encoder, n_entries); - return cnl_get_buf_trans_dp(encoder, n_entries); + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &kbl_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * -icl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +bxt_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&bxt_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return intel_get_buf_trans(&bxt_ddi_translations_edp, n_entries); + else + return intel_get_buf_trans(&bxt_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2); - return icl_combo_phy_ddi_translations_dp_hbr2; + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); } -static const struct cnl_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_combo_buf_trans_edp(struct intel_encoder 
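/*
 * Illustrative sketch only, not part of the patch above: it models the
 * selection pattern the new skl/kbl/bxt helpers share (HDMI table vs. eDP
 * low-vswing table vs. DP table), with the entry count reported through the
 * table wrapper instead of per-table ARRAY_SIZE() calls at every site. All
 * types and names below are simplified stand-ins, not the real i915
 * definitions.
 */
#include <stddef.h>

enum demo_output { DEMO_OUTPUT_DP, DEMO_OUTPUT_EDP, DEMO_OUTPUT_HDMI };

struct demo_buf_trans_entry { unsigned int vswing, preemph; };

struct demo_buf_trans {
	const struct demo_buf_trans_entry *entries;
	size_t num_entries;
};

static const struct demo_buf_trans_entry demo_dp_entries[]   = { {0, 0}, {1, 0}, {2, 0} };
static const struct demo_buf_trans_entry demo_edp_entries[]  = { {0, 0}, {0, 1} };
static const struct demo_buf_trans_entry demo_hdmi_entries[] = { {1, 1} };

static const struct demo_buf_trans demo_dp   = { demo_dp_entries,   3 };
static const struct demo_buf_trans demo_edp  = { demo_edp_entries,  2 };
static const struct demo_buf_trans demo_hdmi = { demo_hdmi_entries, 1 };

/* Counterpart of intel_get_buf_trans(): hand back the table, report its size. */
static const struct demo_buf_trans *
demo_get_buf_trans(const struct demo_buf_trans *table, int *n_entries)
{
	*n_entries = (int)table->num_entries;
	return table;
}

/* Counterpart of the per-platform hooks: pick the table for the output type. */
static const struct demo_buf_trans *
demo_select_buf_trans(enum demo_output output, int low_vswing, int *n_entries)
{
	if (output == DEMO_OUTPUT_HDMI)
		return demo_get_buf_trans(&demo_hdmi, n_entries);
	else if (output == DEMO_OUTPUT_EDP && low_vswing)
		return demo_get_buf_trans(&demo_edp, n_entries);
	else
		return demo_get_buf_trans(&demo_dp, n_entries);
}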
*encoder, const struct intel_crtc_state *crtc_state, int *n_entries) @@ -1109,294 +1174,391 @@ icl_get_combo_buf_trans_edp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (crtc_state->port_clock > 540000) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); - return icl_combo_phy_ddi_translations_edp_hbr3; + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); } else if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); - return icl_combo_phy_ddi_translations_edp_hbr2; - } else if (IS_DG1(dev_priv) && crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(dg1_combo_phy_ddi_translations_dp_hbr2_hbr3); - return dg1_combo_phy_ddi_translations_dp_hbr2_hbr3; - } else if (IS_DG1(dev_priv)) { - *n_entries = ARRAY_SIZE(dg1_combo_phy_ddi_translations_dp_rbr_hbr); - return dg1_combo_phy_ddi_translations_dp_rbr_hbr; + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); } return icl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return icl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) return icl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else return icl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct icl_mg_phy_ddi_buf_trans * -icl_get_mg_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) -{ - *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_hdmi); - return icl_mg_phy_ddi_translations_hdmi; -} - -static const struct icl_mg_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_mg_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_hbr2_hbr3); - return icl_mg_phy_ddi_translations_hbr2_hbr3; + return intel_get_buf_trans(&icl_mg_phy_ddi_translations_hbr2_hbr3, + n_entries); } else { - *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_rbr_hbr); - return icl_mg_phy_ddi_translations_rbr_hbr; + return intel_get_buf_trans(&icl_mg_phy_ddi_translations_rbr_hbr, + n_entries); } } -const struct icl_mg_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_mg_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return icl_get_mg_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_mg_phy_ddi_translations_hdmi, n_entries); else return icl_get_mg_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +ehl_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (crtc_state->port_clock > 270000) + return 
intel_get_buf_trans(&ehl_combo_phy_ddi_translations_edp_hbr2, n_entries); + else + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, n_entries); +} + +static const struct intel_ddi_buf_trans * +ehl_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + dev_priv->vbt.edp.low_vswing) + return ehl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return intel_get_buf_trans(&ehl_combo_phy_ddi_translations_dp, n_entries); +} + +static const struct intel_ddi_buf_trans * +jsl_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&jsl_combo_phy_ddi_translations_edp_hbr2, n_entries); + else + return intel_get_buf_trans(&jsl_combo_phy_ddi_translations_edp_hbr, n_entries); +} + +static const struct intel_ddi_buf_trans * +jsl_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + dev_priv->vbt.edp.low_vswing) + return jsl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, n_entries); } -static const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans_dp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_dp); - return ehl_combo_phy_ddi_translations_dp; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (crtc_state->port_clock > 270000) { + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + return intel_get_buf_trans(&tgl_uy_combo_phy_ddi_translations_dp_hbr2, + n_entries); + } else { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_dp_hbr2, + n_entries); + } + } else { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_dp_hbr, + n_entries); + } } -static const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans_edp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +tgl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); - return icl_combo_phy_ddi_translations_edp_hbr2; + if (crtc_state->port_clock > 540000) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); + } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, 
+ n_entries); + } else if (dev_priv->vbt.edp.low_vswing) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); } - return ehl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +tgl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return ehl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return ehl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + return tgl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else - return ehl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +dg1_get_combo_buf_trans_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&dg1_combo_phy_ddi_translations_dp_hbr2_hbr3, + n_entries); + else + return intel_get_buf_trans(&dg1_combo_phy_ddi_translations_dp_rbr_hbr, + n_entries); } -static const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans_dp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +dg1_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (crtc_state->port_clock > 540000) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); + else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + n_entries); + else if (dev_priv->vbt.edp.low_vswing) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); + else + return dg1_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); +} + +static const struct intel_ddi_buf_trans * +dg1_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + return dg1_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return dg1_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); +} + +static const struct intel_ddi_buf_trans * +rkl_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2); - return icl_combo_phy_ddi_translations_dp_hbr2; + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&rkl_combo_phy_ddi_translations_dp_hbr2_hbr3, 
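/*
 * Illustrative sketch only, not part of the patch above: it restates the
 * precedence the tgl/dg1/rkl/adls eDP helpers apply - link rates above HBR2
 * get the HBR3-capable table first, then the HOBL table (when the VBT asks
 * for it and HOBL has not already failed during link training), then the
 * low-vswing table, otherwise the plain DP selection. Everything here is a
 * simplified stand-in for illustration.
 */
struct demo_edp_ctx {
	int port_clock;		/* kHz: 270000 = HBR, 540000 = HBR2 */
	int vbt_hobl;		/* VBT requests the HOBL table */
	int hobl_failed;	/* link training with HOBL already failed */
	int low_vswing;		/* VBT requests low vswing */
};

enum demo_edp_table {
	DEMO_TABLE_EDP_HBR3,
	DEMO_TABLE_EDP_HOBL,
	DEMO_TABLE_EDP_HBR2,
	DEMO_TABLE_DP_FALLBACK,
};

static enum demo_edp_table demo_pick_edp_table(const struct demo_edp_ctx *ctx)
{
	if (ctx->port_clock > 540000)
		return DEMO_TABLE_EDP_HBR3;
	else if (ctx->vbt_hobl && !ctx->hobl_failed)
		return DEMO_TABLE_EDP_HOBL;
	else if (ctx->low_vswing)
		return DEMO_TABLE_EDP_HBR2;
	return DEMO_TABLE_DP_FALLBACK;
}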
n_entries); + else + return intel_get_buf_trans(&rkl_combo_phy_ddi_translations_dp_hbr, n_entries); } -static const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans_edp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +rkl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (dev_priv->vbt.edp.low_vswing) { - if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(jsl_combo_phy_ddi_translations_edp_hbr2); - return jsl_combo_phy_ddi_translations_edp_hbr2; - } else { - *n_entries = ARRAY_SIZE(jsl_combo_phy_ddi_translations_edp_hbr); - return jsl_combo_phy_ddi_translations_edp_hbr; - } + if (crtc_state->port_clock > 540000) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); + } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + n_entries); + } else if (dev_priv->vbt.edp.low_vswing) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); } - return jsl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return rkl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +rkl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return jsl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return jsl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + return rkl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else - return jsl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return rkl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +adls_get_combo_buf_trans_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&adls_combo_phy_ddi_translations_dp_hbr2_hbr3, n_entries); + else + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_dp_hbr, n_entries); +} + +static const struct intel_ddi_buf_trans * +adls_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (crtc_state->port_clock > 540000) + return intel_get_buf_trans(&adls_combo_phy_ddi_translations_edp_hbr3, n_entries); + else if (i915->vbt.edp.hobl && !intel_dp->hobl_failed) + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, n_entries); + else if (i915->vbt.edp.low_vswing) + return intel_get_buf_trans(&adls_combo_phy_ddi_translations_edp_hbr2, n_entries); + else + return adls_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct 
cnl_ddi_buf_trans * -tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +adls_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (crtc_state->port_clock > 270000) { - if (IS_ROCKETLAKE(dev_priv)) { - *n_entries = ARRAY_SIZE(rkl_combo_phy_ddi_translations_dp_hbr2_hbr3); - return rkl_combo_phy_ddi_translations_dp_hbr2_hbr3; - } else if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { - *n_entries = ARRAY_SIZE(tgl_uy_combo_phy_ddi_translations_dp_hbr2); - return tgl_uy_combo_phy_ddi_translations_dp_hbr2; - } else { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); - return tgl_combo_phy_ddi_translations_dp_hbr2; - } - } else { - if (IS_ROCKETLAKE(dev_priv)) { - *n_entries = ARRAY_SIZE(rkl_combo_phy_ddi_translations_dp_hbr); - return rkl_combo_phy_ddi_translations_dp_hbr; - } else { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr); - return tgl_combo_phy_ddi_translations_dp_hbr; - } - } + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + return adls_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return adls_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans_edp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +adlp_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_dp_hbr2_hbr3, n_entries); + else + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_dp_hbr, n_entries); +} + +static const struct intel_ddi_buf_trans * +adlp_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (crtc_state->port_clock > 540000) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); - return icl_combo_phy_ddi_translations_edp_hbr3; + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_edp_hbr3, + n_entries); } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_edp_hbr2_hobl); - return tgl_combo_phy_ddi_translations_edp_hbr2_hobl; + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + n_entries); } else if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); - return icl_combo_phy_ddi_translations_edp_hbr2; + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_edp_up_to_hbr2, + n_entries); } - return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return adlp_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +adlp_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { if 
(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return tgl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return tgl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + return adlp_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else - return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return adlp_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct tgl_dkl_phy_ddi_buf_trans * -tgl_get_dkl_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) -{ - *n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); - return tgl_dkl_phy_hdmi_ddi_trans; -} - -static const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * tgl_get_dkl_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans_hbr2); - return tgl_dkl_phy_dp_ddi_trans_hbr2; + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_dp_hbr2, + n_entries); } else { - *n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); - return tgl_dkl_phy_dp_ddi_trans; + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_dp_hbr, + n_entries); } } -const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * tgl_get_dkl_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_hdmi, n_entries); else return tgl_get_dkl_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * adlp_get_dkl_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(adlp_dkl_phy_dp_ddi_trans_hbr2_hbr3); - return adlp_dkl_phy_dp_ddi_trans_hbr2_hbr3; + return intel_get_buf_trans(&adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3, + n_entries); + } else { + return intel_get_buf_trans(&adlp_dkl_phy_ddi_translations_dp_hbr, + n_entries); } - - *n_entries = ARRAY_SIZE(adlp_dkl_phy_dp_ddi_trans_hbr); - return adlp_dkl_phy_dp_ddi_trans_hbr; } -const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * adlp_get_dkl_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_hdmi, n_entries); else return adlp_get_dkl_buf_trans_dp(encoder, crtc_state, n_entries); } @@ -1406,43 +1568,68 @@ int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, int *default_entry) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + const struct intel_ddi_buf_trans *ddi_translations; int n_entries; - if (DISPLAY_VER(dev_priv) >= 12) { - if (intel_phy_is_combo(dev_priv, phy)) - tgl_get_combo_buf_trans_hdmi(encoder, crtc_state, &n_entries); - else - tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, &n_entries); - 
*default_entry = n_entries - 1; - } else if (DISPLAY_VER(dev_priv) == 11) { - if (intel_phy_is_combo(dev_priv, phy)) - icl_get_combo_buf_trans_hdmi(encoder, crtc_state, &n_entries); - else - icl_get_mg_buf_trans_hdmi(encoder, crtc_state, &n_entries); - *default_entry = n_entries - 1; - } else if (IS_CANNONLAKE(dev_priv)) { - cnl_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = n_entries - 1; - } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - bxt_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = n_entries - 1; - } else if (DISPLAY_VER(dev_priv) == 9) { - intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = 8; - } else if (IS_BROADWELL(dev_priv)) { - intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = 7; - } else if (IS_HASWELL(dev_priv)) { - intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = 6; - } else { - drm_WARN(&dev_priv->drm, 1, "ddi translation table missing\n"); + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); + + if (drm_WARN_ON(&dev_priv->drm, !ddi_translations)) { + *default_entry = 0; return 0; } - if (drm_WARN_ON_ONCE(&dev_priv->drm, n_entries == 0)) - return 0; + *default_entry = ddi_translations->hdmi_default_entry; return n_entries; } + +void intel_ddi_buf_trans_init(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + if (IS_ALDERLAKE_P(i915)) { + if (intel_phy_is_combo(i915, phy)) + encoder->get_buf_trans = adlp_get_combo_buf_trans; + else + encoder->get_buf_trans = adlp_get_dkl_buf_trans; + } else if (IS_ALDERLAKE_S(i915)) { + encoder->get_buf_trans = adls_get_combo_buf_trans; + } else if (IS_ROCKETLAKE(i915)) { + encoder->get_buf_trans = rkl_get_combo_buf_trans; + } else if (IS_DG1(i915)) { + encoder->get_buf_trans = dg1_get_combo_buf_trans; + } else if (DISPLAY_VER(i915) >= 12) { + if (intel_phy_is_combo(i915, phy)) + encoder->get_buf_trans = tgl_get_combo_buf_trans; + else + encoder->get_buf_trans = tgl_get_dkl_buf_trans; + } else if (DISPLAY_VER(i915) == 11) { + if (IS_PLATFORM(i915, INTEL_JASPERLAKE)) + encoder->get_buf_trans = jsl_get_combo_buf_trans; + else if (IS_PLATFORM(i915, INTEL_ELKHARTLAKE)) + encoder->get_buf_trans = ehl_get_combo_buf_trans; + else if (intel_phy_is_combo(i915, phy)) + encoder->get_buf_trans = icl_get_combo_buf_trans; + else + encoder->get_buf_trans = icl_get_mg_buf_trans; + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { + encoder->get_buf_trans = bxt_get_buf_trans; + } else if (IS_CML_ULX(i915) || IS_CFL_ULX(i915) || IS_KBL_ULX(i915)) { + encoder->get_buf_trans = kbl_y_get_buf_trans; + } else if (IS_CML_ULT(i915) || IS_CFL_ULT(i915) || IS_KBL_ULT(i915)) { + encoder->get_buf_trans = kbl_u_get_buf_trans; + } else if (IS_COMETLAKE(i915) || IS_COFFEELAKE(i915) || IS_KABYLAKE(i915)) { + encoder->get_buf_trans = kbl_get_buf_trans; + } else if (IS_SKL_ULX(i915)) { + encoder->get_buf_trans = skl_y_get_buf_trans; + } else if (IS_SKL_ULT(i915)) { + encoder->get_buf_trans = skl_u_get_buf_trans; + } else if (IS_SKYLAKE(i915)) { + encoder->get_buf_trans = skl_get_buf_trans; + } else if (IS_BROADWELL(i915)) { + encoder->get_buf_trans = bdw_get_buf_trans; + } else { + encoder->get_buf_trans = hsw_get_buf_trans; + } +} diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h index 4c2efab38642..2acd720f9d4f 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h 
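/*
 * Illustrative sketch only, not part of the patch above: it models how
 * intel_ddi_buf_trans_init() now installs a single per-encoder hook at init
 * time, so callers such as intel_ddi_hdmi_num_entries() just invoke
 * encoder->get_buf_trans() instead of repeating the platform checks at every
 * call site. The platform enum and structs below are invented stand-ins.
 */
enum demo_platform { DEMO_PLAT_HSW, DEMO_PLAT_SKL, DEMO_PLAT_ICL, DEMO_PLAT_TGL };

struct demo_table { int num_entries; };

struct demo_encoder {
	enum demo_platform platform;
	/* per-platform hook, chosen once at init time */
	const struct demo_table *(*get_buf_trans)(const struct demo_encoder *enc,
						  int *n_entries);
};

static const struct demo_table demo_hsw_table = { 9 };
static const struct demo_table demo_tgl_table = { 10 };

static const struct demo_table *
demo_hsw_get_buf_trans(const struct demo_encoder *enc, int *n_entries)
{
	(void)enc;
	*n_entries = demo_hsw_table.num_entries;
	return &demo_hsw_table;
}

static const struct demo_table *
demo_tgl_get_buf_trans(const struct demo_encoder *enc, int *n_entries)
{
	(void)enc;
	*n_entries = demo_tgl_table.num_entries;
	return &demo_tgl_table;
}

static void demo_buf_trans_init(struct demo_encoder *enc)
{
	switch (enc->platform) {
	case DEMO_PLAT_TGL:
	case DEMO_PLAT_ICL:
		enc->get_buf_trans = demo_tgl_get_buf_trans;
		break;
	default:
		enc->get_buf_trans = demo_hsw_get_buf_trans;
		break;
	}
}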
+++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h @@ -12,7 +12,7 @@ struct drm_i915_private; struct intel_encoder; struct intel_crtc_state; -struct ddi_buf_trans { +struct hsw_ddi_buf_trans { u32 trans1; /* balance leg enable, de-emph level */ u32 trans2; /* vref sel, vswing */ u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ @@ -25,7 +25,7 @@ struct bxt_ddi_buf_trans { u8 deemphasis; }; -struct cnl_ddi_buf_trans { +struct icl_ddi_buf_trans { u8 dw2_swing_sel; u8 dw7_n_scalar; u8 dw4_cursor_coeff; @@ -45,60 +45,26 @@ struct tgl_dkl_phy_ddi_buf_trans { u32 dkl_de_emphasis_control; }; -bool is_hobl_buf_trans(const struct cnl_ddi_buf_trans *table); +union intel_ddi_buf_trans_entry { + struct hsw_ddi_buf_trans hsw; + struct bxt_ddi_buf_trans bxt; + struct icl_ddi_buf_trans icl; + struct icl_mg_phy_ddi_buf_trans mg; + struct tgl_dkl_phy_ddi_buf_trans dkl; +}; + +struct intel_ddi_buf_trans { + const union intel_ddi_buf_trans_entry *entries; + u8 num_entries; + u8 hdmi_default_entry; +}; + +bool is_hobl_buf_trans(const struct intel_ddi_buf_trans *table); int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *default_entry); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, - int *n_entries); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, - int *n_entries); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries); - -const struct bxt_ddi_buf_trans * -bxt_get_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); - -const struct tgl_dkl_phy_ddi_buf_trans * -adlp_get_dkl_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct tgl_dkl_phy_ddi_buf_trans * -tgl_get_dkl_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -icl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct icl_mg_phy_ddi_buf_trans * -icl_get_mg_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); - -const struct cnl_ddi_buf_trans * -cnl_get_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); +void intel_ddi_buf_trans_init(struct intel_encoder *encoder); #endif diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0a8a2395c8ac..134a6acbd8fb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -59,6 +59,7 @@ #include "display/intel_hdmi.h" #include "display/intel_lvds.h" #include "display/intel_sdvo.h" +#include "display/intel_snps_phy.h" #include "display/intel_tv.h" #include 
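/*
 * Illustrative sketch only, not part of the patch above: it shows the idea
 * behind union intel_ddi_buf_trans_entry - one wrapper struct can describe
 * tables whose per-entry layout differs by PHY generation, while the entry
 * count and the default HDMI entry travel with the table itself. The field
 * names and values below are invented for the example.
 */
#include <stdint.h>

struct demo_hsw_entry { uint32_t trans1, trans2; };
struct demo_icl_entry { uint8_t swing_sel, n_scalar, cursor_coeff, post_cursor; };

union demo_ddi_entry {
	struct demo_hsw_entry hsw;
	struct demo_icl_entry icl;
};

struct demo_ddi_buf_trans {
	const union demo_ddi_entry *entries;
	uint8_t num_entries;
	uint8_t hdmi_default_entry;
};

static const union demo_ddi_entry demo_icl_hdmi_entries[] = {
	{ .icl = { .swing_sel = 0xa, .n_scalar = 0x60, .cursor_coeff = 0x3f, .post_cursor = 0x00 } },
	{ .icl = { .swing_sel = 0xb, .n_scalar = 0x73, .cursor_coeff = 0x36, .post_cursor = 0x09 } },
};

static const struct demo_ddi_buf_trans demo_icl_hdmi_table = {
	.entries = demo_icl_hdmi_entries,
	.num_entries = 2,
	.hdmi_default_entry = 1,	/* the HDMI default rides along with the table */
};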
"display/intel_vdsc.h" #include "display/intel_vrr.h" @@ -975,7 +976,7 @@ void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) /* FIXME: assert CPU port conditions for SNB+ */ } - /* Wa_22012358565:adlp */ + /* Wa_22012358565:adl-p */ if (DISPLAY_VER(dev_priv) == 13) intel_de_rmw(dev_priv, PIPE_ARB_CTL(pipe), 0, PIPE_ARB_USE_PROG_SLOTS); @@ -1035,6 +1036,10 @@ void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) if (!IS_I830(dev_priv)) val &= ~PIPECONF_ENABLE; + if (DISPLAY_VER(dev_priv) >= 12) + intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), + FECSTALL_DIS_DPTSTREAM_DPTTG, 0); + intel_de_write(dev_priv, reg, val); if ((val & PIPECONF_ENABLE) == 0) intel_wait_for_pipe_off(old_crtc_state); @@ -1331,6 +1336,9 @@ retry: ret = i915_gem_object_lock(obj, &ww); if (!ret && phys_cursor) ret = i915_gem_object_attach_phys(obj, alignment); + else if (!ret && HAS_LMEM(dev_priv)) + ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + /* TODO: Do we need to sync when migration becomes async? */ if (!ret) ret = i915_gem_object_pin_pages(obj); if (ret) @@ -1914,20 +1922,50 @@ static void intel_dpt_unpin(struct i915_address_space *vm) i915_vma_put(dpt->vma); } +static bool +intel_reuse_initial_plane_obj(struct drm_i915_private *i915, + const struct intel_initial_plane_config *plane_config, + struct drm_framebuffer **fb, + struct i915_vma **vma) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (!crtc_state->uapi.active) + continue; + + if (!plane_state->ggtt_vma) + continue; + + if (intel_plane_ggtt_offset(plane_state) == plane_config->base) { + *fb = plane_state->hw.fb; + *vma = plane_state->ggtt_vma; + return true; + } + } + + return false; +} + static void -intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, +intel_find_initial_plane_obj(struct intel_crtc *crtc, struct intel_initial_plane_config *plane_config) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *c; - struct drm_plane *primary = intel_crtc->base.primary; - struct drm_plane_state *plane_state = primary->state; - struct intel_plane *intel_plane = to_intel_plane(primary); - struct intel_plane_state *intel_state = - to_intel_plane_state(plane_state); struct intel_crtc_state *crtc_state = - to_intel_crtc_state(intel_crtc->base.state); + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); struct drm_framebuffer *fb; struct i915_vma *vma; @@ -1939,7 +1977,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, if (!plane_config->fb) return; - if (intel_alloc_initial_plane_obj(intel_crtc, plane_config)) { + if (intel_alloc_initial_plane_obj(crtc, plane_config)) { fb = &plane_config->fb->base; vma = plane_config->vma; goto valid_fb; @@ -1949,25 +1987,8 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * Failed to alloc the obj, check to see if we should share * an fb with another CRTC instead */ - for_each_crtc(dev, c) { - struct intel_plane_state *state; - - if (c == &intel_crtc->base) - continue; - - if (!to_intel_crtc_state(c->state)->uapi.active) - continue; 
- - state = to_intel_plane_state(c->primary->state); - if (!state->ggtt_vma) - continue; - - if (intel_plane_ggtt_offset(state) == plane_config->base) { - fb = state->hw.fb; - vma = state->ggtt_vma; - goto valid_fb; - } - } + if (intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb, &vma)) + goto valid_fb; /* * We've failed to reconstruct the BIOS FB. Current display state @@ -1976,7 +1997,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - intel_plane_disable_noatomic(intel_crtc, intel_plane); + intel_plane_disable_noatomic(crtc, plane); if (crtc_state->bigjoiner) { struct intel_crtc *slave = crtc_state->bigjoiner_linked_crtc; @@ -1986,40 +2007,38 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, return; valid_fb: - plane_state->rotation = plane_config->rotation; - intel_fb_fill_view(to_intel_framebuffer(fb), plane_state->rotation, - &intel_state->view); + plane_state->uapi.rotation = plane_config->rotation; + intel_fb_fill_view(to_intel_framebuffer(fb), + plane_state->uapi.rotation, &plane_state->view); __i915_vma_pin(vma); - intel_state->ggtt_vma = i915_vma_get(vma); - if (intel_plane_uses_fence(intel_state) && i915_vma_pin_fence(vma) == 0) - if (vma->fence) - intel_state->flags |= PLANE_HAS_FENCE; + plane_state->ggtt_vma = i915_vma_get(vma); + if (intel_plane_uses_fence(plane_state) && + i915_vma_pin_fence(vma) == 0 && vma->fence) + plane_state->flags |= PLANE_HAS_FENCE; - plane_state->src_x = 0; - plane_state->src_y = 0; - plane_state->src_w = fb->width << 16; - plane_state->src_h = fb->height << 16; + plane_state->uapi.src_x = 0; + plane_state->uapi.src_y = 0; + plane_state->uapi.src_w = fb->width << 16; + plane_state->uapi.src_h = fb->height << 16; - plane_state->crtc_x = 0; - plane_state->crtc_y = 0; - plane_state->crtc_w = fb->width; - plane_state->crtc_h = fb->height; + plane_state->uapi.crtc_x = 0; + plane_state->uapi.crtc_y = 0; + plane_state->uapi.crtc_w = fb->width; + plane_state->uapi.crtc_h = fb->height; if (plane_config->tiling) dev_priv->preserve_bios_swizzle = true; - plane_state->fb = fb; + plane_state->uapi.fb = fb; drm_framebuffer_get(fb); - plane_state->crtc = &intel_crtc->base; - intel_plane_copy_uapi_to_hw_state(intel_state, intel_state, - intel_crtc); + plane_state->uapi.crtc = &crtc->base; + intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc); intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); - atomic_or(to_intel_plane(primary)->frontbuffer_bit, - &to_intel_frontbuffer(fb)->bits); + atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); } unsigned int @@ -2193,8 +2212,29 @@ unlock: clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags); } -static void icl_set_pipe_chicken(struct intel_crtc *crtc) +static bool underrun_recovery_supported(const struct intel_crtc_state *crtc_state) { + if (crtc_state->pch_pfit.enabled && + (crtc_state->pipe_src_w > drm_rect_width(&crtc_state->pch_pfit.dst) || + crtc_state->pipe_src_h > drm_rect_height(&crtc_state->pch_pfit.dst) || + crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)) + return false; + + if (crtc_state->dsc.compression_enable) + return false; + + if (crtc_state->has_psr2) + return false; + + if (crtc_state->splitter.enable) + return false; + + return true; +} + +static void icl_set_pipe_chicken(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct 
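/*
 * Illustrative sketch only, not part of the patch above: it models the
 * helper the patch factors out of intel_find_initial_plane_obj() - walk the
 * already-active planes and, if one is scanning out from the same GGTT
 * offset the BIOS programmed, reuse its framebuffer and vma instead of
 * allocating a new object. The types below are simplified stand-ins.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct demo_plane {
	bool active;
	uint64_t ggtt_offset;
	void *fb;	/* stands in for struct drm_framebuffer * */
	void *vma;	/* stands in for struct i915_vma * */
};

static bool demo_reuse_initial_plane_obj(const struct demo_plane *planes, size_t n,
					 uint64_t bios_base, void **fb, void **vma)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (!planes[i].active || !planes[i].vma)
			continue;
		if (planes[i].ggtt_offset == bios_base) {
			*fb = planes[i].fb;
			*vma = planes[i].vma;
			return true;
		}
	}
	return false;
}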
drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 tmp; @@ -2215,19 +2255,19 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) */ tmp |= PIXEL_ROUNDING_TRUNC_FB_PASSTHRU; - /* - * "The underrun recovery mechanism should be disabled - * when the following is enabled for this pipe: - * WiDi - * Downscaling (this includes YUV420 fullblend) - * COG - * DSC - * PSR2" - * - * FIXME: enable whenever possible... - */ - if (IS_ALDERLAKE_P(dev_priv)) - tmp |= UNDERRUN_RECOVERY_DISABLE; + if (IS_DG2(dev_priv)) { + /* + * Underrun recovery must always be disabled on DG2. However + * the chicken bit meaning is inverted compared to other + * platforms. + */ + tmp &= ~UNDERRUN_RECOVERY_ENABLE_DG2; + } else if (DISPLAY_VER(dev_priv) >= 13) { + if (underrun_recovery_supported(crtc_state)) + tmp &= ~UNDERRUN_RECOVERY_DISABLE_ADLP; + else + tmp |= UNDERRUN_RECOVERY_DISABLE_ADLP; + } intel_de_write(dev_priv, PIPE_CHICKEN(pipe), tmp); } @@ -2706,10 +2746,10 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) intel_wait_for_vblank(dev_priv, crtc->pipe); } -static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) +static void intel_crtc_dpms_overlay_disable(struct intel_crtc *crtc) { - if (intel_crtc->overlay) - (void) intel_overlay_switch_off(intel_crtc->overlay); + if (crtc->overlay) + (void) intel_overlay_switch_off(crtc->overlay); /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. @@ -3177,6 +3217,28 @@ static void intel_encoders_enable(struct intel_atomic_state *state, } } +static void intel_encoders_pre_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + + if (old_conn_state->crtc != &crtc->base) + continue; + + if (encoder->pre_disable) + encoder->pre_disable(state, encoder, old_crtc_state, + old_conn_state); + } +} + static void intel_encoders_disable(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -3386,13 +3448,17 @@ static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, intel_de_write(dev_priv, CLKGATE_DIS_PSL(pipe), val); } -static void icl_pipe_mbus_enable(struct intel_crtc *crtc) +static void icl_pipe_mbus_enable(struct intel_crtc *crtc, bool joined_mbus) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 val; - val = MBUS_DBOX_A_CREDIT(2); + /* Wa_22010947358:adl-p */ + if (IS_ALDERLAKE_P(dev_priv)) + val = joined_mbus ? 
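/*
 * Illustrative sketch only, not part of the patch above: it restates the
 * underrun-recovery decision as plain C - a predicate over the pipe state,
 * then a chicken-register update whose bit polarity differs on DG2 (the bit
 * enables recovery and must stay cleared) versus display version 13+ (the
 * bit disables it). The state struct and bit names are invented stand-ins;
 * "downscaling" collapses the pfit/YCbCr 4:2:0 checks into one flag.
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_UNDERRUN_RECOVERY_ENABLE_DG2	(1u << 30)
#define DEMO_UNDERRUN_RECOVERY_DISABLE_ADLP	(1u << 30)

struct demo_pipe_state {
	bool downscaling;
	bool dsc_enabled;
	bool psr2_enabled;
	bool splitter_enabled;
};

static bool demo_underrun_recovery_supported(const struct demo_pipe_state *s)
{
	return !s->downscaling && !s->dsc_enabled &&
	       !s->psr2_enabled && !s->splitter_enabled;
}

static uint32_t demo_pipe_chicken(uint32_t tmp, const struct demo_pipe_state *s,
				  bool is_dg2, int display_ver)
{
	if (is_dg2) {
		/* DG2: recovery must stay off; the bit *enables* it there. */
		tmp &= ~DEMO_UNDERRUN_RECOVERY_ENABLE_DG2;
	} else if (display_ver >= 13) {
		if (demo_underrun_recovery_supported(s))
			tmp &= ~DEMO_UNDERRUN_RECOVERY_DISABLE_ADLP;
		else
			tmp |= DEMO_UNDERRUN_RECOVERY_DISABLE_ADLP;
	}
	return tmp;
}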
MBUS_DBOX_A_CREDIT(6) : MBUS_DBOX_A_CREDIT(4); + else + val = MBUS_DBOX_A_CREDIT(2); if (DISPLAY_VER(dev_priv) >= 12) { val |= MBUS_DBOX_BW_CREDIT(2); @@ -3460,7 +3526,8 @@ static void icl_ddi_bigjoiner_pre_enable(struct intel_atomic_state *state, * Enable sequence steps 1-7 on bigjoiner master */ intel_encoders_pre_pll_enable(state, master); - intel_enable_shared_dpll(master_crtc_state); + if (master_crtc_state->shared_dpll) + intel_enable_shared_dpll(master_crtc_state); intel_encoders_pre_enable(state, master); /* and DSC on slave */ @@ -3518,7 +3585,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, crtc->active = true; - /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ + /* Display WA #1180: WaDisableScalarClockGating: glk */ psl_clkgate_wa = DISPLAY_VER(dev_priv) == 10 && new_crtc_state->pch_pfit.enabled; if (psl_clkgate_wa) @@ -3542,13 +3609,17 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, hsw_set_linetime_wm(new_crtc_state); if (DISPLAY_VER(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); + icl_set_pipe_chicken(new_crtc_state); if (dev_priv->display.initial_watermarks) dev_priv->display.initial_watermarks(state, crtc); - if (DISPLAY_VER(dev_priv) >= 11) - icl_pipe_mbus_enable(crtc); + if (DISPLAY_VER(dev_priv) >= 11) { + const struct intel_dbuf_state *dbuf_state = + intel_atomic_get_new_dbuf_state(state); + + icl_pipe_mbus_enable(crtc, dbuf_state->joined_mbus); + } if (new_crtc_state->bigjoiner_slave) intel_crtc_vblank_on(new_crtc_state); @@ -3682,6 +3753,13 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) { if (phy == PHY_NONE) return false; + else if (IS_DG2(dev_priv)) + /* + * DG2 outputs labelled as "combo PHY" in the bspec use + * SNPS PHYs with completely different programming, + * hence we always return false here. + */ + return false; else if (IS_ALDERLAKE_S(dev_priv)) return phy <= PHY_E; else if (IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv)) @@ -3696,7 +3774,10 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) { - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + /* DG2's "TC1" output uses a SNPS PHY */ + return false; + else if (IS_ALDERLAKE_P(dev_priv)) return phy >= PHY_F && phy <= PHY_I; else if (IS_TIGERLAKE(dev_priv)) return phy >= PHY_D && phy <= PHY_I; @@ -3706,6 +3787,20 @@ bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) return false; } +bool intel_phy_is_snps(struct drm_i915_private *dev_priv, enum phy phy) +{ + if (phy == PHY_NONE) + return false; + else if (IS_DG2(dev_priv)) + /* + * All four "combo" ports and the TC1 port (PHY E) use + * Synopsis PHYs. 
+ */ + return phy <= PHY_E; + + return false; +} + enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port) { if (DISPLAY_VER(i915) >= 13 && port >= PORT_D_XELPD) @@ -3850,7 +3945,7 @@ static u64 get_crtc_power_domains(struct intel_crtc_state *crtc_state) } if (HAS_DDI(dev_priv) && crtc_state->has_audio) - mask |= BIT_ULL(POWER_DOMAIN_AUDIO); + mask |= BIT_ULL(POWER_DOMAIN_AUDIO_MMIO); if (crtc_state->shared_dpll) mask |= BIT_ULL(POWER_DOMAIN_DISPLAY_CORE); @@ -6487,23 +6582,21 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx) { - struct intel_crtc *intel_crtc; - struct intel_encoder *intel_encoder = + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_crtc *possible_crtc; - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = NULL; - struct drm_device *dev = encoder->dev; + struct intel_crtc *possible_crtc; + struct intel_crtc *crtc = NULL; + struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_mode_config *config = &dev->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; struct intel_crtc_state *crtc_state; - int ret, i = -1; + int ret; drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, - encoder->base.id, encoder->name); + encoder->base.base.id, encoder->base.name); old->restore_state = NULL; @@ -6521,9 +6614,9 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, /* See if we already have a CRTC for this connector */ if (connector->state->crtc) { - crtc = connector->state->crtc; + crtc = to_intel_crtc(connector->state->crtc); - ret = drm_modeset_lock(&crtc->mutex, ctx); + ret = drm_modeset_lock(&crtc->base.mutex, ctx); if (ret) goto fail; @@ -6532,17 +6625,17 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, } /* Find an unused one (if possible) */ - for_each_crtc(dev, possible_crtc) { - i++; - if (!(encoder->possible_crtcs & (1 << i))) + for_each_intel_crtc(dev, possible_crtc) { + if (!(encoder->base.possible_crtcs & + drm_crtc_mask(&possible_crtc->base))) continue; - ret = drm_modeset_lock(&possible_crtc->mutex, ctx); + ret = drm_modeset_lock(&possible_crtc->base.mutex, ctx); if (ret) goto fail; - if (possible_crtc->state->enable) { - drm_modeset_unlock(&possible_crtc->mutex); + if (possible_crtc->base.state->enable) { + drm_modeset_unlock(&possible_crtc->base.mutex); continue; } @@ -6561,8 +6654,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, } found: - intel_crtc = to_intel_crtc(crtc); - state = drm_atomic_state_alloc(dev); restore_state = drm_atomic_state_alloc(dev); if (!state || !restore_state) { @@ -6579,11 +6670,11 @@ found: goto fail; } - ret = drm_atomic_set_crtc_for_connector(connector_state, crtc); + ret = drm_atomic_set_crtc_for_connector(connector_state, &crtc->base); if (ret) goto fail; - crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); + crtc_state = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto fail; @@ -6596,15 +6687,15 @@ found: if (ret) goto fail; - ret = intel_modeset_disable_planes(state, crtc); + ret = intel_modeset_disable_planes(state, &crtc->base); if (ret) goto fail; ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) - ret = 
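/*
 * Illustrative sketch only, not part of the patch above: it condenses the
 * DG2 PHY classification the patch adds - the "combo"-labelled outputs and
 * TC1 all sit on Synopsys (SNPS) PHYs, so the combo/TC helpers return false
 * there while a new is_snps helper claims PHY A..E instead. The enum and
 * the non-DG2 range below are placeholders for the example.
 */
#include <stdbool.h>

enum demo_phy { DEMO_PHY_A, DEMO_PHY_B, DEMO_PHY_C, DEMO_PHY_D, DEMO_PHY_E, DEMO_PHY_F };

static bool demo_phy_is_combo(bool is_dg2, enum demo_phy phy)
{
	if (is_dg2)
		return false;			/* DG2 "combo" ports are SNPS PHYs */
	return phy <= DEMO_PHY_D;		/* placeholder range for other platforms */
}

static bool demo_phy_is_snps(bool is_dg2, enum demo_phy phy)
{
	return is_dg2 && phy <= DEMO_PHY_E;	/* combo ports plus TC1 */
}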
PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); + ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, &crtc->base)); if (!ret) - ret = drm_atomic_add_affected_planes(restore_state, crtc); + ret = drm_atomic_add_affected_planes(restore_state, &crtc->base); if (ret) { drm_dbg_kms(&dev_priv->drm, "Failed to create a copy of old state to restore: %i\n", @@ -6623,7 +6714,7 @@ found: drm_atomic_state_put(state); /* let the connector get through one full cycle before testing */ - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); return true; fail: @@ -7295,12 +7386,13 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, } if (dev_priv->display.compute_pipe_wm) { - ret = dev_priv->display.compute_pipe_wm(crtc_state); + ret = dev_priv->display.compute_pipe_wm(state, crtc); if (ret) { drm_dbg_kms(&dev_priv->drm, "Target pipe watermarks are invalid\n"); return ret; } + } if (dev_priv->display.compute_intermediate_wm) { @@ -7313,7 +7405,7 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, * old state and the new state. We can program these * immediately. */ - ret = dev_priv->display.compute_intermediate_wm(crtc_state); + ret = dev_priv->display.compute_intermediate_wm(state, crtc); if (ret) { drm_dbg_kms(&dev_priv->drm, "No valid intermediate pipe watermarks are possible\n"); @@ -8636,10 +8728,11 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(double_wide); - PIPE_CONF_CHECK_P(shared_dpll); + if (dev_priv->dpll.mgr) + PIPE_CONF_CHECK_P(shared_dpll); /* FIXME do the readout properly and get rid of this quirk */ - if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_BIGJOINER_SLAVE)) { + if (dev_priv->dpll.mgr && !PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_BIGJOINER_SLAVE)) { PIPE_CONF_CHECK_X(dpll_hw_state.dpll); PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md); PIPE_CONF_CHECK_X(dpll_hw_state.fp0); @@ -8671,7 +8764,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); + } + if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_BIGJOINER_SLAVE)) { PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -9009,6 +9104,10 @@ verify_crtc_state(struct intel_crtc *crtc, if (!new_crtc_state->hw.active) return; + if (new_crtc_state->bigjoiner_slave) + /* No PLLs set for slave */ + pipe_config->shared_dpll = NULL; + intel_pipe_config_sanity_check(dev_priv, pipe_config); if (!intel_pipe_config_compare(new_crtc_state, @@ -9112,6 +9211,55 @@ verify_shared_dpll_state(struct intel_crtc *crtc, } static void +verify_mpllb_state(struct intel_atomic_state *state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_mpllb_state mpllb_hw_state = { 0 }; + struct intel_mpllb_state *mpllb_sw_state = &new_crtc_state->mpllb_state; + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_encoder *encoder; + + if (!IS_DG2(i915)) + return; + + if (!new_crtc_state->hw.active) + return; + + if (new_crtc_state->bigjoiner_slave) + return; + + encoder = intel_get_crtc_new_encoder(state, new_crtc_state); + intel_mpllb_readout_hw_state(encoder, &mpllb_hw_state); + +#define MPLLB_CHECK(name) do { \ + if (mpllb_sw_state->name != mpllb_hw_state.name) { \ + pipe_config_mismatch(false, crtc, "MPLLB:" __stringify(name), \ + "(expected 0x%08x, found 
0x%08x)", \ + mpllb_sw_state->name, \ + mpllb_hw_state.name); \ + } \ +} while (0) + + MPLLB_CHECK(mpllb_cp); + MPLLB_CHECK(mpllb_div); + MPLLB_CHECK(mpllb_div2); + MPLLB_CHECK(mpllb_fracn1); + MPLLB_CHECK(mpllb_fracn2); + MPLLB_CHECK(mpllb_sscen); + MPLLB_CHECK(mpllb_sscstep); + + /* + * ref_control is handled by the hardware/firemware and never + * programmed by the software, but the proper values are supplied + * in the bspec for verification purposes. + */ + MPLLB_CHECK(ref_control); + +#undef MPLLB_CHECK +} + +static void intel_modeset_verify_crtc(struct intel_crtc *crtc, struct intel_atomic_state *state, struct intel_crtc_state *old_crtc_state, @@ -9124,6 +9272,7 @@ intel_modeset_verify_crtc(struct intel_crtc *crtc, verify_connector_state(state, crtc); verify_crtc_state(crtc, old_crtc_state, new_crtc_state); verify_shared_dpll_state(crtc, old_crtc_state, new_crtc_state); + verify_mpllb_state(state, new_crtc_state); } static void @@ -9749,7 +9898,7 @@ static int intel_atomic_check_async(struct intel_atomic_state *state) /* * FIXME: This check is kept generic for all platforms. - * Need to verify this for all gen9 and gen10 platforms to enable + * Need to verify this for all gen9 platforms to enable * this selectively if required. */ switch (new_plane_state->hw.fb->modifier) { @@ -10160,7 +10309,7 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, hsw_set_linetime_wm(new_crtc_state); if (DISPLAY_VER(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); + icl_set_pipe_chicken(new_crtc_state); } static void commit_pipe_pre_planes(struct intel_atomic_state *state, @@ -10294,6 +10443,8 @@ static void intel_old_crtc_state_disables(struct intel_atomic_state *state, drm_WARN_ON(&dev_priv->drm, old_crtc_state->bigjoiner_slave); + intel_encoders_pre_disable(state, crtc); + intel_crtc_disable_planes(state, crtc); /* @@ -11328,7 +11479,12 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (!HAS_DISPLAY(dev_priv)) return; - if (IS_ALDERLAKE_P(dev_priv)) { + if (IS_DG2(dev_priv)) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D_XELPD); + } else if (IS_ALDERLAKE_P(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_TC1); @@ -11375,13 +11531,6 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); vlv_dsi_init(dev_priv); - } else if (DISPLAY_VER(dev_priv) == 10) { - intel_ddi_init(dev_priv, PORT_A); - intel_ddi_init(dev_priv, PORT_B); - intel_ddi_init(dev_priv, PORT_C); - intel_ddi_init(dev_priv, PORT_D); - intel_ddi_init(dev_priv, PORT_E); - intel_ddi_init(dev_priv, PORT_F); } else if (DISPLAY_VER(dev_priv) >= 9) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); @@ -11790,7 +11939,7 @@ intel_user_framebuffer_create(struct drm_device *dev, /* object is backed with LMEM for discrete */ i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) { + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { /* object is "remote", not in local memory */ i915_gem_object_put(obj); return ERR_PTR(-EREMOTE); @@ -13136,7 +13285,7 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) static void intel_early_display_was(struct drm_i915_private *dev_priv) { /* - * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl + * Display WA #1185 
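/*
 * Illustrative sketch only, not part of the patch above: it shows the
 * technique behind MPLLB_CHECK - a function-local macro that compares one
 * named field of the software-computed PLL state against the value read
 * back from hardware and reports the field name via stringification. The
 * state struct and the report path are stand-ins for the example.
 */
#include <stdio.h>
#include <stdint.h>

struct demo_mpllb_state {
	uint32_t mpllb_cp;
	uint32_t mpllb_div;
	uint32_t ref_control;
};

static void demo_check_mpllb(const struct demo_mpllb_state *sw,
			     const struct demo_mpllb_state *hw)
{
#define DEMO_MPLLB_CHECK(name) do {					\
	if (sw->name != hw->name)					\
		fprintf(stderr,						\
			"MPLLB:" #name " mismatch (expected 0x%08x, found 0x%08x)\n", \
			(unsigned int)sw->name, (unsigned int)hw->name); \
} while (0)

	DEMO_MPLLB_CHECK(mpllb_cp);
	DEMO_MPLLB_CHECK(mpllb_div);
	DEMO_MPLLB_CHECK(ref_control);

#undef DEMO_MPLLB_CHECK
}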
WaDisableDARBFClkGating:glk,icl,ehl,tgl * Also known as Wa_14010480278. */ if (IS_DISPLAY_VER(dev_priv, 10, 12)) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index c9dbaf074d77..284936f0ddab 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -561,6 +561,7 @@ struct drm_display_mode * intel_encoder_current_mode(struct intel_encoder *encoder); bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy); bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy); +bool intel_phy_is_snps(struct drm_i915_private *dev_priv, enum phy phy); enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, enum port port); int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 88bb05d5c483..8fdacb252bb1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -544,6 +544,11 @@ static int i915_dmc_info(struct seq_file *m, void *unused) seq_printf(m, "fw loaded: %s\n", yesno(intel_dmc_has_payload(dev_priv))); seq_printf(m, "path: %s\n", dmc->fw_path); + seq_printf(m, "Pipe A fw support: %s\n", + yesno(GRAPHICS_VER(dev_priv) >= 12)); + seq_printf(m, "Pipe A fw loaded: %s\n", yesno(dmc->dmc_info[DMC_FW_PIPEA].payload)); + seq_printf(m, "Pipe B fw support: %s\n", yesno(IS_ALDERLAKE_P(dev_priv))); + seq_printf(m, "Pipe B fw loaded: %s\n", yesno(dmc->dmc_info[DMC_FW_PIPEB].payload)); if (!intel_dmc_has_payload(dev_priv)) goto out; @@ -582,7 +587,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) out: seq_printf(m, "program base: 0x%08x\n", - intel_de_read(dev_priv, DMC_PROGRAM(0))); + intel_de_read(dev_priv, DMC_PROGRAM(dmc->dmc_info[DMC_FW_MAIN].start_mmioaddr, 0))); seq_printf(m, "ssp base: 0x%08x\n", intel_de_read(dev_priv, DMC_SSP_BASE)); seq_printf(m, "htp: 0x%08x\n", intel_de_read(dev_priv, DMC_HTP_SKL)); @@ -1225,7 +1230,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused) static void drrs_status_per_crtc(struct seq_file *m, struct drm_device *dev, - struct intel_crtc *intel_crtc) + struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_drrs *drrs = &dev_priv->drrs; @@ -1237,7 +1242,7 @@ static void drrs_status_per_crtc(struct seq_file *m, drm_for_each_connector_iter(connector, &conn_iter) { bool supported = false; - if (connector->state->crtc != &intel_crtc->base) + if (connector->state->crtc != &crtc->base) continue; seq_printf(m, "%s:\n", connector->name); @@ -1252,7 +1257,7 @@ static void drrs_status_per_crtc(struct seq_file *m, seq_puts(m, "\n"); - if (to_intel_crtc_state(intel_crtc->base.state)->has_drrs) { + if (to_intel_crtc_state(crtc->base.state)->has_drrs) { struct intel_panel *panel; mutex_lock(&drrs->mutex); @@ -1298,16 +1303,16 @@ static int i915_drrs_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; int active_crtc_cnt = 0; drm_modeset_lock_all(dev); - for_each_intel_crtc(dev, intel_crtc) { - if (intel_crtc->base.state->active) { + for_each_intel_crtc(dev, crtc) { + if (crtc->base.state->active) { active_crtc_cnt++; seq_printf(m, "\nCRTC %d: ", active_crtc_cnt); - drrs_status_per_crtc(m, dev, intel_crtc); + 
drrs_status_per_crtc(m, dev, crtc); } } drm_modeset_unlock_all(dev); @@ -2064,7 +2069,7 @@ i915_fifo_underrun_reset_write(struct file *filp, size_t cnt, loff_t *ppos) { struct drm_i915_private *dev_priv = filp->private_data; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; struct drm_device *dev = &dev_priv->drm; int ret; bool reset; @@ -2076,15 +2081,15 @@ i915_fifo_underrun_reset_write(struct file *filp, if (!reset) return cnt; - for_each_intel_crtc(dev, intel_crtc) { + for_each_intel_crtc(dev, crtc) { struct drm_crtc_commit *commit; struct intel_crtc_state *crtc_state; - ret = drm_modeset_lock_single_interruptible(&intel_crtc->base.mutex); + ret = drm_modeset_lock_single_interruptible(&crtc->base.mutex); if (ret) return ret; - crtc_state = to_intel_crtc_state(intel_crtc->base.state); + crtc_state = to_intel_crtc_state(crtc->base.state); commit = crtc_state->uapi.commit; if (commit) { ret = wait_for_completion_interruptible(&commit->hw_done); @@ -2095,12 +2100,12 @@ i915_fifo_underrun_reset_write(struct file *filp, if (!ret && crtc_state->hw.active) { drm_dbg_kms(&dev_priv->drm, "Re-arming FIFO underruns on pipe %c\n", - pipe_name(intel_crtc->pipe)); + pipe_name(crtc->pipe)); - intel_crtc_arm_fifo_underrun(intel_crtc, crtc_state); + intel_crtc_arm_fifo_underrun(crtc, crtc_state); } - drm_modeset_unlock(&intel_crtc->base.mutex); + drm_modeset_unlock(&crtc->base.mutex); if (ret) return ret; @@ -2251,6 +2256,11 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data) if (connector->status != connector_status_connected) return -ENODEV; + if (DISPLAY_VER(i915) >= 13) { + LPSP_CAPABLE(encoder->port <= PORT_B); + return 0; + } + switch (DISPLAY_VER(i915)) { case 12: /* @@ -2385,6 +2395,73 @@ static const struct file_operations i915_dsc_fec_support_fops = { .write = i915_dsc_fec_support_write }; +static int i915_dsc_bpp_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct drm_device *dev = connector->dev; + struct drm_crtc *crtc; + struct intel_crtc_state *crtc_state; + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + int ret; + + if (!encoder) + return -ENODEV; + + ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + if (ret) + return ret; + + crtc = connector->state->crtc; + if (connector->status != connector_status_connected || !crtc) { + ret = -ENODEV; + goto out; + } + + crtc_state = to_intel_crtc_state(crtc->state); + seq_printf(m, "Compressed_BPP: %d\n", crtc_state->dsc.compressed_bpp); + +out: drm_modeset_unlock(&dev->mode_config.connection_mutex); + + return ret; +} + +static ssize_t i915_dsc_bpp_write(struct file *file, + const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct drm_connector *connector = + ((struct seq_file *)file->private_data)->private; + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + int dsc_bpp = 0; + int ret; + + ret = kstrtoint_from_user(ubuf, len, 0, &dsc_bpp); + if (ret < 0) + return ret; + + intel_dp->force_dsc_bpp = dsc_bpp; + *offp += len; + + return len; +} + +static int i915_dsc_bpp_open(struct inode *inode, + struct file *file) +{ + return single_open(file, i915_dsc_bpp_show, + inode->i_private); +} + +static const struct file_operations i915_dsc_bpp_fops = { + .owner = THIS_MODULE, + .open = i915_dsc_bpp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_dsc_bpp_write 
+}; + /** * intel_connector_debugfs_add - add i915 specific connector debugfs files * @connector: pointer to a registered drm_connector @@ -2423,10 +2500,17 @@ int intel_connector_debugfs_add(struct drm_connector *connector) connector, &i915_hdcp_sink_capability_fops); } - if ((DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) && ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && !to_intel_connector(connector)->mst_port) || connector->connector_type == DRM_MODE_CONNECTOR_eDP)) - debugfs_create_file("i915_dsc_fec_support", S_IRUGO, root, + if (DISPLAY_VER(dev_priv) >= 11 && + ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && + !to_intel_connector(connector)->mst_port) || + connector->connector_type == DRM_MODE_CONNECTOR_eDP)) { + debugfs_create_file("i915_dsc_fec_support", 0644, root, connector, &i915_dsc_fec_support_fops); + debugfs_create_file("i915_dsc_bpp", 0644, root, + connector, &i915_dsc_bpp_fops); + } + /* Legacy panels doesn't lpsp on any platform */ if ((DISPLAY_VER(dev_priv) >= 9 || IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 86b7ac7b65ec..cce1a926fcc1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -18,6 +18,7 @@ #include "intel_pm.h" #include "intel_pps.h" #include "intel_sideband.h" +#include "intel_snps_phy.h" #include "intel_tc.h" #include "intel_vga.h" @@ -106,8 +107,10 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_OTHER"; case POWER_DOMAIN_VGA: return "VGA"; - case POWER_DOMAIN_AUDIO: - return "AUDIO"; + case POWER_DOMAIN_AUDIO_MMIO: + return "AUDIO_MMIO"; + case POWER_DOMAIN_AUDIO_PLAYBACK: + return "AUDIO_PLAYBACK"; case POWER_DOMAIN_AUX_A: return "AUX_A"; case POWER_DOMAIN_AUX_B: @@ -341,6 +344,17 @@ static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, { const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; int pw_idx = power_well->desc->hsw.idx; + int enable_delay = power_well->desc->hsw.fixed_enable_delay; + + /* + * For some power wells we're not supposed to watch the status bit for + * an ack, but rather just wait a fixed amount of time and then + * proceed. This is only used on DG2. + */ + if (IS_DG2(dev_priv) && enable_delay) { + usleep_range(enable_delay, 2 * enable_delay); + return; + } /* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. 
*/ if (intel_de_wait_for_set(dev_priv, regs->driver, @@ -436,17 +450,6 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, hsw_wait_for_power_well_enable(dev_priv, power_well, false); - /* Display WA #1178: cnl */ - if (IS_CANNONLAKE(dev_priv) && - pw_idx >= GLK_PW_CTL_IDX_AUX_B && - pw_idx <= CNL_PW_CTL_IDX_AUX_F) { - u32 val; - - val = intel_de_read(dev_priv, CNL_AUX_ANAOVRD1(pw_idx)); - val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS; - intel_de_write(dev_priv, CNL_AUX_ANAOVRD1(pw_idx), val); - } - if (power_well->desc->hsw.has_fuses) { enum skl_power_gate pg; @@ -961,8 +964,9 @@ static void bxt_disable_dc9(struct drm_i915_private *dev_priv) static void assert_dmc_loaded(struct drm_i915_private *dev_priv) { drm_WARN_ONCE(&dev_priv->drm, - !intel_de_read(dev_priv, DMC_PROGRAM(0)), - "DMC program storage start is NULL\n"); + !intel_de_read(dev_priv, + DMC_PROGRAM(dev_priv->dmc.dmc_info[DMC_FW_MAIN].start_mmioaddr, 0)), + "DMC program storage start is NULL\n"); drm_WARN_ONCE(&dev_priv->drm, !intel_de_read(dev_priv, DMC_SSP_BASE), "DMC SSP Base Not fine\n"); drm_WARN_ONCE(&dev_priv->drm, !intel_de_read(dev_priv, DMC_HTP_SKL), @@ -2507,7 +2511,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_GMBUS) | \ @@ -2557,7 +2562,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ @@ -2590,7 +2596,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ @@ -2606,7 +2613,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ @@ -2624,7 +2632,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ @@ -2659,7 +2668,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ 
BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ @@ -2692,7 +2702,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ @@ -2731,63 +2742,6 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ - BIT_ULL(POWER_DOMAIN_PIPE_B) | \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ - BIT_ULL(POWER_DOMAIN_PIPE_C) | \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ - BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ - BIT_ULL(POWER_DOMAIN_AUX_B) | \ - BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_AUX_D) | \ - BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ - BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_A) | \ - BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_B) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_D_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_D) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_F_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ - CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ - BIT_ULL(POWER_DOMAIN_MODESET) | \ - BIT_ULL(POWER_DOMAIN_AUX_A) | \ - BIT_ULL(POWER_DOMAIN_INIT)) - /* * ICL PW_0/PG_0 domains (HW/DMC control): * - PCI @@ -2829,7 +2783,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) /* * - transcoder WD @@ -2921,7 +2876,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_AUX_TBT5) | \ BIT_ULL(POWER_DOMAIN_AUX_TBT6) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define 
TGL_PW_2_POWER_DOMAINS ( \ @@ -2991,7 +2947,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, RKL_PW_4_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_PIPE_B) | \ BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_TC1) | \ @@ -3029,6 +2986,35 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_INIT)) /* + * DG1 onwards Audio MMIO/VERBS lies in PG0 power well. + */ +#define DG1_PW_3_POWER_DOMAINS ( \ + TGL_PW_4_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_TC1) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_TC2) | \ + BIT_ULL(POWER_DOMAIN_AUX_USBC1) | \ + BIT_ULL(POWER_DOMAIN_AUX_USBC2) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define DG1_PW_2_POWER_DOMAINS ( \ + DG1_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_VDSC_PW2) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define DG1_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + DG1_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +/* * XE_LPD Power Domains * * Previous platforms required that PG(n-1) be enabled before PG(n). That @@ -3073,7 +3059,7 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, XELPD_PW_B_POWER_DOMAINS | \ XELPD_PW_C_POWER_DOMAINS | \ XELPD_PW_D_POWER_DOMAINS | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_D_XELPD) | \ @@ -3114,6 +3100,7 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, #define XELPD_DISPLAY_DC_OFF_POWER_DOMAINS ( \ XELPD_PW_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ @@ -3694,148 +3681,6 @@ static const struct i915_power_well_desc glk_power_wells[] = { }, }; -static const struct i915_power_well_desc cnl_power_wells[] = { - { - .name = "always-on", - .always_on = true, - .domains = POWER_DOMAIN_MASK, - .ops = &i9xx_always_on_power_well_ops, - .id = DISP_PW_ID_NONE, - }, - { - .name = "power well 1", - /* Handled by the DMC firmware */ - .always_on = true, - .domains = 0, - .ops = &hsw_power_well_ops, - .id = SKL_DISP_PW_1, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_PW_1, - .hsw.has_fuses = true, - }, - }, - { - .name = "AUX A", - .domains = CNL_DISPLAY_AUX_A_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_AUX_A, - }, - }, - { - .name = "AUX B", - .domains = CNL_DISPLAY_AUX_B_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_AUX_B, - }, - }, - { - .name = "AUX C", - .domains = CNL_DISPLAY_AUX_C_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_AUX_C, - }, - }, - { - .name = "AUX D", - .domains = CNL_DISPLAY_AUX_D_POWER_DOMAINS, - .ops = 
&hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = CNL_PW_CTL_IDX_AUX_D, - }, - }, - { - .name = "DC off", - .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, - .ops = &gen9_dc_off_power_well_ops, - .id = SKL_DISP_DC_OFF, - }, - { - .name = "power well 2", - .domains = CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = SKL_DISP_PW_2, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_PW_2, - .hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C), - .hsw.has_vga = true, - .hsw.has_fuses = true, - }, - }, - { - .name = "DDI A IO power well", - .domains = CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_DDI_A, - }, - }, - { - .name = "DDI B IO power well", - .domains = CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_DDI_B, - }, - }, - { - .name = "DDI C IO power well", - .domains = CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_DDI_C, - }, - }, - { - .name = "DDI D IO power well", - .domains = CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_DDI_D, - }, - }, - { - .name = "DDI F IO power well", - .domains = CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = CNL_DISP_PW_DDI_F_IO, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = CNL_PW_CTL_IDX_DDI_F, - }, - }, - { - .name = "AUX F", - .domains = CNL_DISPLAY_AUX_F_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = CNL_DISP_PW_DDI_F_AUX, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = CNL_PW_CTL_IDX_AUX_F, - }, - }, -}; - static const struct i915_power_well_ops icl_aux_power_well_ops = { .sync_hw = hsw_power_well_sync_hw, .enable = icl_aux_power_well_enable, @@ -4642,6 +4487,165 @@ static const struct i915_power_well_desc rkl_power_wells[] = { }, }; +static const struct i915_power_well_desc dg1_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = DG1_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_DC_OFF, + }, + { + .name = "power well 2", + .domains = DG1_PW_2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_2, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .hsw.has_fuses = true, + }, + }, + { + .name = "power well 3", + .domains = DG1_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_3, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_3, + .hsw.irq_pipe_mask = BIT(PIPE_B), + .hsw.has_vga = true, + .hsw.has_fuses = true, + }, + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_A, + } + }, + { + .name = "DDI B IO", + 
.domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_B, + } + }, + { + .name = "DDI IO TC1", + .domains = TGL_DDI_IO_TC1_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC1, + }, + }, + { + .name = "DDI IO TC2", + .domains = TGL_DDI_IO_TC2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC2, + }, + }, + { + .name = "AUX A", + .domains = TGL_AUX_A_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + }, + }, + { + .name = "AUX B", + .domains = TGL_AUX_B_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + }, + }, + { + .name = "AUX USBC1", + .domains = TGL_AUX_IO_USBC1_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC1, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX USBC2", + .domains = TGL_AUX_IO_USBC2_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC2, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "power well 4", + .domains = TGL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + } + }, + { + .name = "power well 5", + .domains = TGL_PW_5_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_PW_5, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_D), + }, + }, +}; + static const struct i915_power_well_desc xelpd_power_wells[] = { { .name = "always-on", @@ -4827,6 +4831,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4837,6 +4842,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4847,6 +4853,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_AUX_C, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4857,6 +4864,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = XELPD_PW_CTL_IDX_AUX_D, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4877,6 +4885,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = TGL_PW_CTL_IDX_AUX_TC1, + .hsw.fixed_enable_delay = 600, }, }, { @@ -5121,7 +5130,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = 0; } else if (DISPLAY_VER(dev_priv) >= 13) { err = set_power_wells(power_domains, xelpd_power_wells); - } else if (IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) { + } else if (IS_DG1(dev_priv)) { + err = set_power_wells(power_domains, dg1_power_wells); + } else if 
(IS_ALDERLAKE_S(dev_priv)) { err = set_power_wells_mask(power_domains, tgl_power_wells, BIT_ULL(TGL_DISP_PW_TC_COLD_OFF)); } else if (IS_ROCKETLAKE(dev_priv)) { @@ -5130,12 +5141,6 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = set_power_wells(power_domains, tgl_power_wells); } else if (DISPLAY_VER(dev_priv) == 11) { err = set_power_wells(power_domains, icl_power_wells); - } else if (IS_CNL_WITH_PORT_F(dev_priv)) { - err = set_power_wells(power_domains, cnl_power_wells); - } else if (IS_CANNONLAKE(dev_priv)) { - err = set_power_wells_mask(power_domains, cnl_power_wells, - BIT_ULL(CNL_DISP_PW_DDI_F_IO) | - BIT_ULL(CNL_DISP_PW_DDI_F_AUX)); } else if (IS_GEMINILAKE(dev_priv)) { err = set_power_wells(power_domains, glk_power_wells); } else if (IS_BROXTON(dev_priv)) { @@ -5690,75 +5695,6 @@ static void bxt_display_core_uninit(struct drm_i915_private *dev_priv) usleep_range(10, 30); /* 10 us delay per Bspec */ } -static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *well; - - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - - /* 1. Enable PCH Reset Handshake */ - intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); - - if (!HAS_DISPLAY(dev_priv)) - return; - - /* 2-3. */ - intel_combo_phy_init(dev_priv); - - /* - * 4. Enable Power Well 1 (PG1). - * The AUX IO power wells will be enabled on demand. - */ - mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_enable(dev_priv, well); - mutex_unlock(&power_domains->lock); - - /* 5. Enable CD clock */ - intel_cdclk_init_hw(dev_priv); - - /* 6. Enable DBUF */ - gen9_dbuf_enable(dev_priv); - - if (resume && intel_dmc_has_payload(dev_priv)) - intel_dmc_load_program(dev_priv); -} - -static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *well; - - if (!HAS_DISPLAY(dev_priv)) - return; - - gen9_disable_dc_states(dev_priv); - - /* 1. Disable all display engine functions -> aready done */ - - /* 2. Disable DBUF */ - gen9_dbuf_disable(dev_priv); - - /* 3. Disable CD clock */ - intel_cdclk_uninit_hw(dev_priv); - - /* - * 4. Disable Power Well 1 (PG1). - * The AUX IO power wells are toggled on demand, so they are already - * disabled at this point. - */ - mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_disable(dev_priv, well); - mutex_unlock(&power_domains->lock); - - usleep_range(10, 30); /* 10 us delay per Bspec */ - - /* 5. 
*/ - intel_combo_phy_uninit(dev_priv); -} - struct buddy_page_mask { u32 page_mask; u8 type; @@ -5797,9 +5733,14 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) unsigned long abox_mask = INTEL_INFO(dev_priv)->abox_mask; int config, i; + /* BW_BUDDY registers are not used on dgpu's beyond DG1 */ + if (IS_DGFX(dev_priv) && !IS_DG1(dev_priv)) + return; + if (IS_ALDERLAKE_S(dev_priv) || - IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0) || - IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) + IS_DG1_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) || + IS_RKL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) || + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) /* Wa_1409767108:tgl,dg1,adl-s */ table = wa_1409767108_buddy_page_masks; else @@ -5821,10 +5762,11 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, BW_BUDDY_PAGE_MASK(i), table[config].page_mask); - /* Wa_22010178259:tgl,rkl */ - intel_de_rmw(dev_priv, BW_BUDDY_CTL(i), - BW_BUDDY_TLB_REQ_TIMER_MASK, - BW_BUDDY_TLB_REQ_TIMER(0x8)); + /* Wa_22010178259:tgl,dg1,rkl,adl-s */ + if (DISPLAY_VER(dev_priv) == 12) + intel_de_rmw(dev_priv, BW_BUDDY_CTL(i), + BW_BUDDY_TLB_REQ_TIMER_MASK, + BW_BUDDY_TLB_REQ_TIMER(0x8)); } } } @@ -5878,11 +5820,15 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) >= 12) tgl_bw_buddy_init(dev_priv); + /* 8. Ensure PHYs have completed calibration and adaptation */ + if (IS_DG2(dev_priv)) + intel_snps_phy_wait_for_calibration(dev_priv); + if (resume && intel_dmc_has_payload(dev_priv)) intel_dmc_load_program(dev_priv); - /* Wa_14011508470 */ - if (DISPLAY_VER(dev_priv) == 12) { + /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */ + if (DISPLAY_VER(dev_priv) >= 12) { val = DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR; intel_uncore_rmw(&dev_priv->uncore, GEN11_CHICKEN_DCPR_2, 0, val); @@ -6097,8 +6043,6 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) if (DISPLAY_VER(i915) >= 11) { icl_display_core_init(i915, resume); - } else if (IS_CANNONLAKE(i915)) { - cnl_display_core_init(i915, resume); } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_init(i915, resume); } else if (DISPLAY_VER(i915) == 9) { @@ -6258,8 +6202,6 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, if (DISPLAY_VER(i915) >= 11) icl_display_core_uninit(i915); - else if (IS_CANNONLAKE(i915)) - cnl_display_core_uninit(i915); else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) bxt_display_core_uninit(i915); else if (DISPLAY_VER(i915) == 9) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 4f0917df4375..978531841fa3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -76,7 +76,8 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, POWER_DOMAIN_VGA, - POWER_DOMAIN_AUDIO, + POWER_DOMAIN_AUDIO_MMIO, + POWER_DOMAIN_AUDIO_PLAYBACK, POWER_DOMAIN_AUX_A, POWER_DOMAIN_AUX_B, POWER_DOMAIN_AUX_C, @@ -142,8 +143,6 @@ enum i915_power_well_id { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_1, SKL_DISP_PW_2, - CNL_DISP_PW_DDI_F_IO, - CNL_DISP_PW_DDI_F_AUX, ICL_DISP_PW_3, SKL_DISP_DC_OFF, TGL_DISP_PW_TC_COLD_OFF, @@ -223,6 +222,12 @@ struct i915_power_well_desc { u8 idx; /* Mask of pipes whose IRQ logic is backed by the pw */ u8 irq_pipe_mask; + /* + * Instead of waiting for the status bit to ack enables, + * 
just wait a specific amount of time and then consider + * the well enabled. + */ + u16 fixed_enable_delay; /* The pw is backing the VGA functionality */ bool has_vga:1; bool has_fuses:1; @@ -386,6 +391,10 @@ intel_display_power_put_all_in_set(struct drm_i915_private *i915, intel_display_power_put_mask_in_set(i915, power_domain_set, power_domain_set->mask); } +/* + * FIXME: We should probably switch this to a 0-based scheme to be consistent + * with how we now name/number DBUF_CTL instances. + */ enum dbuf_slice { DBUF_S1, DBUF_S2, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 04613864cbe8..6beeeeba1bed 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -48,6 +48,7 @@ struct drm_printer; struct __intel_global_objs_state; +struct intel_ddi_buf_trans; /* * Display related stuff @@ -195,6 +196,10 @@ struct intel_encoder { void (*update_complete)(struct intel_atomic_state *, struct intel_encoder *, struct intel_crtc *); + void (*pre_disable)(struct intel_atomic_state *, + struct intel_encoder *, + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*disable)(struct intel_atomic_state *, struct intel_encoder *, const struct intel_crtc_state *, @@ -263,6 +268,9 @@ struct intel_encoder { * Returns whether the port clock is enabled or not. */ bool (*is_clock_enabled)(struct intel_encoder *encoder); + const struct intel_ddi_buf_trans *(*get_buf_trans)(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries); enum hpd_pin hpd_pin; enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ @@ -310,7 +318,7 @@ struct intel_panel { /* DPCD backlight */ union { struct { - u8 pwmgen_bit_count; + struct drm_edp_backlight_info info; } vesa; struct { bool sdr_uses_aux; @@ -880,6 +888,18 @@ enum intel_output_format { INTEL_OUTPUT_FORMAT_YCBCR444, }; +struct intel_mpllb_state { + u32 clock; /* in KHz */ + u32 ref_control; + u32 mpllb_cp; + u32 mpllb_div; + u32 mpllb_div2; + u32 mpllb_fracn1; + u32 mpllb_fracn2; + u32 mpllb_sscen; + u32 mpllb_sscstep; +}; + struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. @@ -1014,7 +1034,10 @@ struct intel_crtc_state { struct intel_shared_dpll *shared_dpll; /* Actual register state of the dpll, for shared dpll cross-checking. */ - struct intel_dpll_hw_state dpll_hw_state; + union { + struct intel_dpll_hw_state dpll_hw_state; + struct intel_mpllb_state mpllb_state; + }; /* * ICL reserved DPLLs for the CRTC/port. The active PLL is selected by @@ -1040,7 +1063,9 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; bool enable_psr2_sel_fetch; + bool req_psr2_sdp_prior_scanline; u32 dc3co_exitline; + u16 su_y_granularity; /* * Frequence the dpll for the port should run at. 
Differs from the @@ -1493,12 +1518,14 @@ struct intel_psr { bool colorimetry_support; bool psr2_enabled; bool psr2_sel_fetch_enabled; + bool req_psr2_sdp_prior_scanline; u8 sink_sync_latency; ktime_t last_entry_attempt; ktime_t last_exit; bool sink_not_reliable; bool irq_aux_error; - u16 su_x_granularity; + u16 su_w_granularity; + u16 su_y_granularity; u32 dc3co_exitline; u32 dc3co_exit_delay; struct delayed_work dc3co_work; @@ -1604,6 +1631,7 @@ struct intel_dp { /* Display stream compression testing */ bool force_dsc_en; + int force_dsc_bpp; bool hobl_failed; bool hobl_active; diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 97308da28059..3c3c6cb5c0df 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -45,6 +45,10 @@ #define GEN12_DMC_MAX_FW_SIZE ICL_DMC_MAX_FW_SIZE +#define ADLP_DMC_PATH DMC_PATH(adlp, 2, 10) +#define ADLP_DMC_VERSION_REQUIRED DMC_VERSION(2, 10) +MODULE_FIRMWARE(ADLP_DMC_PATH); + #define ADLS_DMC_PATH DMC_PATH(adls, 2, 01) #define ADLS_DMC_VERSION_REQUIRED DMC_VERSION(2, 1) MODULE_FIRMWARE(ADLS_DMC_PATH); @@ -53,12 +57,12 @@ MODULE_FIRMWARE(ADLS_DMC_PATH); #define DG1_DMC_VERSION_REQUIRED DMC_VERSION(2, 2) MODULE_FIRMWARE(DG1_DMC_PATH); -#define RKL_DMC_PATH DMC_PATH(rkl, 2, 02) -#define RKL_DMC_VERSION_REQUIRED DMC_VERSION(2, 2) +#define RKL_DMC_PATH DMC_PATH(rkl, 2, 03) +#define RKL_DMC_VERSION_REQUIRED DMC_VERSION(2, 3) MODULE_FIRMWARE(RKL_DMC_PATH); -#define TGL_DMC_PATH DMC_PATH(tgl, 2, 08) -#define TGL_DMC_VERSION_REQUIRED DMC_VERSION(2, 8) +#define TGL_DMC_PATH DMC_PATH(tgl, 2, 12) +#define TGL_DMC_VERSION_REQUIRED DMC_VERSION(2, 12) MODULE_FIRMWARE(TGL_DMC_PATH); #define ICL_DMC_PATH DMC_PATH(icl, 1, 09) @@ -66,11 +70,6 @@ MODULE_FIRMWARE(TGL_DMC_PATH); #define ICL_DMC_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(ICL_DMC_PATH); -#define CNL_DMC_PATH DMC_PATH(cnl, 1, 07) -#define CNL_DMC_VERSION_REQUIRED DMC_VERSION(1, 7) -#define CNL_DMC_MAX_FW_SIZE GLK_DMC_MAX_FW_SIZE -MODULE_FIRMWARE(CNL_DMC_PATH); - #define GLK_DMC_PATH DMC_PATH(glk, 1, 04) #define GLK_DMC_VERSION_REQUIRED DMC_VERSION(1, 4) #define GLK_DMC_MAX_FW_SIZE 0x4000 @@ -96,6 +95,7 @@ MODULE_FIRMWARE(BXT_DMC_PATH); #define PACKAGE_V2_MAX_FW_INFO_ENTRIES 32 #define DMC_V1_MAX_MMIO_COUNT 8 #define DMC_V3_MAX_MMIO_COUNT 20 +#define DMC_V1_MMIO_START_RANGE 0x80000 struct intel_css_header { /* 0x09 for DMC */ @@ -239,53 +239,18 @@ struct stepping_info { bool intel_dmc_has_payload(struct drm_i915_private *i915) { - return i915->dmc.dmc_payload; + return i915->dmc.dmc_info[DMC_FW_MAIN].payload; } -static const struct stepping_info skl_stepping_info[] = { - {'A', '0'}, {'B', '0'}, {'C', '0'}, - {'D', '0'}, {'E', '0'}, {'F', '0'}, - {'G', '0'}, {'H', '0'}, {'I', '0'}, - {'J', '0'}, {'K', '0'} -}; - -static const struct stepping_info bxt_stepping_info[] = { - {'A', '0'}, {'A', '1'}, {'A', '2'}, - {'B', '0'}, {'B', '1'}, {'B', '2'} -}; - -static const struct stepping_info icl_stepping_info[] = { - {'A', '0'}, {'A', '1'}, {'A', '2'}, - {'B', '0'}, {'B', '2'}, - {'C', '0'} -}; - -static const struct stepping_info no_stepping_info = { '*', '*' }; - static const struct stepping_info * -intel_get_stepping_info(struct drm_i915_private *dev_priv) +intel_get_stepping_info(struct drm_i915_private *i915, + struct stepping_info *si) { - const struct stepping_info *si; - unsigned int size; - - if (IS_ICELAKE(dev_priv)) { - size = ARRAY_SIZE(icl_stepping_info); - si = icl_stepping_info; - } else if (IS_SKYLAKE(dev_priv)) { - size 
= ARRAY_SIZE(skl_stepping_info); - si = skl_stepping_info; - } else if (IS_BROXTON(dev_priv)) { - size = ARRAY_SIZE(bxt_stepping_info); - si = bxt_stepping_info; - } else { - size = 0; - si = NULL; - } - - if (INTEL_REVID(dev_priv) < size) - return si + INTEL_REVID(dev_priv); + const char *step_name = intel_step_name(RUNTIME_INFO(i915)->step.display_step); - return &no_stepping_info; + si->stepping = step_name[0]; + si->substepping = step_name[1]; + return si; } static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) @@ -316,8 +281,8 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) */ void intel_dmc_load_program(struct drm_i915_private *dev_priv) { - u32 *payload = dev_priv->dmc.dmc_payload; - u32 i, fw_size; + struct intel_dmc *dmc = &dev_priv->dmc; + u32 id, i; if (!HAS_DMC(dev_priv)) { drm_err(&dev_priv->drm, @@ -325,26 +290,31 @@ void intel_dmc_load_program(struct drm_i915_private *dev_priv) return; } - if (!intel_dmc_has_payload(dev_priv)) { + if (!dev_priv->dmc.dmc_info[DMC_FW_MAIN].payload) { drm_err(&dev_priv->drm, "Tried to program CSR with empty payload\n"); return; } - fw_size = dev_priv->dmc.dmc_fw_size; assert_rpm_wakelock_held(&dev_priv->runtime_pm); preempt_disable(); - for (i = 0; i < fw_size; i++) - intel_uncore_write_fw(&dev_priv->uncore, DMC_PROGRAM(i), - payload[i]); + for (id = 0; id < DMC_FW_MAX; id++) { + for (i = 0; i < dmc->dmc_info[id].dmc_fw_size; i++) { + intel_uncore_write_fw(&dev_priv->uncore, + DMC_PROGRAM(dmc->dmc_info[id].start_mmioaddr, i), + dmc->dmc_info[id].payload[i]); + } + } preempt_enable(); - for (i = 0; i < dev_priv->dmc.mmio_count; i++) { - intel_de_write(dev_priv, dev_priv->dmc.mmioaddr[i], - dev_priv->dmc.mmiodata[i]); + for (id = 0; id < DMC_FW_MAX; id++) { + for (i = 0; i < dmc->dmc_info[id].mmio_count; i++) { + intel_de_write(dev_priv, dmc->dmc_info[id].mmioaddr[i], + dmc->dmc_info[id].mmiodata[i]); + } } dev_priv->dmc.dc_state = 0; @@ -352,62 +322,72 @@ void intel_dmc_load_program(struct drm_i915_private *dev_priv) gen9_set_dc_state_debugmask(dev_priv); } +static bool fw_info_matches_stepping(const struct intel_fw_info *fw_info, + const struct stepping_info *si) +{ + if ((fw_info->substepping == '*' && si->stepping == fw_info->stepping) || + (si->stepping == fw_info->stepping && si->substepping == fw_info->substepping) || + /* + * If we don't find a more specific one from above two checks, we + * then check for the generic one to be sure to work even with + * "broken firmware" + */ + (si->stepping == '*' && si->substepping == fw_info->substepping) || + (fw_info->stepping == '*' && fw_info->substepping == '*')) + return true; + + return false; +} + /* * Search fw_info table for dmc_offset to find firmware binary: num_entries is * already sanitized. */ -static u32 find_dmc_fw_offset(const struct intel_fw_info *fw_info, +static void dmc_set_fw_offset(struct intel_dmc *dmc, + const struct intel_fw_info *fw_info, unsigned int num_entries, const struct stepping_info *si, u8 package_ver) { - u32 dmc_offset = DMC_DEFAULT_FW_OFFSET; - unsigned int i; + unsigned int i, id; + + struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); for (i = 0; i < num_entries; i++) { - if (package_ver > 1 && fw_info[i].dmc_id != 0) - continue; + id = package_ver <= 1 ? 
DMC_FW_MAIN : fw_info[i].dmc_id; - if (fw_info[i].substepping == '*' && - si->stepping == fw_info[i].stepping) { - dmc_offset = fw_info[i].offset; - break; + if (id >= DMC_FW_MAX) { + drm_dbg(&i915->drm, "Unsupported firmware id: %u\n", id); + continue; } - if (si->stepping == fw_info[i].stepping && - si->substepping == fw_info[i].substepping) { - dmc_offset = fw_info[i].offset; - break; - } + /* More specific versions come first, so we don't even have to + * check for the stepping since we already found a previous FW + * for this id. + */ + if (dmc->dmc_info[id].present) + continue; - if (fw_info[i].stepping == '*' && - fw_info[i].substepping == '*') { - /* - * In theory we should stop the search as generic - * entries should always come after the more specific - * ones, but let's continue to make sure to work even - * with "broken" firmwares. If we don't find a more - * specific one, then we use this entry - */ - dmc_offset = fw_info[i].offset; + if (fw_info_matches_stepping(&fw_info[i], si)) { + dmc->dmc_info[id].present = true; + dmc->dmc_info[id].dmc_offset = fw_info[i].offset; } } - - return dmc_offset; } static u32 parse_dmc_fw_header(struct intel_dmc *dmc, const struct intel_dmc_header_base *dmc_header, - size_t rem_size) + size_t rem_size, u8 dmc_id) { struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); + struct dmc_fw_info *dmc_info = &dmc->dmc_info[dmc_id]; unsigned int header_len_bytes, dmc_header_size, payload_size, i; const u32 *mmioaddr, *mmiodata; - u32 mmio_count, mmio_count_max; + u32 mmio_count, mmio_count_max, start_mmioaddr; u8 *payload; - BUILD_BUG_ON(ARRAY_SIZE(dmc->mmioaddr) < DMC_V3_MAX_MMIO_COUNT || - ARRAY_SIZE(dmc->mmioaddr) < DMC_V1_MAX_MMIO_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(dmc_info->mmioaddr) < DMC_V3_MAX_MMIO_COUNT || + ARRAY_SIZE(dmc_info->mmioaddr) < DMC_V1_MAX_MMIO_COUNT); /* * Check if we can access common fields, we will checkc again below @@ -430,6 +410,7 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, mmio_count_max = DMC_V3_MAX_MMIO_COUNT; /* header_len is in dwords */ header_len_bytes = dmc_header->header_len * 4; + start_mmioaddr = v3->start_mmioaddr; dmc_header_size = sizeof(*v3); } else if (dmc_header->header_ver == 1) { const struct intel_dmc_header_v1 *v1 = @@ -443,6 +424,7 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, mmio_count = v1->mmio_count; mmio_count_max = DMC_V1_MAX_MMIO_COUNT; header_len_bytes = dmc_header->header_len; + start_mmioaddr = DMC_V1_MMIO_START_RANGE; dmc_header_size = sizeof(*v1); } else { drm_err(&i915->drm, "Unknown DMC fw header version: %u\n", @@ -463,16 +445,11 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, } for (i = 0; i < mmio_count; i++) { - if (mmioaddr[i] < DMC_MMIO_START_RANGE || - mmioaddr[i] > DMC_MMIO_END_RANGE) { - drm_err(&i915->drm, "DMC firmware has wrong mmio address 0x%x\n", - mmioaddr[i]); - return 0; - } - dmc->mmioaddr[i] = _MMIO(mmioaddr[i]); - dmc->mmiodata[i] = mmiodata[i]; + dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); + dmc_info->mmiodata[i] = mmiodata[i]; } - dmc->mmio_count = mmio_count; + dmc_info->mmio_count = mmio_count; + dmc_info->start_mmioaddr = start_mmioaddr; rem_size -= header_len_bytes; @@ -485,14 +462,14 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, drm_err(&i915->drm, "DMC FW too big (%u bytes)\n", payload_size); return 0; } - dmc->dmc_fw_size = dmc_header->fw_size; + dmc_info->dmc_fw_size = dmc_header->fw_size; - dmc->dmc_payload = kmalloc(payload_size, GFP_KERNEL); - if (!dmc->dmc_payload) + dmc_info->payload = 
kmalloc(payload_size, GFP_KERNEL); + if (!dmc_info->payload) return 0; payload = (u8 *)(dmc_header) + header_len_bytes; - memcpy(dmc->dmc_payload, payload, payload_size); + memcpy(dmc_info->payload, payload, payload_size); return header_len_bytes + payload_size; @@ -509,7 +486,7 @@ parse_dmc_fw_package(struct intel_dmc *dmc, { struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); u32 package_size = sizeof(struct intel_package_header); - u32 num_entries, max_entries, dmc_offset; + u32 num_entries, max_entries; const struct intel_fw_info *fw_info; if (rem_size < package_size) @@ -545,16 +522,11 @@ parse_dmc_fw_package(struct intel_dmc *dmc, fw_info = (const struct intel_fw_info *) ((u8 *)package_header + sizeof(*package_header)); - dmc_offset = find_dmc_fw_offset(fw_info, num_entries, si, - package_header->header_ver); - if (dmc_offset == DMC_DEFAULT_FW_OFFSET) { - drm_err(&i915->drm, "DMC firmware not supported for %c stepping\n", - si->stepping); - return 0; - } + dmc_set_fw_offset(dmc, fw_info, num_entries, si, + package_header->header_ver); /* dmc_offset is in dwords */ - return package_size + dmc_offset * 4; + return package_size; error_truncated: drm_err(&i915->drm, "Truncated DMC firmware, refusing.\n"); @@ -604,9 +576,11 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, struct intel_package_header *package_header; struct intel_dmc_header_base *dmc_header; struct intel_dmc *dmc = &dev_priv->dmc; - const struct stepping_info *si = intel_get_stepping_info(dev_priv); + struct stepping_info display_info = { '*', '*'}; + const struct stepping_info *si = intel_get_stepping_info(dev_priv, &display_info); u32 readcount = 0; - u32 r; + u32 r, offset; + int id; if (!fw) return; @@ -627,9 +601,19 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, readcount += r; - /* Extract dmc_header information */ - dmc_header = (struct intel_dmc_header_base *)&fw->data[readcount]; - parse_dmc_fw_header(dmc, dmc_header, fw->size - readcount); + for (id = 0; id < DMC_FW_MAX; id++) { + if (!dev_priv->dmc.dmc_info[id].present) + continue; + + offset = readcount + dmc->dmc_info[id].dmc_offset * 4; + if (fw->size - offset < 0) { + drm_err(&dev_priv->drm, "Reading beyond the fw_size\n"); + continue; + } + + dmc_header = (struct intel_dmc_header_base *)&fw->data[offset]; + parse_dmc_fw_header(dmc, dmc_header, fw->size - offset, id); + } } static void intel_dmc_runtime_pm_get(struct drm_i915_private *dev_priv) @@ -705,7 +689,11 @@ void intel_dmc_ucode_init(struct drm_i915_private *dev_priv) */ intel_dmc_runtime_pm_get(dev_priv); - if (IS_ALDERLAKE_S(dev_priv)) { + if (IS_ALDERLAKE_P(dev_priv)) { + dmc->fw_path = ADLP_DMC_PATH; + dmc->required_version = ADLP_DMC_VERSION_REQUIRED; + dmc->max_fw_size = GEN12_DMC_MAX_FW_SIZE; + } else if (IS_ALDERLAKE_S(dev_priv)) { dmc->fw_path = ADLS_DMC_PATH; dmc->required_version = ADLS_DMC_VERSION_REQUIRED; dmc->max_fw_size = GEN12_DMC_MAX_FW_SIZE; @@ -725,10 +713,6 @@ void intel_dmc_ucode_init(struct drm_i915_private *dev_priv) dmc->fw_path = ICL_DMC_PATH; dmc->required_version = ICL_DMC_VERSION_REQUIRED; dmc->max_fw_size = ICL_DMC_MAX_FW_SIZE; - } else if (IS_CANNONLAKE(dev_priv)) { - dmc->fw_path = CNL_DMC_PATH; - dmc->required_version = CNL_DMC_VERSION_REQUIRED; - dmc->max_fw_size = CNL_DMC_MAX_FW_SIZE; } else if (IS_GEMINILAKE(dev_priv)) { dmc->fw_path = GLK_DMC_PATH; dmc->required_version = GLK_DMC_VERSION_REQUIRED; @@ -827,5 +811,5 @@ void intel_dmc_ucode_fini(struct drm_i915_private *dev_priv) intel_dmc_ucode_suspend(dev_priv); 
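The intel_dmc.c hunks above replace the single dmc_payload with a per-blob dmc_info[] table: dmc_set_fw_offset() walks the package's fw_info entries in order and fills at most one slot per dmc_id (DMC_FW_MAIN, DMC_FW_PIPEA, DMC_FW_PIPEB), first match wins, with stepping wildcards acting as fallbacks. A minimal stand-alone sketch of that selection rule follows; the package contents, slot names and main() harness are illustrative assumptions, and the matching logic is a simplified subset of fw_info_matches_stepping() shown above.

/*
 * User-space model (not driver code) of the per-id, first-match firmware
 * selection done by dmc_set_fw_offset(): entries are scanned in package
 * order, an id whose slot is already populated is skipped, and '*' acts
 * as a wildcard fallback for the stepping/substepping match.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum { FW_MAIN, FW_PIPEA, FW_PIPEB, FW_MAX };

struct fw_entry { unsigned int id; char stepping, substepping; unsigned int offset; };
struct fw_slot  { bool present; unsigned int offset; };

/* Simplified subset of the kernel's fw_info_matches_stepping(). */
static bool matches(const struct fw_entry *e, char step, char substep)
{
	return (e->substepping == '*' && e->stepping == step) ||
	       (e->stepping == step && e->substepping == substep) ||
	       (e->stepping == '*' && e->substepping == '*');
}

int main(void)
{
	/* Hypothetical package: a C0-specific main blob, then generic fallbacks. */
	const struct fw_entry pkg[] = {
		{ FW_MAIN,  'C', '0', 0x40 },
		{ FW_MAIN,  '*', '*', 0x10 },
		{ FW_PIPEA, '*', '*', 0x80 },
	};
	struct fw_slot slots[FW_MAX] = { { false, 0 } };
	char step = 'C', substep = '0';
	size_t i;
	unsigned int id;

	for (i = 0; i < sizeof(pkg) / sizeof(pkg[0]); i++) {
		const struct fw_entry *e = &pkg[i];

		/* More specific entries come first, so keep the first hit per id. */
		if (e->id >= FW_MAX || slots[e->id].present)
			continue;
		if (matches(e, step, substep)) {
			slots[e->id].present = true;
			slots[e->id].offset = e->offset;
		}
	}

	for (id = 0; id < FW_MAX; id++)
		printf("slot %u: present=%d offset=0x%x\n",
		       id, slots[id].present, slots[id].offset);
	return 0;
}

Because specific stepping entries precede the generic '*'/'*' fallback in the package, keeping the first hit per id is what lets the parser tolerate out-of-order ("broken") firmware, as the comment moved into fw_info_matches_stepping() notes.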
drm_WARN_ON(&dev_priv->drm, dev_priv->dmc.wakeref); - kfree(dev_priv->dmc.dmc_payload); + kfree(dev_priv->dmc.dmc_info[DMC_FW_MAIN].payload); } diff --git a/drivers/gpu/drm/i915/display/intel_dmc.h b/drivers/gpu/drm/i915/display/intel_dmc.h index 4c22f567b61b..c3c00ff03869 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.h +++ b/drivers/gpu/drm/i915/display/intel_dmc.h @@ -16,17 +16,30 @@ struct drm_i915_private; #define DMC_VERSION_MAJOR(version) ((version) >> 16) #define DMC_VERSION_MINOR(version) ((version) & 0xffff) +enum { + DMC_FW_MAIN = 0, + DMC_FW_PIPEA, + DMC_FW_PIPEB, + DMC_FW_MAX +}; + struct intel_dmc { struct work_struct work; const char *fw_path; u32 required_version; u32 max_fw_size; /* bytes */ - u32 *dmc_payload; - u32 dmc_fw_size; /* dwords */ u32 version; - u32 mmio_count; - i915_reg_t mmioaddr[20]; - u32 mmiodata[20]; + struct dmc_fw_info { + u32 mmio_count; + i915_reg_t mmioaddr[20]; + u32 mmiodata[20]; + u32 dmc_offset; + u32 start_mmioaddr; + u32 dmc_fw_size; /*dwords */ + u32 *payload; + bool present; + } dmc_info[DMC_FW_MAX]; + u32 dc_state; u32 target_dc_state; u32 allowed_dc_mask; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 862c1df69cc2..04175f359fd6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -222,29 +222,6 @@ bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp) encoder->port != PORT_A); } -static int cnl_max_source_rate(struct intel_dp *intel_dp) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - /* Low voltage SKUs are limited to max of 5.4G */ - if (voltage == VOLTAGE_INFO_0_85V) - return 540000; - - /* For this SKU 8.1G is supported in all ports */ - if (IS_CNL_WITH_PORT_F(dev_priv)) - return 810000; - - /* For other SKUs, max rate on ports A and D is 5.4G */ - if (port == PORT_A || port == PORT_D) - return 540000; - - return 810000; -} - static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -270,7 +247,7 @@ static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { /* The values must be in increasing order */ - static const int cnl_rates[] = { + static const int icl_rates[] = { 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000 }; static const int bxt_rates[] = { @@ -295,12 +272,10 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) drm_WARN_ON(&dev_priv->drm, intel_dp->source_rates || intel_dp->num_source_rates); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { - source_rates = cnl_rates; - size = ARRAY_SIZE(cnl_rates); - if (DISPLAY_VER(dev_priv) == 10) - max_rate = cnl_max_source_rate(intel_dp); - else if (IS_JSL_EHL(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) { + source_rates = icl_rates; + size = ARRAY_SIZE(icl_rates); + if (IS_JSL_EHL(dev_priv)) max_rate = ehl_max_source_rate(intel_dp); else max_rate = icl_max_source_rate(intel_dp); @@ -1274,6 +1249,23 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->pipe_bpp); pipe_config->dsc.slice_count = dsc_dp_slice_count; } + + /* As of today we support DSC for only RGB */ + if (intel_dp->force_dsc_bpp) { + if (intel_dp->force_dsc_bpp >= 8 && + intel_dp->force_dsc_bpp < pipe_bpp) { + drm_dbg_kms(&dev_priv->drm, + "DSC BPP forced to %d", + 
intel_dp->force_dsc_bpp); + pipe_config->dsc.compressed_bpp = + intel_dp->force_dsc_bpp; + } else { + drm_dbg_kms(&dev_priv->drm, + "Invalid DSC BPP %d", + intel_dp->force_dsc_bpp); + } + } + /* * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate * is greater than the maximum Cdclock and if slice count is even @@ -3031,9 +3023,6 @@ void intel_read_dp_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, unsigned int type) { - if (encoder->type != INTEL_OUTPUT_DDI) - return; - switch (type) { case DP_SDP_VSC: intel_read_dp_vsc_sdp(encoder, crtc_state, @@ -3342,6 +3331,9 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state); + drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET, + intel_dp->train_set, crtc_state->lane_count); + drm_dp_set_phy_test_pattern(&intel_dp->aux, data, link_status[DP_DPCD_REV]); } @@ -4736,7 +4728,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, int refresh_rate) { struct intel_dp *intel_dp = dev_priv->drrs.dp; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum drrs_refresh_rate_type index = DRRS_HIGH_RR; if (refresh_rate <= 0) { @@ -4750,7 +4742,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, return; } - if (!intel_crtc) { + if (!crtc) { drm_dbg_kms(&dev_priv->drm, "DRRS: intel_crtc not initialized\n"); return; @@ -5233,7 +5225,8 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, } intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode); - intel_connector->panel.backlight.power = intel_pps_backlight_power; + if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK)) + intel_connector->panel.backlight.power = intel_pps_backlight_power; intel_panel_setup_backlight(connector, pipe); if (fixed_mode) { diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 7c048d2ecf43..f483f479dd0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -158,7 +158,6 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, /* * Max timeout values: * SKL-GLK: 1.6ms - * CNL: 3.2ms * ICL+: 4ms */ ret = DP_AUX_CH_CTL_SEND_BUSY | diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 8e9ac9ba1d38..6ac568617ef3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -107,7 +107,7 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) u8 tcon_cap[4]; ret = drm_dp_dpcd_read(aux, INTEL_EDP_HDR_TCON_CAP0, tcon_cap, sizeof(tcon_cap)); - if (ret < 0) + if (ret != sizeof(tcon_cap)) return false; if (!(tcon_cap[1] & INTEL_EDP_HDR_TCON_BRIGHTNESS_NITS_CAP)) @@ -137,7 +137,7 @@ intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe u8 tmp; u8 buf[2] = { 0 }; - if (drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &tmp) < 0) { + if (drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &tmp) != 1) { drm_err(&i915->drm, "Failed to read current backlight mode from DPCD\n"); return 0; } @@ -153,7 +153,8 @@ intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe return panel->backlight.max; } - if (drm_dp_dpcd_read(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, 
sizeof(buf)) < 0) { + if (drm_dp_dpcd_read(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, + sizeof(buf)) != sizeof(buf)) { drm_err(&i915->drm, "Failed to read brightness from DPCD\n"); return 0; } @@ -172,7 +173,8 @@ intel_dp_aux_hdr_set_aux_backlight(const struct drm_connector_state *conn_state, buf[0] = level & 0xFF; buf[1] = (level & 0xFF00) >> 8; - if (drm_dp_dpcd_write(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, 4) < 0) + if (drm_dp_dpcd_write(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, + sizeof(buf)) != sizeof(buf)) drm_err(dev, "Failed to write brightness level to DPCD\n"); } @@ -203,7 +205,7 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, u8 old_ctrl, ctrl; ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl); - if (ret < 0) { + if (ret != 1) { drm_err(&i915->drm, "Failed to read current backlight control mode: %d\n", ret); return; } @@ -221,7 +223,7 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, } if (ctrl != old_ctrl) - if (drm_dp_dpcd_writeb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, ctrl) < 0) + if (drm_dp_dpcd_writeb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, ctrl) != 1) drm_err(&i915->drm, "Failed to configure DPCD brightness controls\n"); } @@ -268,153 +270,19 @@ intel_dp_aux_hdr_setup_backlight(struct intel_connector *connector, enum pipe pi } /* VESA backlight callbacks */ -static void set_vesa_backlight_enable(struct intel_dp *intel_dp, bool enable) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 reg_val = 0; - - /* Early return when display use other mechanism to enable backlight. */ - if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP)) - return; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, - ®_val) < 0) { - drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n", - DP_EDP_DISPLAY_CONTROL_REGISTER); - return; - } - if (enable) - reg_val |= DP_EDP_BACKLIGHT_ENABLE; - else - reg_val &= ~(DP_EDP_BACKLIGHT_ENABLE); - - if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, - reg_val) != 1) { - drm_dbg_kms(&i915->drm, "Failed to %s aux backlight\n", - enabledisable(enable)); - } -} - -static bool intel_dp_aux_vesa_backlight_dpcd_mode(struct intel_connector *connector) -{ - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 mode_reg; - - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_BACKLIGHT_MODE_SET_REGISTER, - &mode_reg) != 1) { - drm_dbg_kms(&i915->drm, - "Failed to read the DPCD register 0x%x\n", - DP_EDP_BACKLIGHT_MODE_SET_REGISTER); - return false; - } - - return (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) == - DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; -} - -/* - * Read the current backlight value from DPCD register(s) based - * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported - */ static u32 intel_dp_aux_vesa_get_backlight(struct intel_connector *connector, enum pipe unused) { - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 read_val[2] = { 0x0 }; - u16 level = 0; - - /* - * If we're not in DPCD control mode yet, the programmed brightness - * value is meaningless and we should assume max brightness - */ - if (!intel_dp_aux_vesa_backlight_dpcd_mode(connector)) - return connector->panel.backlight.max; - - if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, - &read_val, sizeof(read_val)) < 0) { - 
drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n", - DP_EDP_BACKLIGHT_BRIGHTNESS_MSB); - return 0; - } - level = read_val[0]; - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) - level = (read_val[0] << 8 | read_val[1]); - - return level; + return connector->panel.backlight.level; } -/* - * Sends the current backlight level over the aux channel, checking if its using - * 8-bit or 16 bit value (MSB and LSB) - */ static void -intel_dp_aux_vesa_set_backlight(const struct drm_connector_state *conn_state, - u32 level) +intel_dp_aux_vesa_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 vals[2] = { 0x0 }; - - vals[0] = level; - - /* Write the MSB and/or LSB */ - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) { - vals[0] = (level & 0xFF00) >> 8; - vals[1] = (level & 0xFF); - } - if (drm_dp_dpcd_write(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, - vals, sizeof(vals)) < 0) { - drm_dbg_kms(&i915->drm, - "Failed to write aux backlight level\n"); - return; - } -} - -/* - * Set PWM Frequency divider to match desired frequency in vbt. - * The PWM Frequency is calculated as 27Mhz / (F x P). - * - Where F = PWM Frequency Pre-Divider value programmed by field 7:0 of the - * EDP_BACKLIGHT_FREQ_SET register (DPCD Address 00728h) - * - Where P = 2^Pn, where Pn is the value programmed by field 4:0 of the - * EDP_PWMGEN_BIT_COUNT register (DPCD Address 00724h) - */ -static bool intel_dp_aux_vesa_set_pwm_freq(struct intel_connector *connector) -{ - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_dp *intel_dp = intel_attached_dp(connector); - const u8 pn = connector->panel.backlight.edp.vesa.pwmgen_bit_count; - int freq, fxp, f, fxp_actual, fxp_min, fxp_max; - - freq = dev_priv->vbt.backlight.pwm_freq_hz; - if (!freq) { - drm_dbg_kms(&dev_priv->drm, - "Use panel default backlight frequency\n"); - return false; - } - - fxp = DIV_ROUND_CLOSEST(KHz(DP_EDP_BACKLIGHT_FREQ_BASE_KHZ), freq); - f = clamp(DIV_ROUND_CLOSEST(fxp, 1 << pn), 1, 255); - fxp_actual = f << pn; - - /* Ensure frequency is within 25% of desired value */ - fxp_min = DIV_ROUND_CLOSEST(fxp * 3, 4); - fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4); - - if (fxp_min > fxp_actual || fxp_actual > fxp_max) { - drm_dbg_kms(&dev_priv->drm, "Actual frequency out of range\n"); - return false; - } + struct intel_panel *panel = &connector->panel; + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_BACKLIGHT_FREQ_SET, (u8) f) < 0) { - drm_dbg_kms(&dev_priv->drm, - "Failed to write aux backlight freq\n"); - return false; - } - return true; + drm_edp_backlight_set_level(&intel_dp->aux, &panel->backlight.edp.vesa.info, level); } static void @@ -422,159 +290,46 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_panel *panel = &connector->panel; - u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode; - u8 pwmgen_bit_count = panel->backlight.edp.vesa.pwmgen_bit_count; - - if (drm_dp_dpcd_readb(&intel_dp->aux, - 
DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) { - drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n", - DP_EDP_BACKLIGHT_MODE_SET_REGISTER); - return; - } - - new_dpcd_buf = dpcd_buf; - edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; - - switch (edp_backlight_mode) { - case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM: - case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET: - case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT: - new_dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; - new_dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; - - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT, - pwmgen_bit_count) < 0) - drm_dbg_kms(&i915->drm, - "Failed to write aux pwmgen bit count\n"); - - break; - - /* Do nothing when it is already DPCD mode */ - case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD: - default: - break; - } - - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_FREQ_AUX_SET_CAP) - if (intel_dp_aux_vesa_set_pwm_freq(connector)) - new_dpcd_buf |= DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE; - - if (new_dpcd_buf != dpcd_buf) { - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_BACKLIGHT_MODE_SET_REGISTER, new_dpcd_buf) < 0) { - drm_dbg_kms(&i915->drm, - "Failed to write aux backlight mode\n"); - } - } + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - intel_dp_aux_vesa_set_backlight(conn_state, level); - set_vesa_backlight_enable(intel_dp, true); + drm_edp_backlight_enable(&intel_dp->aux, &panel->backlight.edp.vesa.info, level); } static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state *old_conn_state, u32 level) { - set_vesa_backlight_enable(enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)), - false); -} - -static u32 intel_dp_aux_vesa_calc_max_backlight(struct intel_connector *connector) -{ - struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct intel_panel *panel = &connector->panel; - u32 max_backlight = 0; - int freq, fxp, fxp_min, fxp_max, fxp_actual, f = 1; - u8 pn, pn_min, pn_max; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_PWMGEN_BIT_COUNT, &pn) == 1) { - pn &= DP_EDP_PWMGEN_BIT_COUNT_MASK; - max_backlight = (1 << pn) - 1; - } - - /* Find desired value of (F x P) - * Note that, if F x P is out of supported range, the maximum value or - * minimum value will applied automatically. So no need to check that. - */ - freq = i915->vbt.backlight.pwm_freq_hz; - drm_dbg_kms(&i915->drm, "VBT defined backlight frequency %u Hz\n", - freq); - if (!freq) { - drm_dbg_kms(&i915->drm, - "Use panel default backlight frequency\n"); - return max_backlight; - } - - fxp = DIV_ROUND_CLOSEST(KHz(DP_EDP_BACKLIGHT_FREQ_BASE_KHZ), freq); - - /* Use highest possible value of Pn for more granularity of brightness - * adjustment while satifying the conditions below. - * - Pn is in the range of Pn_min and Pn_max - * - F is in the range of 1 and 255 - * - FxP is within 25% of desired value. - * Note: 25% is arbitrary value and may need some tweak. 
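To put numbers on the divider math described in the removed comments above (a worked example, not text from the patch): for a VBT request of 200 Hz, F x P = 27,000,000 / 200 = 135,000; choosing a large usable Pn, say Pn = 10 (P = 2^10 = 1024), gives F = DIV_ROUND_CLOSEST(135000, 1024) = 132, i.e. an actual frequency of 27 MHz / (132 * 1024) ~= 199.8 Hz, comfortably inside the 25% window. The equivalent selection now happens inside drm_edp_backlight_init(), which is handed the VBT frequency further down in this diff.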
- */ - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &pn_min) != 1) { - drm_dbg_kms(&i915->drm, - "Failed to read pwmgen bit count cap min\n"); - return max_backlight; - } - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX, &pn_max) != 1) { - drm_dbg_kms(&i915->drm, - "Failed to read pwmgen bit count cap max\n"); - return max_backlight; - } - pn_min &= DP_EDP_PWMGEN_BIT_COUNT_MASK; - pn_max &= DP_EDP_PWMGEN_BIT_COUNT_MASK; - - fxp_min = DIV_ROUND_CLOSEST(fxp * 3, 4); - fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4); - if (fxp_min < (1 << pn_min) || (255 << pn_max) < fxp_max) { - drm_dbg_kms(&i915->drm, - "VBT defined backlight frequency out of range\n"); - return max_backlight; - } - - for (pn = pn_max; pn >= pn_min; pn--) { - f = clamp(DIV_ROUND_CLOSEST(fxp, 1 << pn), 1, 255); - fxp_actual = f << pn; - if (fxp_min <= fxp_actual && fxp_actual <= fxp_max) - break; - } - - drm_dbg_kms(&i915->drm, "Using eDP pwmgen bit count of %d\n", pn); - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT, pn) < 0) { - drm_dbg_kms(&i915->drm, - "Failed to write aux pwmgen bit count\n"); - return max_backlight; - } - panel->backlight.edp.vesa.pwmgen_bit_count = pn; - - max_backlight = (1 << pn) - 1; + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - return max_backlight; + drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info); } -static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, - enum pipe pipe) +static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe) { + struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_panel *panel = &connector->panel; + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u16 current_level; + u8 current_mode; + int ret; - panel->backlight.max = intel_dp_aux_vesa_calc_max_backlight(connector); - if (!panel->backlight.max) - return -ENODEV; + ret = drm_edp_backlight_init(&intel_dp->aux, &panel->backlight.edp.vesa.info, + i915->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, + &current_level, &current_mode); + if (ret < 0) + return ret; + panel->backlight.max = panel->backlight.edp.vesa.info.max; panel->backlight.min = 0; - panel->backlight.level = intel_dp_aux_vesa_get_backlight(connector, pipe); - panel->backlight.enabled = intel_dp_aux_vesa_backlight_dpcd_mode(connector) && - panel->backlight.level != 0; + if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { + panel->backlight.level = current_level; + panel->backlight.enabled = panel->backlight.level != 0; + } else { + panel->backlight.level = panel->backlight.max; + panel->backlight.enabled = false; + } return 0; } @@ -585,16 +340,12 @@ intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct drm_i915_private *i915 = dp_to_i915(intel_dp); - /* Check the eDP Display control capabilities registers to determine if - * the panel can support backlight control over the aux channel. - * - * TODO: We currently only support AUX only backlight configurations, not backlights which + /* TODO: We currently only support AUX only backlight configurations, not backlights which * require a mix of PWM and AUX controls to work. In the mean time, these machines typically * work just fine using normal PWM controls anyway.
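The refactor above moves the VESA eDP backlight handling onto the shared DRM helpers. A rough sketch of the resulting flow, assuming the drm_edp_backlight_*() entry points used in the hunks above and trimming i915's surrounding plumbing (illustrative only, not code from the patch):

#include <drm/drm_dp_helper.h>

/* Probe the panel's DPCD backlight caps once, then drive brightness
 * through the shared helpers instead of hand-rolled DPCD writes. */
static int example_vesa_backlight_setup(struct drm_dp_aux *aux,
					struct drm_edp_backlight_info *info,
					const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE],
					u16 pwm_freq_hz)
{
	u16 current_level;
	u8 current_mode;
	int ret;

	if (!drm_edp_backlight_supported(edp_dpcd))
		return -ENODEV;

	/* Reads the PWMGEN caps and picks the frequency divider internally. */
	ret = drm_edp_backlight_init(aux, info, pwm_freq_hz, edp_dpcd,
				     &current_level, &current_mode);
	if (ret < 0)
		return ret;

	/*
	 * From here the enable/disable/set_level hooks reduce to single
	 * drm_edp_backlight_enable()/_disable()/_set_level() calls, as in
	 * the hunks above.
	 */
	return 0;
}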
*/ - if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP && - (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && - (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP)) { + if ((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && + drm_edp_backlight_supported(intel_dp->edp_dpcd)) { drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n"); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index b170e272bdee..8d13d7b26a25 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -308,9 +308,9 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, * connector */ if (new_crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(new_crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc); struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, intel_crtc); + intel_atomic_get_new_crtc_state(state, crtc); if (!crtc_state || !drm_atomic_crtc_needs_modeset(&crtc_state->uapi) || @@ -348,6 +348,16 @@ static void wait_for_act_sent(struct intel_encoder *encoder, drm_dp_check_act_status(&intel_dp->mst_mgr); } +static void intel_mst_pre_disable_dp(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder, old_crtc_state, + old_conn_state); +} + static void intel_mst_disable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -372,9 +382,6 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, if (ret) { drm_dbg_kms(&i915->drm, "failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder, - old_crtc_state, old_conn_state); } static void intel_mst_post_disable_dp(struct intel_atomic_state *state, @@ -542,7 +549,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, struct intel_digital_port *dig_port = intel_mst->primary; struct intel_dp *intel_dp = &dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 val; + enum transcoder trans = pipe_config->cpu_transcoder; drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); @@ -550,12 +557,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); - val = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder)); - val |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; - intel_de_write(dev_priv, - TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder), - val); + intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(trans), 0, + TRANS_DDI_DP_VC_PAYLOAD_ALLOC); drm_dbg_kms(&dev_priv->drm, "active links %d\n", intel_dp->active_mst_links); @@ -564,6 +567,10 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_dp_update_payload_part2(&intel_dp->mst_mgr); + if (DISPLAY_VER(dev_priv) >= 12 && pipe_config->fec_enable) + intel_de_rmw(dev_priv, CHICKEN_TRANS(trans), 0, + FECSTALL_DIS_DPTSTREAM_DPTTG); + intel_enable_pipe(pipe_config); intel_crtc_vblank_on(pipe_config); @@ -835,13 +842,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - if (DISPLAY_VER(dev_priv) <= 12) { - ret = intel_dp_hdcp_init(dig_port, intel_connector); - 
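In the intel_dp_mst.c hunk above, the open-coded read/OR/write of TRANS_DDI_FUNC_CTL collapses into a single intel_de_rmw() call. A minimal sketch of the read-modify-write convention being assumed here, built from the intel_de_read()/intel_de_write() accessors already visible in this diff (illustrative only; passing clear = 0, as above, just ORs the new bits in):

/* Roughly what an rmw helper does for a display register: drop the
 * bits in "clear", then OR in "set", returning the old value. */
static u32 example_de_rmw(struct drm_i915_private *dev_priv, i915_reg_t reg,
			  u32 clear, u32 set)
{
	u32 old = intel_de_read(dev_priv, reg);

	intel_de_write(dev_priv, reg, (old & ~clear) | set);

	return old;
}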
if (ret) - drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP MST init failed, skipping.\n", - connector->name, connector->base.id); - } - + ret = intel_dp_hdcp_init(dig_port, intel_connector); + if (ret) + drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP MST init failed, skipping.\n", + connector->name, connector->base.id); /* * Reuse the prop from the SST connector because we're * not allowed to create new props after device registration. @@ -906,6 +910,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->compute_config_late = intel_dp_mst_compute_config_late; + intel_encoder->pre_disable = intel_mst_pre_disable_dp; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; intel_encoder->update_pipe = intel_ddi_update_pipe; diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 89635da9f6f6..14515e62c05e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -11,6 +11,7 @@ #include "intel_lvds.h" #include "intel_panel.h" #include "intel_sideband.h" +#include "display/intel_snps_phy.h" struct intel_limit { struct { @@ -923,12 +924,13 @@ static int hsw_crtc_compute_clock(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct intel_encoder *encoder = + intel_get_crtc_new_encoder(state, crtc_state); - if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) || - DISPLAY_VER(dev_priv) >= 11) { - struct intel_encoder *encoder = - intel_get_crtc_new_encoder(state, crtc_state); - + if (IS_DG2(dev_priv)) { + return intel_mpllb_calc_state(crtc_state, encoder); + } else if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) || + DISPLAY_VER(dev_priv) >= 11) { if (!intel_reserve_shared_dplls(state, crtc, encoder)) { drm_dbg_kms(&dev_priv->drm, "failed to find PLL for pipe %c\n", diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 71ac57670043..5c91d125a337 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -168,7 +168,7 @@ intel_combo_pll_enable_reg(struct drm_i915_private *i915, else if (IS_JSL_EHL(i915) && (pll->info->id == DPLL_ID_EHL_DPLL4)) return MG_PLL_ENABLE(0); - return CNL_DPLL_ENABLE(pll->info->id); + return ICL_DPLL_ENABLE(pll->info->id); } static i915_reg_t @@ -2346,160 +2346,7 @@ static const struct intel_dpll_mgr bxt_pll_mgr = { .dump_hw_state = bxt_dump_hw_state, }; -static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll) -{ - const enum intel_dpll_id id = pll->info->id; - u32 val; - - /* 1. Enable DPLL power in DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val |= PLL_POWER_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */ - if (intel_de_wait_for_set(dev_priv, CNL_DPLL_ENABLE(id), - PLL_POWER_STATE, 5)) - drm_err(&dev_priv->drm, "PLL %d Power not enabled\n", id); - - /* - * 3. Configure DPLL_CFGCR0 to set SSC enable/disable, - * select DP mode, and set DP link rate. - */ - val = pll->state.hw_state.cfgcr0; - intel_de_write(dev_priv, CNL_DPLL_CFGCR0(id), val); - - /* 4. 
Reab back to ensure writes completed */ - intel_de_posting_read(dev_priv, CNL_DPLL_CFGCR0(id)); - - /* 3. Configure DPLL_CFGCR0 */ - /* Avoid touch CFGCR1 if HDMI mode is not enabled */ - if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { - val = pll->state.hw_state.cfgcr1; - intel_de_write(dev_priv, CNL_DPLL_CFGCR1(id), val); - /* 4. Reab back to ensure writes completed */ - intel_de_posting_read(dev_priv, CNL_DPLL_CFGCR1(id)); - } - - /* - * 5. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence Before Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* 6. Enable DPLL in DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val |= PLL_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 7. Wait for PLL lock status in DPLL_ENABLE. */ - if (intel_de_wait_for_set(dev_priv, CNL_DPLL_ENABLE(id), PLL_LOCK, 5)) - drm_err(&dev_priv->drm, "PLL %d not locked\n", id); - - /* - * 8. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence After Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* - * 9. turn on the clock for the DDI and map the DPLL to the DDI - * Done at intel_ddi_clk_select - */ -} - -static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll) -{ - const enum intel_dpll_id id = pll->info->id; - u32 val; - - /* - * 1. Configure DPCLKA_CFGCR0 to turn off the clock for the DDI. - * Done at intel_ddi_post_disable - */ - - /* - * 2. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence Before Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* 3. Disable DPLL through DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val &= ~PLL_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 4. Wait for PLL not locked status in DPLL_ENABLE. */ - if (intel_de_wait_for_clear(dev_priv, CNL_DPLL_ENABLE(id), PLL_LOCK, 5)) - drm_err(&dev_priv->drm, "PLL %d locked\n", id); - - /* - * 5. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence After Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* 6. Disable DPLL power in DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val &= ~PLL_POWER_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 7. Wait for DPLL power state disabled in DPLL_ENABLE. 
*/ - if (intel_de_wait_for_clear(dev_priv, CNL_DPLL_ENABLE(id), - PLL_POWER_STATE, 5)) - drm_err(&dev_priv->drm, "PLL %d Power not disabled\n", id); -} - -static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll, - struct intel_dpll_hw_state *hw_state) -{ - const enum intel_dpll_id id = pll->info->id; - intel_wakeref_t wakeref; - u32 val; - bool ret; - - wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_DISPLAY_CORE); - if (!wakeref) - return false; - - ret = false; - - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - if (!(val & PLL_ENABLE)) - goto out; - - val = intel_de_read(dev_priv, CNL_DPLL_CFGCR0(id)); - hw_state->cfgcr0 = val; - - /* avoid reading back stale values if HDMI mode is not enabled */ - if (val & DPLL_CFGCR0_HDMI_MODE) { - hw_state->cfgcr1 = intel_de_read(dev_priv, - CNL_DPLL_CFGCR1(id)); - } - ret = true; - -out: - intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); - - return ret; -} - -static void cnl_wrpll_get_multipliers(int bestdiv, int *pdiv, +static void icl_wrpll_get_multipliers(int bestdiv, int *pdiv, int *qdiv, int *kdiv) { /* even dividers */ @@ -2538,7 +2385,7 @@ static void cnl_wrpll_get_multipliers(int bestdiv, int *pdiv, } } -static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, +static void icl_wrpll_params_populate(struct skl_wrpll_params *params, u32 dco_freq, u32 ref_freq, int pdiv, int qdiv, int kdiv) { @@ -2586,349 +2433,19 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, params->dco_fraction = dco & 0x7fff; } -static bool -__cnl_ddi_calculate_wrpll(struct intel_crtc_state *crtc_state, - struct skl_wrpll_params *wrpll_params, - int ref_clock) -{ - u32 afe_clock = crtc_state->port_clock * 5; - u32 dco_min = 7998000; - u32 dco_max = 10000000; - u32 dco_mid = (dco_min + dco_max) / 2; - static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, - 18, 20, 24, 28, 30, 32, 36, 40, - 42, 44, 48, 50, 52, 54, 56, 60, - 64, 66, 68, 70, 72, 76, 78, 80, - 84, 88, 90, 92, 96, 98, 100, 102, - 3, 5, 7, 9, 15, 21 }; - u32 dco, best_dco = 0, dco_centrality = 0; - u32 best_dco_centrality = U32_MAX; /* Spec meaning of 999999 MHz */ - int d, best_div = 0, pdiv = 0, qdiv = 0, kdiv = 0; - - for (d = 0; d < ARRAY_SIZE(dividers); d++) { - dco = afe_clock * dividers[d]; - - if ((dco <= dco_max) && (dco >= dco_min)) { - dco_centrality = abs(dco - dco_mid); - - if (dco_centrality < best_dco_centrality) { - best_dco_centrality = dco_centrality; - best_div = dividers[d]; - best_dco = dco; - } - } - } - - if (best_div == 0) - return false; - - cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); - cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, - pdiv, qdiv, kdiv); - - return true; -} - -static bool -cnl_ddi_calculate_wrpll(struct intel_crtc_state *crtc_state, - struct skl_wrpll_params *wrpll_params) -{ - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - - return __cnl_ddi_calculate_wrpll(crtc_state, wrpll_params, - i915->dpll.ref_clks.nssc); -} - -static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) -{ - u32 cfgcr0, cfgcr1; - struct skl_wrpll_params wrpll_params = { 0, }; - - cfgcr0 = DPLL_CFGCR0_HDMI_MODE; - - if (!cnl_ddi_calculate_wrpll(crtc_state, &wrpll_params)) - return false; - - cfgcr0 |= DPLL_CFGCR0_DCO_FRACTION(wrpll_params.dco_fraction) | - wrpll_params.dco_integer; - - cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(wrpll_params.qdiv_ratio) | - DPLL_CFGCR1_QDIV_MODE(wrpll_params.qdiv_mode) | 
- DPLL_CFGCR1_KDIV(wrpll_params.kdiv) | - DPLL_CFGCR1_PDIV(wrpll_params.pdiv) | - DPLL_CFGCR1_CENTRAL_FREQ; - - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); - - crtc_state->dpll_hw_state.cfgcr0 = cfgcr0; - crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; - return true; -} - /* - * Display WA #22010492432: ehl, tgl + * Display WA #22010492432: ehl, tgl, adl-p * Program half of the nominal DCO divider fraction value. */ static bool ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && - IS_JSL_EHL_REVID(i915, EHL_REVID_B0, REVID_FOREVER)) || - IS_TIGERLAKE(i915)) && + IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || + IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) && i915->dpll.ref_clks.nssc == 38400; } -static int __cnl_ddi_wrpll_get_freq(struct drm_i915_private *dev_priv, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state, - int ref_clock) -{ - u32 dco_fraction; - u32 p0, p1, p2, dco_freq; - - p0 = pll_state->cfgcr1 & DPLL_CFGCR1_PDIV_MASK; - p2 = pll_state->cfgcr1 & DPLL_CFGCR1_KDIV_MASK; - - if (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_MODE(1)) - p1 = (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_RATIO_MASK) >> - DPLL_CFGCR1_QDIV_RATIO_SHIFT; - else - p1 = 1; - - - switch (p0) { - case DPLL_CFGCR1_PDIV_2: - p0 = 2; - break; - case DPLL_CFGCR1_PDIV_3: - p0 = 3; - break; - case DPLL_CFGCR1_PDIV_5: - p0 = 5; - break; - case DPLL_CFGCR1_PDIV_7: - p0 = 7; - break; - } - - switch (p2) { - case DPLL_CFGCR1_KDIV_1: - p2 = 1; - break; - case DPLL_CFGCR1_KDIV_2: - p2 = 2; - break; - case DPLL_CFGCR1_KDIV_3: - p2 = 3; - break; - } - - dco_freq = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_INTEGER_MASK) * - ref_clock; - - dco_fraction = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> - DPLL_CFGCR0_DCO_FRACTION_SHIFT; - - if (ehl_combo_pll_div_frac_wa_needed(dev_priv)) - dco_fraction *= 2; - - dco_freq += (dco_fraction * ref_clock) / 0x8000; - - if (drm_WARN_ON(&dev_priv->drm, p0 == 0 || p1 == 0 || p2 == 0)) - return 0; - - return dco_freq / (p0 * p1 * p2 * 5); -} - -static int cnl_ddi_wrpll_get_freq(struct drm_i915_private *i915, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state) -{ - return __cnl_ddi_wrpll_get_freq(i915, pll, pll_state, - i915->dpll.ref_clks.nssc); -} - -static bool -cnl_ddi_dp_set_dpll_hw_state(struct intel_crtc_state *crtc_state) -{ - u32 cfgcr0; - - cfgcr0 = DPLL_CFGCR0_SSC_ENABLE; - - switch (crtc_state->port_clock / 2) { - case 81000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_810; - break; - case 135000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1350; - break; - case 270000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_2700; - break; - /* eDP 1.4 rates */ - case 162000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1620; - break; - case 108000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1080; - break; - case 216000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_2160; - break; - case 324000: - /* Some SKUs may require elevated I/O voltage to support this */ - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_3240; - break; - case 405000: - /* Some SKUs may require elevated I/O voltage to support this */ - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_4050; - break; - } - - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); - - crtc_state->dpll_hw_state.cfgcr0 = cfgcr0; - - return true; -} - -static int cnl_ddi_lcpll_get_freq(struct drm_i915_private *i915, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state) -{ - int link_clock = 0; - - switch (pll_state->cfgcr0 & 
DPLL_CFGCR0_LINK_RATE_MASK) { - case DPLL_CFGCR0_LINK_RATE_810: - link_clock = 81000; - break; - case DPLL_CFGCR0_LINK_RATE_1080: - link_clock = 108000; - break; - case DPLL_CFGCR0_LINK_RATE_1350: - link_clock = 135000; - break; - case DPLL_CFGCR0_LINK_RATE_1620: - link_clock = 162000; - break; - case DPLL_CFGCR0_LINK_RATE_2160: - link_clock = 216000; - break; - case DPLL_CFGCR0_LINK_RATE_2700: - link_clock = 270000; - break; - case DPLL_CFGCR0_LINK_RATE_3240: - link_clock = 324000; - break; - case DPLL_CFGCR0_LINK_RATE_4050: - link_clock = 405000; - break; - default: - drm_WARN(&i915->drm, 1, "Unsupported link rate\n"); - break; - } - - return link_clock * 2; -} - -static bool cnl_get_dpll(struct intel_atomic_state *state, - struct intel_crtc *crtc, - struct intel_encoder *encoder) -{ - struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - struct intel_shared_dpll *pll; - bool bret; - - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { - bret = cnl_ddi_hdmi_pll_dividers(crtc_state); - if (!bret) { - drm_dbg_kms(&i915->drm, - "Could not get HDMI pll dividers.\n"); - return false; - } - } else if (intel_crtc_has_dp_encoder(crtc_state)) { - bret = cnl_ddi_dp_set_dpll_hw_state(crtc_state); - if (!bret) { - drm_dbg_kms(&i915->drm, - "Could not set DP dpll HW state.\n"); - return false; - } - } else { - drm_dbg_kms(&i915->drm, - "Skip DPLL setup for output_types 0x%x\n", - crtc_state->output_types); - return false; - } - - pll = intel_find_shared_dpll(state, crtc, - &crtc_state->dpll_hw_state, - BIT(DPLL_ID_SKL_DPLL2) | - BIT(DPLL_ID_SKL_DPLL1) | - BIT(DPLL_ID_SKL_DPLL0)); - if (!pll) { - drm_dbg_kms(&i915->drm, "No PLL selected\n"); - return false; - } - - intel_reference_shared_dpll(state, crtc, - pll, &crtc_state->dpll_hw_state); - - crtc_state->shared_dpll = pll; - - return true; -} - -static int cnl_ddi_pll_get_freq(struct drm_i915_private *i915, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state) -{ - if (pll_state->cfgcr0 & DPLL_CFGCR0_HDMI_MODE) - return cnl_ddi_wrpll_get_freq(i915, pll, pll_state); - else - return cnl_ddi_lcpll_get_freq(i915, pll, pll_state); -} - -static void cnl_update_dpll_ref_clks(struct drm_i915_private *i915) -{ - /* No SSC reference */ - i915->dpll.ref_clks.nssc = i915->cdclk.hw.ref; -} - -static void cnl_dump_hw_state(struct drm_i915_private *dev_priv, - const struct intel_dpll_hw_state *hw_state) -{ - drm_dbg_kms(&dev_priv->drm, "dpll_hw_state: " - "cfgcr0: 0x%x, cfgcr1: 0x%x\n", - hw_state->cfgcr0, - hw_state->cfgcr1); -} - -static const struct intel_shared_dpll_funcs cnl_ddi_pll_funcs = { - .enable = cnl_ddi_pll_enable, - .disable = cnl_ddi_pll_disable, - .get_hw_state = cnl_ddi_pll_get_hw_state, - .get_freq = cnl_ddi_pll_get_freq, -}; - -static const struct dpll_info cnl_plls[] = { - { "DPLL 0", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, - { "DPLL 1", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, - { "DPLL 2", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, - { }, -}; - -static const struct intel_dpll_mgr cnl_pll_mgr = { - .dpll_info = cnl_plls, - .get_dplls = cnl_get_dpll, - .put_dplls = intel_put_dpll, - .update_ref_clks = cnl_update_dpll_ref_clks, - .dump_hw_state = cnl_dump_hw_state, -}; - struct icl_combo_pll_params { int clock; struct skl_wrpll_params wrpll; @@ -3105,17 +2622,104 @@ icl_calc_wrpll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *wrpll_params) { struct drm_i915_private *i915 = 
to_i915(crtc_state->uapi.crtc->dev); + int ref_clock = icl_wrpll_ref_clock(i915); + u32 afe_clock = crtc_state->port_clock * 5; + u32 dco_min = 7998000; + u32 dco_max = 10000000; + u32 dco_mid = (dco_min + dco_max) / 2; + static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 24, 28, 30, 32, 36, 40, + 42, 44, 48, 50, 52, 54, 56, 60, + 64, 66, 68, 70, 72, 76, 78, 80, + 84, 88, 90, 92, 96, 98, 100, 102, + 3, 5, 7, 9, 15, 21 }; + u32 dco, best_dco = 0, dco_centrality = 0; + u32 best_dco_centrality = U32_MAX; /* Spec meaning of 999999 MHz */ + int d, best_div = 0, pdiv = 0, qdiv = 0, kdiv = 0; - return __cnl_ddi_calculate_wrpll(crtc_state, wrpll_params, - icl_wrpll_ref_clock(i915)); + for (d = 0; d < ARRAY_SIZE(dividers); d++) { + dco = afe_clock * dividers[d]; + + if (dco <= dco_max && dco >= dco_min) { + dco_centrality = abs(dco - dco_mid); + + if (dco_centrality < best_dco_centrality) { + best_dco_centrality = dco_centrality; + best_div = dividers[d]; + best_dco = dco; + } + } + } + + if (best_div == 0) + return false; + + icl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); + icl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, + pdiv, qdiv, kdiv); + + return true; } static int icl_ddi_combo_pll_get_freq(struct drm_i915_private *i915, const struct intel_shared_dpll *pll, const struct intel_dpll_hw_state *pll_state) { - return __cnl_ddi_wrpll_get_freq(i915, pll, pll_state, - icl_wrpll_ref_clock(i915)); + int ref_clock = icl_wrpll_ref_clock(i915); + u32 dco_fraction; + u32 p0, p1, p2, dco_freq; + + p0 = pll_state->cfgcr1 & DPLL_CFGCR1_PDIV_MASK; + p2 = pll_state->cfgcr1 & DPLL_CFGCR1_KDIV_MASK; + + if (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_MODE(1)) + p1 = (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_RATIO_MASK) >> + DPLL_CFGCR1_QDIV_RATIO_SHIFT; + else + p1 = 1; + + switch (p0) { + case DPLL_CFGCR1_PDIV_2: + p0 = 2; + break; + case DPLL_CFGCR1_PDIV_3: + p0 = 3; + break; + case DPLL_CFGCR1_PDIV_5: + p0 = 5; + break; + case DPLL_CFGCR1_PDIV_7: + p0 = 7; + break; + } + + switch (p2) { + case DPLL_CFGCR1_KDIV_1: + p2 = 1; + break; + case DPLL_CFGCR1_KDIV_2: + p2 = 2; + break; + case DPLL_CFGCR1_KDIV_3: + p2 = 3; + break; + } + + dco_freq = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_INTEGER_MASK) * + ref_clock; + + dco_fraction = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> + DPLL_CFGCR0_DCO_FRACTION_SHIFT; + + if (ehl_combo_pll_div_frac_wa_needed(i915)) + dco_fraction *= 2; + + dco_freq += (dco_fraction * ref_clock) / 0x8000; + + if (drm_WARN_ON(&i915->drm, p0 == 0 || p1 == 0 || p2 == 0)) + return 0; + + return dco_freq / (p0 * p1 * p2 * 5); } static void icl_calc_dpll_state(struct drm_i915_private *i915, @@ -4131,6 +3735,31 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, drm_err(&dev_priv->drm, "PLL %d not locked\n", pll->info->id); } +static void adlp_cmtg_clock_gating_wa(struct drm_i915_private *i915, struct intel_shared_dpll *pll) +{ + u32 val; + + if (!IS_ADLP_DISPLAY_STEP(i915, STEP_A0, STEP_B0) || + pll->info->id != DPLL_ID_ICL_DPLL0) + return; + /* + * Wa_16011069516:adl-p[a0] + * + * All CMTG regs are unreliable until CMTG clock gating is disabled, + * so we can only assume the default TRANS_CMTG_CHICKEN reg value and + * sanity check this assumption with a double read, which presumably + * returns the correct value even with clock gating on. + * + * Instead of the usual place for workarounds we apply this one here, + * since TRANS_CMTG_CHICKEN is only accessible while DPLL0 is enabled. 
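To put numbers on the divider search in the icl_calc_wrpll() hunk earlier in this file's diff (a worked example, not text from the patch): for a DP HBR2 link, port_clock = 270,000 kHz, so afe_clock = 5 x 270,000 = 1,350,000 kHz. Of the candidate dividers, only 6 and 7 land the DCO inside [7,998,000, 10,000,000] kHz (8,100,000 and 9,450,000 kHz respectively); 9,450,000 sits closer to the 8,999,000 kHz midpoint, so divider 7 wins and is then split into pdiv/qdiv/kdiv by icl_wrpll_get_multipliers().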
+ */ + val = intel_de_read(i915, TRANS_CMTG_CHICKEN); + val = intel_de_read(i915, TRANS_CMTG_CHICKEN); + intel_de_write(i915, TRANS_CMTG_CHICKEN, DISABLE_DPT_CLK_GATING); + if (drm_WARN_ON(&i915->drm, val & ~DISABLE_DPT_CLK_GATING)) + drm_dbg_kms(&i915->drm, "Unexpected flags in TRANS_CMTG_CHICKEN: %08x\n", val); +} + static void combo_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { @@ -4160,6 +3789,8 @@ static void combo_pll_enable(struct drm_i915_private *dev_priv, icl_pll_enable(dev_priv, pll, enable_reg); + adlp_cmtg_clock_gating_wa(dev_priv, pll); + /* DVFS post sequence would be here. See the comment above. */ } @@ -4462,7 +4093,10 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + /* No shared DPLLs on DG2; port PLLs are part of the PHY */ + dpll_mgr = NULL; + else if (IS_ALDERLAKE_P(dev_priv)) dpll_mgr = &adlp_pll_mgr; else if (IS_ALDERLAKE_S(dev_priv)) dpll_mgr = &adls_pll_mgr; @@ -4476,8 +4110,6 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_mgr = &ehl_pll_mgr; else if (DISPLAY_VER(dev_priv) >= 11) dpll_mgr = &icl_pll_mgr; - else if (IS_CANNONLAKE(dev_priv)) - dpll_mgr = &cnl_pll_mgr; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dpll_mgr = &bxt_pll_mgr; else if (DISPLAY_VER(dev_priv) == 9) @@ -4668,7 +4300,12 @@ void intel_dpll_readout_hw_state(struct drm_i915_private *i915) static void sanitize_dpll_state(struct drm_i915_private *i915, struct intel_shared_dpll *pll) { - if (!pll->on || pll->active_mask) + if (!pll->on) + return; + + adlp_cmtg_clock_gating_wa(i915, pll); + + if (pll->active_mask) return; drm_dbg_kms(&i915->drm, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index 7fd031a70cfd..30e0aa5ca109 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -204,9 +204,8 @@ struct intel_dpll_hw_state { /* HDMI only, 0 when used for DP */ u32 cfgcr1, cfgcr2; - /* cnl */ + /* icl */ u32 cfgcr0; - /* CNL also uses cfgcr1 */ /* bxt */ u32 ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 1847a161cb37..ddfc17e21668 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -104,7 +104,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) int i; u32 fbc_ctl; - /* Note: fbc.threshold == 1 for i8xx */ + /* Note: fbc.limit == 1 for i8xx */ cfb_pitch = params->cfb_size / FBC_LL_SIZE; if (params->fb.stride < cfb_pitch) cfb_pitch = params->fb.stride; @@ -148,16 +148,35 @@ static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv) return intel_de_read(dev_priv, FBC_CONTROL) & FBC_CTL_EN; } +static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915) +{ + const struct intel_fbc_reg_params *params = &i915->fbc.params; + int limit = i915->fbc.limit; + + if (params->fb.format->cpp[0] == 2) + limit <<= 1; + + switch (limit) { + default: + MISSING_CASE(limit); + fallthrough; + case 1: + return DPFC_CTL_LIMIT_1X; + case 2: + return DPFC_CTL_LIMIT_2X; + case 4: + return DPFC_CTL_LIMIT_4X; + } +} + static void g4x_fbc_activate(struct drm_i915_private *dev_priv) { struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane) | DPFC_SR_EN; - if (params->fb.format->cpp[0] == 
2) - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - else - dpfc_ctl |= DPFC_CTL_LIMIT_1X; + + dpfc_ctl |= g4x_dpfc_ctl_limit(dev_priv); if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id; @@ -235,24 +254,10 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) { struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; - int threshold = dev_priv->fbc.threshold; dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane); - if (params->fb.format->cpp[0] == 2) - threshold++; - switch (threshold) { - case 4: - case 3: - dpfc_ctl |= DPFC_CTL_LIMIT_4X; - break; - case 2: - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - break; - case 1: - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - break; - } + dpfc_ctl |= g4x_dpfc_ctl_limit(dev_priv); if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN; @@ -300,7 +305,6 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) { struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; - int threshold = dev_priv->fbc.threshold; /* Display WA #0529: skl, kbl, bxt. */ if (DISPLAY_VER(dev_priv) == 9) { @@ -318,21 +322,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) if (IS_IVYBRIDGE(dev_priv)) dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.i9xx_plane); - if (params->fb.format->cpp[0] == 2) - threshold++; - - switch (threshold) { - case 4: - case 3: - dpfc_ctl |= DPFC_CTL_LIMIT_4X; - break; - case 2: - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - break; - case 1: - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - break; - } + dpfc_ctl |= g4x_dpfc_ctl_limit(dev_priv); if (params->fence_id >= 0) { dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; @@ -433,13 +423,8 @@ static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915) return BIT_ULL(32); } -static int find_compression_threshold(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, - unsigned int size, - unsigned int fb_cpp) +static u64 intel_fbc_stolen_end(struct drm_i915_private *dev_priv) { - int compression_threshold = 1; - int ret; u64 end; /* The FBC hardware for BDW/SKL doesn't have access to the stolen @@ -452,51 +437,69 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, else end = U64_MAX; - end = min(end, intel_fbc_cfb_base_max(dev_priv)); + return min(end, intel_fbc_cfb_base_max(dev_priv)); +} - /* HACK: This code depends on what we will do in *_enable_fbc. If that - * code changes, this code needs to change as well. - * - * The enable_fbc code will attempt to use one of our 2 compression - * thresholds, therefore, in that case, we only have 1 resort. +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp) +{ + /* + * FIXME: FBC1 can have arbitrary cfb stride, + * so we could support different compression ratios. */ + if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv)) + return 1; + + /* WaFbcOnly1to1Ratio:ctg */ + if (IS_G4X(dev_priv)) + return 1; + + /* FBC2 can only do 1:1, 1:2, 1:4 */ + return fb_cpp == 2 ? 2 : 4; +} + +static int find_compression_limit(struct drm_i915_private *dev_priv, + unsigned int size, + unsigned int fb_cpp) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + u64 end = intel_fbc_stolen_end(dev_priv); + int ret, limit = 1; /* Try to over-allocate to reduce reallocations and fragmentation. 
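The new allocation path above first tries twice the nominal CFB size, then retries at progressively smaller sizes while raising fbc->limit, and the rest of the FBC code sizes its checks against that limit. A small sketch of the resulting check (illustrative only; the helper name is made up, the comparison mirrors the intel_fbc_cfb_size_changed() hunk further down):

/* A compressed framebuffer allocated at 1/limit of the nominal size is
 * still usable: the nominal size only has to fit within allocated * limit.
 * g4x_dpfc_ctl_limit() above then maps the limit (shifted up for 16bpp,
 * per the cpp check) onto DPFC_CTL_LIMIT_{1X,2X,4X}. */
static bool example_cfb_fits(u64 nominal_cfb_size, u64 allocated_size, int limit)
{
	return nominal_cfb_size <= allocated_size * limit;
}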
*/ - ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size <<= 1, - 4096, 0, end); + ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc->compressed_fb, + size <<= 1, 4096, 0, end); if (ret == 0) - return compression_threshold; + return limit; -again: - /* HW's ability to limit the CFB is 1:4 */ - if (compression_threshold > 4 || - (fb_cpp == 2 && compression_threshold == 2)) - return 0; - - ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1, - 4096, 0, end); - if (ret && DISPLAY_VER(dev_priv) <= 4) { - return 0; - } else if (ret) { - compression_threshold <<= 1; - goto again; - } else { - return compression_threshold; + for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) { + ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc->compressed_fb, + size >>= 1, 4096, 0, end); + if (ret == 0) + return limit; } + + return 0; } static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, unsigned int size, unsigned int fb_cpp) { struct intel_fbc *fbc = &dev_priv->fbc; - struct drm_mm_node *compressed_llb; int ret; drm_WARN_ON(&dev_priv->drm, drm_mm_node_allocated(&fbc->compressed_fb)); + drm_WARN_ON(&dev_priv->drm, + drm_mm_node_allocated(&fbc->compressed_llb)); - ret = find_compression_threshold(dev_priv, &fbc->compressed_fb, - size, fb_cpp); + if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv)) { + ret = i915_gem_stolen_insert_node(dev_priv, &fbc->compressed_llb, + 4096, 4096); + if (ret) + goto err; + } + + ret = find_compression_limit(dev_priv, size, fb_cpp); if (!ret) goto err_llb; else if (ret > 1) { @@ -504,51 +507,46 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); } - fbc->threshold = ret; + fbc->limit = ret; - if (DISPLAY_VER(dev_priv) >= 5) + drm_dbg_kms(&dev_priv->drm, + "reserved %llu bytes of contiguous stolen space for FBC, limit: %d\n", + fbc->compressed_fb.size, fbc->limit); + + return 0; + +err_llb: + if (drm_mm_node_allocated(&fbc->compressed_llb)) + i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_llb); +err: + if (drm_mm_initialized(&dev_priv->mm.stolen)) + drm_info_once(&dev_priv->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. 
Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + return -ENOSPC; +} + +static void intel_fbc_program_cfb(struct drm_i915_private *dev_priv) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + + if (DISPLAY_VER(dev_priv) >= 5) { intel_de_write(dev_priv, ILK_DPFC_CB_BASE, fbc->compressed_fb.start); - else if (IS_GM45(dev_priv)) { + } else if (IS_GM45(dev_priv)) { intel_de_write(dev_priv, DPFC_CB_BASE, fbc->compressed_fb.start); } else { - compressed_llb = kzalloc(sizeof(*compressed_llb), GFP_KERNEL); - if (!compressed_llb) - goto err_fb; - - ret = i915_gem_stolen_insert_node(dev_priv, compressed_llb, - 4096, 4096); - if (ret) - goto err_fb; - - fbc->compressed_llb = compressed_llb; - GEM_BUG_ON(range_overflows_end_t(u64, dev_priv->dsm.start, fbc->compressed_fb.start, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, dev_priv->dsm.start, - fbc->compressed_llb->start, + fbc->compressed_llb.start, U32_MAX)); + intel_de_write(dev_priv, FBC_CFB_BASE, dev_priv->dsm.start + fbc->compressed_fb.start); intel_de_write(dev_priv, FBC_LL_BASE, - dev_priv->dsm.start + compressed_llb->start); + dev_priv->dsm.start + fbc->compressed_llb.start); } - - drm_dbg_kms(&dev_priv->drm, - "reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", - fbc->compressed_fb.size, fbc->threshold); - - return 0; - -err_fb: - kfree(compressed_llb); - i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); -err_llb: - if (drm_mm_initialized(&dev_priv->mm.stolen)) - drm_info_once(&dev_priv->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); - return -ENOSPC; } static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) @@ -558,15 +556,10 @@ static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) if (WARN_ON(intel_fbc_hw_is_active(dev_priv))) return; - if (!drm_mm_node_allocated(&fbc->compressed_fb)) - return; - - if (fbc->compressed_llb) { - i915_gem_stolen_remove_node(dev_priv, fbc->compressed_llb); - kfree(fbc->compressed_llb); - } - - i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); + if (drm_mm_node_allocated(&fbc->compressed_llb)) + i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_llb); + if (drm_mm_node_allocated(&fbc->compressed_fb)) + i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); } void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) @@ -753,7 +746,7 @@ static bool intel_fbc_cfb_size_changed(struct drm_i915_private *dev_priv) struct intel_fbc *fbc = &dev_priv->fbc; return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) > - fbc->compressed_fb.size * fbc->threshold; + fbc->compressed_fb.size * fbc->limit; } static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) @@ -763,7 +756,7 @@ static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) if ((DISPLAY_VER(dev_priv) == 9) && cache->fb.modifier != I915_FORMAT_MOD_X_TILED) - return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8; + return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8; else return 0; } @@ -919,11 +912,11 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) } /* - * Tigerlake is not supporting FBC with PSR2. + * Display 12+ is not supporting FBC with PSR2. 
* Recommendation is to keep this combination disabled * Bspec: 50422 HSD: 14010260002 */ - if (fbc->state_cache.psr2_active && IS_TIGERLAKE(dev_priv)) { + if (fbc->state_cache.psr2_active && DISPLAY_VER(dev_priv) >= 12) { fbc->no_fbc_reason = "not supported with PSR2"; return false; } @@ -1302,6 +1295,8 @@ void intel_fbc_enable(struct intel_atomic_state *state, fbc->no_fbc_reason = "FBC enabled but not active yet\n"; fbc->crtc = crtc; + + intel_fbc_program_cfb(dev_priv); out: mutex_unlock(&fbc->lock); } diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 4af40229f5ec..df05d285f0bd 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -335,32 +335,43 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) * fbcon), so we just find the biggest and use that. */ static bool intel_fbdev_init_bios(struct drm_device *dev, - struct intel_fbdev *ifbdev) + struct intel_fbdev *ifbdev) { struct drm_i915_private *i915 = to_i915(dev); struct intel_framebuffer *fb = NULL; - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; unsigned int max_size = 0; /* Find the largest fb */ - for_each_crtc(dev, crtc) { + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); struct drm_i915_gem_object *obj = - intel_fb_obj(crtc->primary->state->fb); - intel_crtc = to_intel_crtc(crtc); + intel_fb_obj(plane_state->uapi.fb); - if (!crtc->state->active || !obj) { + if (!crtc_state->uapi.active) { drm_dbg_kms(&i915->drm, - "pipe %c not active or no fb, skipping\n", - pipe_name(intel_crtc->pipe)); + "[CRTC:%d:%s] not active, skipping\n", + crtc->base.base.id, crtc->base.name); + continue; + } + + if (!obj) { + drm_dbg_kms(&i915->drm, + "[PLANE:%d:%s] no fb, skipping\n", + plane->base.base.id, plane->base.name); continue; } if (obj->base.size > max_size) { drm_dbg_kms(&i915->drm, - "found possible fb from plane %c\n", - pipe_name(intel_crtc->pipe)); - fb = to_intel_framebuffer(crtc->primary->state->fb); + "found possible fb from [PLANE:%d:%s]\n", + plane->base.base.id, plane->base.name); + fb = to_intel_framebuffer(plane_state->uapi.fb); max_size = obj->base.size; } } @@ -372,60 +383,62 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } /* Now make sure all the pipes will fit into it */ - for_each_crtc(dev, crtc) { + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); unsigned int cur_size; - intel_crtc = to_intel_crtc(crtc); - - if (!crtc->state->active) { + if (!crtc_state->uapi.active) { drm_dbg_kms(&i915->drm, - "pipe %c not active, skipping\n", - pipe_name(intel_crtc->pipe)); + "[CRTC:%d:%s] not active, skipping\n", + crtc->base.base.id, crtc->base.name); continue; } - drm_dbg_kms(&i915->drm, "checking plane %c for BIOS fb\n", - pipe_name(intel_crtc->pipe)); + drm_dbg_kms(&i915->drm, "checking [PLANE:%d:%s] for BIOS fb\n", + plane->base.base.id, plane->base.name); /* * See if the plane fb we found above will fit on this * pipe. Note we need to use the selected fb's pitch and bpp * rather than the current pipe's, since they differ. 
*/ - cur_size = crtc->state->adjusted_mode.crtc_hdisplay; + cur_size = crtc_state->uapi.adjusted_mode.crtc_hdisplay; cur_size = cur_size * fb->base.format->cpp[0]; if (fb->base.pitches[0] < cur_size) { drm_dbg_kms(&i915->drm, - "fb not wide enough for plane %c (%d vs %d)\n", - pipe_name(intel_crtc->pipe), + "fb not wide enough for [PLANE:%d:%s] (%d vs %d)\n", + plane->base.base.id, plane->base.name, cur_size, fb->base.pitches[0]); fb = NULL; break; } - cur_size = crtc->state->adjusted_mode.crtc_vdisplay; + cur_size = crtc_state->uapi.adjusted_mode.crtc_vdisplay; cur_size = intel_fb_align_height(&fb->base, 0, cur_size); cur_size *= fb->base.pitches[0]; drm_dbg_kms(&i915->drm, - "pipe %c area: %dx%d, bpp: %d, size: %d\n", - pipe_name(intel_crtc->pipe), - crtc->state->adjusted_mode.crtc_hdisplay, - crtc->state->adjusted_mode.crtc_vdisplay, + "[CRTC:%d:%s] area: %dx%d, bpp: %d, size: %d\n", + crtc->base.base.id, crtc->base.name, + crtc_state->uapi.adjusted_mode.crtc_hdisplay, + crtc_state->uapi.adjusted_mode.crtc_vdisplay, fb->base.format->cpp[0] * 8, cur_size); if (cur_size > max_size) { drm_dbg_kms(&i915->drm, - "fb not big enough for plane %c (%d vs %d)\n", - pipe_name(intel_crtc->pipe), + "fb not big enough for [PLANE:%d:%s] (%d vs %d)\n", + plane->base.base.id, plane->base.name, cur_size, max_size); fb = NULL; break; } drm_dbg_kms(&i915->drm, - "fb big enough for plane %c (%d >= %d)\n", - pipe_name(intel_crtc->pipe), + "fb big enough [PLANE:%d:%s] (%d >= %d)\n", + plane->base.base.id, plane->base.name, max_size, cur_size); } @@ -441,15 +454,20 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ - for_each_crtc(dev, crtc) { - intel_crtc = to_intel_crtc(crtc); - - if (!crtc->state->active) + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (!crtc_state->uapi.active) continue; - drm_WARN(dev, !crtc->primary->state->fb, - "re-used BIOS config but lost an fb on crtc %d\n", - crtc->base.id); + drm_WARN(dev, !plane_state->uapi.fb, + "re-used BIOS config but lost an fb on [PLANE:%d:%s]\n", + plane->base.base.id, plane->base.name); } diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index cef1061fd6cb..e10b9cd8e86e 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -4,7 +4,6 @@ */ #include "intel_atomic.h" #include "intel_ddi.h" -#include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_fdi.h" @@ -96,10 +95,10 @@ static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } } -int ilk_fdi_compute_config(struct intel_crtc *intel_crtc, - struct intel_crtc_state *pipe_config) +int ilk_fdi_compute_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *i915 = to_i915(dev); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int lane, link_bw, fdi_dotclock, ret; @@ -125,7 +124,7 @@ retry: intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, link_bw, &pipe_config->fdi_m_n, false, false); - ret = ilk_check_fdi_lanes(dev, intel_crtc->pipe, 
pipe_config); + ret = ilk_check_fdi_lanes(dev, crtc->pipe, pipe_config); if (ret == -EDEADLK) return ret; @@ -569,9 +568,9 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, u32 temp, i, rx_ctl_val; int n_entries; - intel_ddi_get_buf_trans_fdi(dev_priv, &n_entries); + encoder->get_buf_trans(encoder, crtc_state, &n_entries); - intel_prepare_dp_ddi_buffers(encoder, crtc_state); + hsw_prepare_dp_ddi_buffers(encoder, crtc_state); /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the * mode set "sequence for CRT port" document: @@ -691,9 +690,9 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -726,11 +725,11 @@ void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state) } } -void ilk_fdi_pll_disable(struct intel_crtc *intel_crtc) +void ilk_fdi_pll_disable(struct intel_crtc *crtc) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index fcf47f98ea36..ceb1bf8a8c3c 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -600,7 +600,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, int i = 0, inc, try = 0; int ret = 0; - /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + /* Display WA #0868: skl,bxt,kbl,cfl,glk */ if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, false); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) @@ -713,7 +713,7 @@ timeout: ret = -EAGAIN; out: - /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + /* Display WA #0868: skl,bxt,kbl,cfl,glk */ if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, true); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 7e51c98c475e..b04685bb6439 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -51,6 +51,7 @@ #include "intel_hdmi.h" #include "intel_lspcon.h" #include "intel_panel.h" +#include "intel_snps_phy.h" static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi) { @@ -270,8 +271,8 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); int i; @@ -286,13 +287,13 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, intel_de_write(dev_priv, reg, val); for (i = 0; i < len; i += 4) { - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), + intel_de_write(dev_priv, 
TVIDEO_DIP_DATA(crtc->pipe), *data); data++; } /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), 0); + intel_de_write(dev_priv, TVIDEO_DIP_DATA(crtc->pipe), 0); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -349,8 +350,8 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); int i; @@ -368,13 +369,13 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, intel_de_write(dev_priv, reg, val); for (i = 0; i < len; i += 4) { - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), + intel_de_write(dev_priv, TVIDEO_DIP_DATA(crtc->pipe), *data); data++; } /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), 0); + intel_de_write(dev_priv, TVIDEO_DIP_DATA(crtc->pipe), 0); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -427,8 +428,8 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + i915_reg_t reg = VLV_TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); int i; @@ -444,13 +445,13 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, for (i = 0; i < len; i += 4) { intel_de_write(dev_priv, - VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), *data); + VLV_TVIDEO_DIP_DATA(crtc->pipe), *data); data++; } /* Write every possible data byte to force correct ECC calculation. 
*/ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) intel_de_write(dev_priv, - VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), 0); + VLV_TVIDEO_DIP_DATA(crtc->pipe), 0); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -1040,10 +1041,10 @@ static void ibx_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &dig_port->hdmi; - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); u32 port = VIDEO_DIP_PORT(encoder->port); @@ -1099,9 +1100,9 @@ static void cpt_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); assert_hdmi_port_disabled(intel_hdmi); @@ -1148,9 +1149,9 @@ static void vlv_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); - i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); + i915_reg_t reg = VLV_TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); u32 port = VIDEO_DIP_PORT(encoder->port); @@ -1465,14 +1466,12 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector, { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_digital_port *dig_port = intel_attached_dig_port(connector); - struct drm_crtc *crtc = connector->base.state->crtc; - struct intel_crtc *intel_crtc = container_of(crtc, - struct intel_crtc, base); + struct intel_crtc *crtc = to_intel_crtc(connector->base.state->crtc); u32 scanline; int ret; for (;;) { - scanline = intel_de_read(dev_priv, PIPEDSL(intel_crtc->pipe)); + scanline = intel_de_read(dev_priv, PIPEDSL(crtc->pipe)); if (scanline > 100 && scanline < 200) break; usleep_range(25, 50); @@ -1852,6 +1851,16 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; + /* + * SNPS PHYs' MPLLB table-based programming can only handle a fixed + * set of link rates. + * + * FIXME: We will hopefully get an algorithmic way of programming + * the MPLLB for HDMI in the future. 
+ */ + if (IS_DG2(dev_priv)) + return intel_snps_phy_check_hdmi_link_rate(clock); + return MODE_OK; } diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 7f40e9f60bc2..e0381b0fce91 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -411,12 +411,12 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, struct intel_connector *intel_connector = lvds_encoder->attached_connector; struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); unsigned int lvds_bpp; int ret; /* Should never happen!! */ - if (DISPLAY_VER(dev_priv) < 4 && intel_crtc->pipe == 0) { + if (DISPLAY_VER(dev_priv) < 4 && crtc->pipe == 0) { drm_err(&dev_priv->drm, "Can't support LVDS on pipe A\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index dfd724e506b5..3855fba70980 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -1078,6 +1078,9 @@ void intel_opregion_resume(struct drm_i915_private *i915) opregion->asle->ardy = ASLE_ARDY_READY; } + /* Some platforms abuse the _DSM to enable MUX */ + intel_dsm_get_bios_data_funcs_supported(i915); + intel_opregion_notify_adapter(i915, PCI_D0); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 77865cf6641f..1b0daf649e82 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -32,6 +32,7 @@ #include "intel_dp_aux.h" #include "intel_hdmi.h" #include "intel_psr.h" +#include "intel_snps_phy.h" #include "intel_sprite.h" #include "skl_universal_plane.h" @@ -265,32 +266,44 @@ static u8 intel_dp_get_sink_sync_latency(struct intel_dp *intel_dp) return val; } -static u16 intel_dp_get_su_x_granulartiy(struct intel_dp *intel_dp) +static void intel_dp_get_su_granularity(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u16 val; ssize_t r; + u16 w; + u8 y; + + /* If sink don't have specific granularity requirements set legacy ones */ + if (!(intel_dp->psr_dpcd[1] & DP_PSR2_SU_GRANULARITY_REQUIRED)) { + /* As PSR2 HW sends full lines, we do not care about x granularity */ + w = 4; + y = 4; + goto exit; + } - /* - * Returning the default X granularity if granularity not required or - * if DPCD read fails - */ - if (!(intel_dp->psr_dpcd[1] & DP_PSR2_SU_GRANULARITY_REQUIRED)) - return 4; - - r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_X_GRANULARITY, &val, 2); + r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_X_GRANULARITY, &w, 2); if (r != 2) drm_dbg_kms(&i915->drm, "Unable to read DP_PSR2_SU_X_GRANULARITY\n"); - /* * Spec says that if the value read is 0 the default granularity should * be used instead. 
*/ - if (r != 2 || val == 0) - val = 4; + if (r != 2 || w == 0) + w = 4; - return val; + r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_Y_GRANULARITY, &y, 1); + if (r != 1) { + drm_dbg_kms(&i915->drm, + "Unable to read DP_PSR2_SU_Y_GRANULARITY\n"); + y = 4; + } + if (y == 0) + y = 1; + +exit: + intel_dp->psr.su_w_granularity = w; + intel_dp->psr.su_y_granularity = y; } void intel_psr_init_dpcd(struct intel_dp *intel_dp) @@ -346,8 +359,7 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) if (intel_dp->psr.sink_psr2_support) { intel_dp->psr.colorimetry_support = intel_dp_get_colorimetry_status(intel_dp); - intel_dp->psr.su_x_granularity = - intel_dp_get_su_x_granulartiy(intel_dp); + intel_dp_get_su_granularity(intel_dp); } } } @@ -407,6 +419,9 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) dpcd_val |= DP_PSR_CRC_VERIFICATION; } + if (intel_dp->psr.req_psr2_sdp_prior_scanline) + dpcd_val |= DP_PSR_SU_REGION_SCANLINE_CAPTURE; + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val); drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0); @@ -520,18 +535,47 @@ static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) static void hsw_activate_psr2(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u32 val; + u32 val = EDP_PSR2_ENABLE; - val = psr_compute_idle_frames(intel_dp) << EDP_PSR2_IDLE_FRAME_SHIFT; + val |= psr_compute_idle_frames(intel_dp) << EDP_PSR2_IDLE_FRAME_SHIFT; + + if (!IS_ALDERLAKE_P(dev_priv)) + val |= EDP_SU_TRACK_ENABLE; - val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12) val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(intel_dp->psr.sink_sync_latency + 1); val |= intel_psr2_get_tp_time(intel_dp); - if (DISPLAY_VER(dev_priv) >= 12) { + /* Wa_22012278275:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_E0)) { + static const u8 map[] = { + 2, /* 5 lines */ + 1, /* 6 lines */ + 0, /* 7 lines */ + 3, /* 8 lines */ + 6, /* 9 lines */ + 5, /* 10 lines */ + 4, /* 11 lines */ + 7, /* 12 lines */ + }; + /* + * Still using the default IO_BUFFER_WAKE and FAST_WAKE, see + * comments bellow for more information + */ + u32 tmp, lines = 7; + + val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2; + + tmp = map[lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES]; + tmp = tmp << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT; + val |= tmp; + + tmp = map[lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES]; + tmp = tmp << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT; + val |= tmp; + } else if (DISPLAY_VER(dev_priv) >= 12) { /* * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default * values from BSpec. 
In order to setting an optimal power @@ -547,10 +591,12 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_FAST_WAKE(7); } + if (intel_dp->psr.req_psr2_sdp_prior_scanline) + val |= EDP_PSR2_SU_SDP_SCANLINE; + if (intel_dp->psr.psr2_sel_fetch_enabled) { - /* WA 1408330847 */ - if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || - IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0)) + /* Wa_1408330847 */ + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, DIS_RAM_BYPASS_PSR2_MAN_TRACK, DIS_RAM_BYPASS_PSR2_MAN_TRACK); @@ -689,6 +735,10 @@ tgl_dc3co_exitline_compute_config(struct intel_dp *intel_dp, if (!dc3co_is_pipe_port_compatible(intel_dp, crtc_state)) return; + /* Wa_16011303918:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) + return; + /* * DC3CO Exit time 200us B.Spec 49196 * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1 @@ -733,7 +783,7 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, } /* Wa_14010254185 Wa_14010103792 */ - if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) { + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) { drm_dbg_kms(&dev_priv->drm, "PSR2 sel fetch not enabled, missing the implementation of WAs\n"); return false; @@ -742,6 +792,67 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, return crtc_state->enable_psr2_sel_fetch = true; } +static bool psr2_granularity_check(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + const int crtc_hdisplay = crtc_state->hw.adjusted_mode.crtc_hdisplay; + const int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay; + u16 y_granularity = 0; + + /* PSR2 HW only send full lines so we only need to validate the width */ + if (crtc_hdisplay % intel_dp->psr.su_w_granularity) + return false; + + if (crtc_vdisplay % intel_dp->psr.su_y_granularity) + return false; + + /* HW tracking is only aligned to 4 lines */ + if (!crtc_state->enable_psr2_sel_fetch) + return intel_dp->psr.su_y_granularity == 4; + + /* + * adl_p has 1 line granularity. For other platforms with SW tracking we + * can adjust the y coordinates to match sink requirement if multiple of + * 4. 
+ */ + if (IS_ALDERLAKE_P(dev_priv)) + y_granularity = intel_dp->psr.su_y_granularity; + else if (intel_dp->psr.su_y_granularity <= 2) + y_granularity = 4; + else if ((intel_dp->psr.su_y_granularity % 4) == 0) + y_granularity = intel_dp->psr.su_y_granularity; + + if (y_granularity == 0 || crtc_vdisplay % y_granularity) + return false; + + crtc_state->su_y_granularity = y_granularity; + return true; +} + +static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + const struct drm_display_mode *adjusted_mode = &crtc_state->uapi.adjusted_mode; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u32 hblank_total, hblank_ns, req_ns; + + hblank_total = adjusted_mode->crtc_hblank_end - adjusted_mode->crtc_hblank_start; + hblank_ns = div_u64(1000000ULL * hblank_total, adjusted_mode->crtc_clock); + + /* From spec: (72 / number of lanes) * 1000 / symbol clock frequency MHz */ + req_ns = (72 / crtc_state->lane_count) * 1000 / (crtc_state->port_clock / 1000); + + if ((hblank_ns - req_ns) > 100) + return true; + + if (DISPLAY_VER(dev_priv) < 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b) + return false; + + crtc_state->req_psr2_sdp_prior_scanline = true; + return true; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -760,7 +871,8 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, } /* Wa_16011181250 */ - if (IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv)) { + if (IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv) || + IS_DG2(dev_priv)) { drm_dbg_kms(&dev_priv->drm, "PSR2 is defeatured for this platform\n"); return false; } @@ -824,19 +936,6 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * HW sends SU blocks of size four scan lines, which means the starting - * X coordinate and Y granularity requirements will always be met. We - * only need to validate the SU block width is a multiple of - * x granularity. 
- */ - if (crtc_hdisplay % intel_dp->psr.su_x_granularity) { - drm_dbg_kms(&dev_priv->drm, - "PSR2 not enabled, hdisplay(%d) not multiple of %d\n", - crtc_hdisplay, intel_dp->psr.su_x_granularity); - return false; - } - if (HAS_PSR2_SEL_FETCH(dev_priv)) { if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) && !HAS_PSR_HW_TRACKING(dev_priv)) { @@ -848,11 +947,16 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, /* Wa_2209313811 */ if (!crtc_state->enable_psr2_sel_fetch && - IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) { + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) { drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported on this Display stepping\n"); return false; } + if (!psr2_granularity_check(intel_dp, crtc_state)) { + drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, SU granularity not compatible\n"); + return false; + } + if (!crtc_state->enable_psr2_sel_fetch && (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v)) { drm_dbg_kms(&dev_priv->drm, @@ -862,6 +966,20 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, PSR2 SDP indication does not fit in hblank\n"); + return false; + } + + /* Wa_16011303918:adl-p */ + if (crtc_state->vrr.enable && + IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, not compatible with HW stepping + VRR\n"); + return false; + } + tgl_dc3co_exitline_compute_config(intel_dp, crtc_state); return true; } @@ -1048,6 +1166,14 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, IGNORE_PSR2_HW_TRACKING, intel_dp->psr.psr2_sel_fetch_enabled ?
IGNORE_PSR2_HW_TRACKING : 0); + + /* Wa_16011168373:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) && + intel_dp->psr.psr2_enabled) + intel_de_rmw(dev_priv, + TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder), + TRANS_SET_CONTEXT_LATENCY_MASK, + TRANS_SET_CONTEXT_LATENCY_VALUE(1)); } static bool psr_interrupt_error_check(struct intel_dp *intel_dp) @@ -1087,6 +1213,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); struct intel_encoder *encoder = &dig_port->base; u32 val; @@ -1101,6 +1228,8 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.dc3co_exit_delay = val; intel_dp->psr.dc3co_exitline = crtc_state->dc3co_exitline; intel_dp->psr.psr2_sel_fetch_enabled = crtc_state->enable_psr2_sel_fetch; + intel_dp->psr.req_psr2_sdp_prior_scanline = + crtc_state->req_psr2_sdp_prior_scanline; if (!psr_interrupt_error_check(intel_dp)) return; @@ -1110,6 +1239,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp_compute_psr_vsc_sdp(intel_dp, crtc_state, conn_state, &intel_dp->psr.vsc); intel_write_dp_vsc_sdp(encoder, crtc_state, &intel_dp->psr.vsc); + intel_snps_phy_update_psr_power_state(dev_priv, phy, true); intel_psr_enable_sink(intel_dp); intel_psr_enable_source(intel_dp); intel_dp->psr.enabled = true; @@ -1206,6 +1336,8 @@ static void intel_psr_wait_exit_locked(struct intel_dp *intel_dp) static void intel_psr_disable_locked(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + enum phy phy = intel_port_to_phy(dev_priv, + dp_to_dig_port(intel_dp)->base.port); lockdep_assert_held(&intel_dp->psr.lock); @@ -1218,13 +1350,21 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_psr_exit(intel_dp); intel_psr_wait_exit_locked(intel_dp); - /* WA 1408330847 */ + /* Wa_1408330847 */ if (intel_dp->psr.psr2_sel_fetch_enabled && - (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || - IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0))) + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, DIS_RAM_BYPASS_PSR2_MAN_TRACK, 0); + /* Wa_16011168373:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) && + intel_dp->psr.psr2_enabled) + intel_de_rmw(dev_priv, + TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder), + TRANS_SET_CONTEXT_LATENCY_MASK, 0); + + intel_snps_phy_update_psr_power_state(dev_priv, phy, false); + /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -1397,21 +1537,32 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, struct drm_rect *clip, bool full_update) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 val = PSR2_MAN_TRK_CTL_ENABLE; if (full_update) { - val |= PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; + if (IS_ALDERLAKE_P(dev_priv)) + val |= ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; + else + val |= PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; + goto exit; } if (clip->y1 == -1) goto exit; - drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4); + if (IS_ALDERLAKE_P(dev_priv)) { + val |= ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1); + val |= ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2); 
+ } else { + drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4); - val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; - val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1); - val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1); + val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; + val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1); + val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1); + } exit: crtc_state->psr2_man_track_ctl = val; } @@ -1432,6 +1583,20 @@ static void clip_area_update(struct drm_rect *overlap_damage_area, overlap_damage_area->y2 = damage_area->y2; } +static void intel_psr2_sel_fetch_pipe_alignment(const struct intel_crtc_state *crtc_state, + struct drm_rect *pipe_clip) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + const u16 y_alignment = crtc_state->su_y_granularity; + + pipe_clip->y1 -= pipe_clip->y1 % y_alignment; + if (pipe_clip->y2 % y_alignment) + pipe_clip->y2 = ((pipe_clip->y2 / y_alignment) + 1) * y_alignment; + + if (IS_ALDERLAKE_P(dev_priv) && crtc_state->dsc.compression_enable) + drm_warn(&dev_priv->drm, "Missing PSR2 sel fetch alignment with DSC\n"); +} + int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -1540,10 +1705,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, if (full_update) goto skip_sel_fetch_set_loop; - /* It must be aligned to 4 lines */ - pipe_clip.y1 -= pipe_clip.y1 % 4; - if (pipe_clip.y2 % 4) - pipe_clip.y2 = ((pipe_clip.y2 / 4) + 1) * 4; + intel_psr2_sel_fetch_pipe_alignment(crtc_state, &pipe_clip); /* * Now that we have the pipe damaged area check if it intersect with @@ -1564,6 +1726,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, sel_fetch_area = &new_plane_state->psr2_sel_fetch_area; sel_fetch_area->y1 = inter.y1 - new_plane_state->uapi.dst.y1; sel_fetch_area->y2 = inter.y2 - new_plane_state->uapi.dst.y1; + crtc_state->update_planes |= BIT(plane->id); } skip_sel_fetch_set_loop: diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index 98dd787b00e3..8a52b7a16774 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -53,6 +53,12 @@ static void quirk_increase_ddi_disabled_time(struct drm_i915_private *i915) drm_info(&i915->drm, "Applying Increase DDI Disabled quirk\n"); } +static void quirk_no_pps_backlight_power_hook(struct drm_i915_private *i915) +{ + i915->quirks |= QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK; + drm_info(&i915->drm, "Applying no pps backlight power quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -72,6 +78,12 @@ static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) return 1; } +static int intel_dmi_no_pps_backlight(const struct dmi_system_id *id) +{ + DRM_INFO("No pps backlight support on %s\n", id->ident); + return 1; +} + static const struct intel_dmi_quirk intel_dmi_quirks[] = { { .dmi_id_list = &(const struct dmi_system_id[]) { @@ -96,6 +108,28 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = { }, .hook = quirk_invert_brightness, }, + { + .dmi_id_list = &(const struct dmi_system_id[]) { + { + .callback = intel_dmi_no_pps_backlight, + .ident = "Google Lillipup sku524294", + .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524294"), + }, + }, + { + .callback = intel_dmi_no_pps_backlight, + .ident = 
"Google Lillipup sku524295", + .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524295"), + }, + }, + { } + }, + .hook = quirk_no_pps_backlight_power_hook, + }, }; static struct intel_quirk intel_quirks[] = { diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index e4f91d7a5c60..6cb27599ea03 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1824,7 +1824,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); u32 temp; bool input1, input2; int i; @@ -1835,7 +1835,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, intel_sdvo_write_sdvox(intel_sdvo, temp); for (i = 0; i < 2; i++) - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); success = intel_sdvo_get_trained_inputs(intel_sdvo, &input1, &input2); /* diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c new file mode 100644 index 000000000000..18b52b64af95 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -0,0 +1,862 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/util_macros.h> + +#include "intel_de.h" +#include "intel_display_types.h" +#include "intel_snps_phy.h" + +/** + * DOC: Synopsis PHY support + * + * Synopsis PHYs are primarily programmed by looking up magic register values + * in tables rather than calculating the necessary values at runtime. + * + * Of special note is that the SNPS PHYs include a dedicated port PLL, known as + * an "MPLLB." The MPLLB replaces the shared DPLL functionality used on other + * platforms and must be programming directly during the modeset sequence + * since it is not handled by the shared DPLL framework as on other platforms. + */ + +void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv) +{ + enum phy phy; + + for_each_phy_masked(phy, ~0) { + if (!intel_phy_is_snps(dev_priv, phy)) + continue; + + if (intel_de_wait_for_clear(dev_priv, ICL_PHY_MISC(phy), + DG2_PHY_DP_TX_ACK_MASK, 25)) + DRM_ERROR("SNPS PHY %c failed to calibrate after 25ms.\n", + phy); + } +} + +void intel_snps_phy_update_psr_power_state(struct drm_i915_private *dev_priv, + enum phy phy, bool enable) +{ + u32 val; + + if (!intel_phy_is_snps(dev_priv, phy)) + return; + + val = REG_FIELD_PREP(SNPS_PHY_TX_REQ_LN_DIS_PWR_STATE_PSR, + enable ? 
2 : 3); + intel_uncore_rmw(&dev_priv->uncore, SNPS_PHY_TX_REQ(phy), + SNPS_PHY_TX_REQ_LN_DIS_PWR_STATE_PSR, val); +} + +static const u32 dg2_ddi_translations[] = { + /* VS 0, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 26), + + /* VS 0, pre-emph 1 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 33) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 6), + + /* VS 0, pre-emph 2 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 38) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 12), + + /* VS 0, pre-emph 3 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 43) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 19), + + /* VS 1, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 39), + + /* VS 1, pre-emph 1 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 44) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 8), + + /* VS 1, pre-emph 2 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 47) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 15), + + /* VS 2, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 52), + + /* VS 2, pre-emph 1 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 51) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 10), + + /* VS 3, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 62), +}; + +void intel_snps_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + int n_entries, ln; + + n_entries = ARRAY_SIZE(dg2_ddi_translations); + if (level >= n_entries) + level = n_entries - 1; + + for (ln = 0; ln < 4; ln++) + intel_de_write(dev_priv, SNPS_PHY_TX_EQ(ln, phy), + dg2_ddi_translations[level]); +} + +/* + * Basic DP link rates with 100 MHz reference clock. + */ + +static const struct intel_mpllb_state dg2_dp_rbr_100 = { + .clock = 162000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 226), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 3), +}; + +static const struct intel_mpllb_state dg2_dp_hbr1_100 = { + .clock = 270000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 184), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), +}; + +static const struct intel_mpllb_state dg2_dp_hbr2_100 = { + .clock = 540000, + .ref_control 
= + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 184), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), +}; + +static const struct intel_mpllb_state dg2_dp_hbr3_100 = { + .clock = 810000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 19) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 292), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), +}; + +static const struct intel_mpllb_state *dg2_dp_100_tables[] = { + &dg2_dp_rbr_100, + &dg2_dp_hbr1_100, + &dg2_dp_hbr2_100, + &dg2_dp_hbr3_100, + NULL, +}; + +/* + * Basic DP link rates with 38.4 MHz reference clock. + */ + +static const struct intel_mpllb_state dg2_dp_rbr_38_4 = { + .clock = 162000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 304), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 49152), +}; + +static const struct intel_mpllb_state dg2_dp_hbr1_38_4 = { + .clock = 270000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 248), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40960), +}; + +static const struct intel_mpllb_state 
dg2_dp_hbr2_38_4 = { + .clock = 540000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 248), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40960), +}; + +static const struct intel_mpllb_state dg2_dp_hbr3_38_4 = { + .clock = 810000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 26) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 388), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 61440), +}; + +static const struct intel_mpllb_state *dg2_dp_38_4_tables[] = { + &dg2_dp_rbr_38_4, + &dg2_dp_hbr1_38_4, + &dg2_dp_hbr2_38_4, + &dg2_dp_hbr3_38_4, + NULL, +}; + +/* + * eDP link rates with 100 MHz reference clock. 
+ */ + +static const struct intel_mpllb_state dg2_edp_r216 = { + .clock = 216000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 19) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 312), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 4), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 50961), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 65752), +}; + +static const struct intel_mpllb_state dg2_edp_r243 = { + .clock = 243000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 356), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 57331), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 73971), +}; + +static const struct intel_mpllb_state dg2_edp_r324 = { + .clock = 324000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 226), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 3), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 38221), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 49314), +}; + +static const struct intel_mpllb_state dg2_edp_r432 = { + 
.clock = 432000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 19) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 312), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 4), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 50961), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 65752), +}; + +static const struct intel_mpllb_state *dg2_edp_tables[] = { + &dg2_dp_rbr_100, + &dg2_edp_r216, + &dg2_edp_r243, + &dg2_dp_hbr1_100, + &dg2_edp_r324, + &dg2_edp_r432, + &dg2_dp_hbr2_100, + &dg2_dp_hbr3_100, + NULL, +}; + +/* + * HDMI link rates with 100 MHz reference clock. + */ + +static const struct intel_mpllb_state dg2_hdmi_25_175 = { + .clock = 25175, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 128) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 143), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 36663) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 71), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state dg2_hdmi_27_0 = { + .clock = 27000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 140) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct 
intel_mpllb_state dg2_hdmi_74_25 = { + .clock = 74250, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state dg2_hdmi_148_5 = { + .clock = 148500, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state dg2_hdmi_594 = { + .clock = 594000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state *dg2_hdmi_tables[] = { + &dg2_hdmi_25_175, + &dg2_hdmi_27_0, + &dg2_hdmi_74_25, + &dg2_hdmi_148_5, + &dg2_hdmi_594, + NULL, +}; + +static const struct intel_mpllb_state ** +intel_mpllb_tables_get(struct 
intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) { + return dg2_edp_tables; + } else if (intel_crtc_has_dp_encoder(crtc_state)) { + /* + * FIXME: Initially we're just enabling the "combo" outputs on + * port A-D. The MPLLB for those ports takes an input from the + * "Display Filter PLL" which always has an output frequency + * of 100 MHz, hence the use of the _100 tables below. + * + * Once we enable port TC1 it will either use the same 100 MHz + * "Display Filter PLL" (when strapped to support a native + * display connection) or different 38.4 MHz "Filter PLL" when + * strapped to support a USB connection, so we'll need to check + * that to determine which table to use. + */ + if (0) + return dg2_dp_38_4_tables; + else + return dg2_dp_100_tables; + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { + return dg2_hdmi_tables; + } + + MISSING_CASE(encoder->type); + return NULL; +} + +int intel_mpllb_calc_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + const struct intel_mpllb_state **tables; + int i; + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { + if (intel_snps_phy_check_hdmi_link_rate(crtc_state->port_clock) + != MODE_OK) { + /* + * FIXME: Can only support fixed HDMI frequencies + * until we have a proper algorithm under a valid + * license. + */ + DRM_DEBUG_KMS("Can't support HDMI link rate %d\n", + crtc_state->port_clock); + return -EINVAL; + } + } + + tables = intel_mpllb_tables_get(crtc_state, encoder); + if (!tables) + return -EINVAL; + + for (i = 0; tables[i]; i++) { + if (crtc_state->port_clock <= tables[i]->clock) { + crtc_state->mpllb_state = *tables[i]; + return 0; + } + } + + return -EINVAL; +} + +void intel_mpllb_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + const struct intel_mpllb_state *pll_state = &crtc_state->mpllb_state; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + i915_reg_t enable_reg = (phy <= PHY_D ? + DG2_PLL_ENABLE(phy) : MG_PLL_ENABLE(0)); + + /* + * 3. Software programs the following PLL registers for the desired + * frequency. + */ + intel_de_write(dev_priv, SNPS_PHY_MPLLB_CP(phy), pll_state->mpllb_cp); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_DIV(phy), pll_state->mpllb_div); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_DIV2(phy), pll_state->mpllb_div2); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_SSCEN(phy), pll_state->mpllb_sscen); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_SSCSTEP(phy), pll_state->mpllb_sscstep); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_FRACN1(phy), pll_state->mpllb_fracn1); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_FRACN2(phy), pll_state->mpllb_fracn2); + + /* + * 4. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence Before Frequency Change. + * + * We handle this step in bxt_set_cdclk(). + */ + + /* 5. Software sets DPLL_ENABLE [PLL Enable] to "1". */ + intel_uncore_rmw(&dev_priv->uncore, enable_reg, 0, PLL_ENABLE); + + /* + * 9. Software sets SNPS_PHY_MPLLB_DIV dp_mpllb_force_en to "1". This + * will keep the PLL running during the DDI lane programming and any + * typeC DP cable disconnect. Do not set the force before enabling the + * PLL because that will start the PLL before it has sampled the + * divider values. 
+ */ + intel_de_write(dev_priv, SNPS_PHY_MPLLB_DIV(phy), + pll_state->mpllb_div | SNPS_PHY_MPLLB_FORCE_EN); + + /* + * 10. Software polls on register DPLL_ENABLE [PLL Lock] to confirm PLL + * is locked at new settings. This register bit is sampling PHY + * dp_mpllb_state interface signal. + */ + if (intel_de_wait_for_set(dev_priv, enable_reg, PLL_LOCK, 5)) + DRM_ERROR("Port %c PLL not locked\n", phy_name(phy)); + + /* + * 11. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence After Frequency Change. + * + * We handle this step in bxt_set_cdclk(). + */ +} + +void intel_mpllb_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + i915_reg_t enable_reg = (phy <= PHY_D ? + DG2_PLL_ENABLE(phy) : MG_PLL_ENABLE(0)); + + /* + * 1. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence Before Frequency Change. + * + * We handle this step in bxt_set_cdclk(). + */ + + /* 2. Software programs DPLL_ENABLE [PLL Enable] to "0" */ + intel_uncore_rmw(&dev_priv->uncore, enable_reg, PLL_ENABLE, 0); + + /* + * 4. Software programs SNPS_PHY_MPLLB_DIV dp_mpllb_force_en to "0". + * This will allow the PLL to stop running. + */ + intel_uncore_rmw(&dev_priv->uncore, SNPS_PHY_MPLLB_DIV(phy), + SNPS_PHY_MPLLB_FORCE_EN, 0); + + /* + * 5. Software polls DPLL_ENABLE [PLL Lock] for PHY acknowledgment + * (dp_txX_ack) that the new transmitter setting request is completed. + */ + if (intel_de_wait_for_clear(dev_priv, enable_reg, PLL_LOCK, 5)) + DRM_ERROR("Port %c PLL not locked\n", phy_name(phy)); + + /* + * 6. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence After Frequency Change. + * + * We handle this step in bxt_set_cdclk(). 
+ */ +} + +int intel_mpllb_calc_port_clock(struct intel_encoder *encoder, + const struct intel_mpllb_state *pll_state) +{ + unsigned int frac_quot = 0, frac_rem = 0, frac_den = 1; + unsigned int multiplier, tx_clk_div, refclk; + bool frac_en; + + if (0) + refclk = 38400; + else + refclk = 100000; + + refclk >>= REG_FIELD_GET(SNPS_PHY_MPLLB_REF_CLK_DIV, pll_state->mpllb_div2) - 1; + + frac_en = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_EN, pll_state->mpllb_fracn1); + + if (frac_en) { + frac_quot = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_QUOT, pll_state->mpllb_fracn2); + frac_rem = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_REM, pll_state->mpllb_fracn2); + frac_den = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_DEN, pll_state->mpllb_fracn1); + } + + multiplier = REG_FIELD_GET(SNPS_PHY_MPLLB_MULTIPLIER, pll_state->mpllb_div2) / 2 + 16; + + tx_clk_div = REG_FIELD_GET(SNPS_PHY_MPLLB_TX_CLK_DIV, pll_state->mpllb_div); + + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, (multiplier << 16) + frac_quot) + + DIV_ROUND_CLOSEST(refclk * frac_rem, frac_den), + 10 << (tx_clk_div + 16)); +} + +void intel_mpllb_readout_hw_state(struct intel_encoder *encoder, + struct intel_mpllb_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + pll_state->mpllb_cp = intel_de_read(dev_priv, SNPS_PHY_MPLLB_CP(phy)); + pll_state->mpllb_div = intel_de_read(dev_priv, SNPS_PHY_MPLLB_DIV(phy)); + pll_state->mpllb_div2 = intel_de_read(dev_priv, SNPS_PHY_MPLLB_DIV2(phy)); + pll_state->mpllb_sscen = intel_de_read(dev_priv, SNPS_PHY_MPLLB_SSCEN(phy)); + pll_state->mpllb_sscstep = intel_de_read(dev_priv, SNPS_PHY_MPLLB_SSCSTEP(phy)); + pll_state->mpllb_fracn1 = intel_de_read(dev_priv, SNPS_PHY_MPLLB_FRACN1(phy)); + pll_state->mpllb_fracn2 = intel_de_read(dev_priv, SNPS_PHY_MPLLB_FRACN2(phy)); + + /* + * REF_CONTROL is under firmware control and never programmed by the + * driver; we read it only for sanity checking purposes. The bspec + * only tells us the expected value for one field in this register, + * so we'll only read out those specific bits here. + */ + pll_state->ref_control = intel_de_read(dev_priv, SNPS_PHY_REF_CONTROL(phy)) & + SNPS_PHY_REF_CONTROL_REF_RANGE; + + /* + * MPLLB_DIV is programmed twice, once with the software-computed + * state, then again with the MPLLB_FORCE_EN bit added. Drop that + * extra bit during readout so that we return the actual expected + * software state. 
+ */ + pll_state->mpllb_div &= ~SNPS_PHY_MPLLB_FORCE_EN; +} + +int intel_snps_phy_check_hdmi_link_rate(int clock) +{ + const struct intel_mpllb_state **tables = dg2_hdmi_tables; + int i; + + for (i = 0; tables[i]; i++) { + if (clock == tables[i]->clock) + return MODE_OK; + } + + return MODE_CLOCK_RANGE; +} diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.h b/drivers/gpu/drm/i915/display/intel_snps_phy.h new file mode 100644 index 000000000000..6261ff88ef5c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SNPS_PHY_H__ +#define __INTEL_SNPS_PHY_H__ + +#include <linux/types.h> + +struct drm_i915_private; +struct intel_encoder; +struct intel_crtc_state; +struct intel_mpllb_state; +enum phy; + +void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv); +void intel_snps_phy_update_psr_power_state(struct drm_i915_private *dev_priv, + enum phy phy, bool enable); + +int intel_mpllb_calc_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder); +void intel_mpllb_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void intel_mpllb_disable(struct intel_encoder *encoder); +void intel_mpllb_readout_hw_state(struct intel_encoder *encoder, + struct intel_mpllb_state *pll_state); +int intel_mpllb_calc_port_clock(struct intel_encoder *encoder, + const struct intel_mpllb_state *pll_state); + +int intel_snps_phy_check_hdmi_link_rate(int clock); +void intel_snps_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level); + +#endif /* __INTEL_SNPS_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 4ae9a7455b23..08116f41da26 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1856,7 +1856,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, zpos = sprite + 1; drm_plane_create_zpos_immutable_property(&plane->base, zpos); - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + intel_plane_helper_add(plane); return plane; diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index c23c210a55f5..3ffece568ed9 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -556,7 +556,7 @@ intel_tc_port_get_target_mode(struct intel_digital_port *dig_port) } static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port, - int required_lanes) + int required_lanes, bool force_disconnect) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); enum tc_port_mode old_tc_mode = dig_port->tc_mode; @@ -572,7 +572,8 @@ static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port, } icl_tc_phy_disconnect(dig_port); - icl_tc_phy_connect(dig_port, required_lanes); + if (!force_disconnect) + icl_tc_phy_connect(dig_port, required_lanes); drm_dbg_kms(&i915->drm, "Port %s: TC port mode reset (%s -> %s)\n", dig_port->tc_port_name, @@ -662,7 +663,7 @@ bool intel_tc_port_connected(struct intel_encoder *encoder) } static void __intel_tc_port_lock(struct intel_digital_port *dig_port, - int required_lanes) + int required_lanes, bool force_disconnect) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); intel_wakeref_t wakeref; @@ -676,8 +677,9 @@ static void __intel_tc_port_lock(struct intel_digital_port *dig_port, tc_cold_wref = 
tc_cold_block(dig_port); - if (intel_tc_port_needs_reset(dig_port)) - intel_tc_port_reset_mode(dig_port, required_lanes); + if (force_disconnect || intel_tc_port_needs_reset(dig_port)) + intel_tc_port_reset_mode(dig_port, required_lanes, + force_disconnect); tc_cold_unblock(dig_port, tc_cold_wref); } @@ -688,7 +690,7 @@ static void __intel_tc_port_lock(struct intel_digital_port *dig_port, void intel_tc_port_lock(struct intel_digital_port *dig_port) { - __intel_tc_port_lock(dig_port, 1); + __intel_tc_port_lock(dig_port, 1, false); } void intel_tc_port_unlock(struct intel_digital_port *dig_port) @@ -702,6 +704,24 @@ void intel_tc_port_unlock(struct intel_digital_port *dig_port) wakeref); } +/** + * intel_tc_port_disconnect_phy: disconnect TypeC PHY from display port + * @dig_port: digital port + * + * Disconnect the given digital port from its TypeC PHY (handing control of + * the PHY back to the TypeC subsystem). The only purpose of this function is + * to force the disconnect even with a TypeC display output still plugged + * into the TypeC connector, which the TypeC firmware requires during system + * suspend and shutdown. Otherwise, during unplug event handling, the PHY + * ownership is released automatically by + * intel_tc_port_reset_mode(), and calling this function is not required. + */ +void intel_tc_port_disconnect_phy(struct intel_digital_port *dig_port) +{ + __intel_tc_port_lock(dig_port, 1, true); + intel_tc_port_unlock(dig_port); +} + bool intel_tc_port_ref_held(struct intel_digital_port *dig_port) { return mutex_is_locked(&dig_port->tc_lock) || @@ -711,7 +731,7 @@ bool intel_tc_port_ref_held(struct intel_digital_port *dig_port) void intel_tc_port_get_link(struct intel_digital_port *dig_port, int required_lanes) { - __intel_tc_port_lock(dig_port, required_lanes); + __intel_tc_port_lock(dig_port, required_lanes, false); dig_port->tc_link_refcount++; intel_tc_port_unlock(dig_port); } diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 0eacbd76ec15..0c881f645e27 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -13,6 +13,8 @@ struct intel_digital_port; struct intel_encoder; bool intel_tc_port_connected(struct intel_encoder *encoder); +void intel_tc_port_disconnect_phy(struct intel_digital_port *dig_port); + u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index aa52af7891f0..d02f09f7e750 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1420,7 +1420,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); const struct intel_tv_connector_state *tv_conn_state = to_intel_tv_connector_state(conn_state); @@ -1466,7 +1466,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, break; } - tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); + tv_ctl |= TV_ENC_PIPE_SEL(crtc->pipe); switch (tv_mode->oversample) { case 8: @@ -1571,8 +1571,7 @@
static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) { - struct drm_crtc *crtc = connector->state->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(connector->state->crtc); struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 tv_ctl, save_tv_ctl; @@ -1594,7 +1593,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, /* Poll for TV detection */ tv_ctl &= ~(TV_ENC_ENABLE | TV_ENC_PIPE_SEL_MASK | TV_TEST_MODE_MASK); tv_ctl |= TV_TEST_MODE_MONITOR_DETECT; - tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); + tv_ctl |= TV_ENC_PIPE_SEL(crtc->pipe); tv_dac &= ~(TVDAC_SENSE_MASK | DAC_A_MASK | DAC_B_MASK | DAC_C_MASK); tv_dac |= (TVDAC_STATE_CHG_EN | @@ -1619,7 +1618,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, intel_de_write(dev_priv, TV_DAC, tv_dac); intel_de_posting_read(dev_priv, TV_DAC); - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); type = -1; tv_dac = intel_de_read(dev_priv, TV_DAC); @@ -1652,7 +1651,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, intel_de_posting_read(dev_priv, TV_CTL); /* For unknown reasons the hw barfs if we don't do this vblank wait. */ - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); /* Restore interrupt config */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index dbe24d7e7375..330077c2e588 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -456,7 +456,7 @@ struct child_device_config { u16 dp_gpio_pin_num; /* 195 */ u8 dp_iboost_level:4; /* 196 */ u8 hdmi_iboost_level:4; /* 196 */ - u8 dp_max_link_rate:3; /* 216/230 CNL+ */ + u8 dp_max_link_rate:3; /* 216/230 GLK+ */ u8 dp_max_link_rate_reserved:5; /* 216/230 */ } __packed; diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 85749370508c..df3286aa6999 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -348,7 +348,10 @@ bool intel_dsc_source_support(const struct intel_crtc_state *crtc_state) if (DISPLAY_VER(i915) >= 12) return true; - if ((DISPLAY_VER(i915) >= 11 || IS_CANNONLAKE(i915)) && (pipe != PIPE_A || (cpu_transcoder == TRANSCODER_EDP || cpu_transcoder == TRANSCODER_DSI_0 || cpu_transcoder == TRANSCODER_DSI_1))) + if (DISPLAY_VER(i915) >= 11 && + (pipe != PIPE_A || cpu_transcoder == TRANSCODER_EDP || + cpu_transcoder == TRANSCODER_DSI_0 || + cpu_transcoder == TRANSCODER_DSI_1)) return true; return false; diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index f002b82ba9c0..fa779f7ea415 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -29,6 +29,9 @@ void intel_vga_disable(struct drm_i915_private *dev_priv) i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); u8 sr1; + if (intel_de_read(dev_priv, vga_reg) & VGA_DISP_DISABLE) + return; + /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(SR01, VGA_SR_INDEX); @@ -121,9 +124,9 @@ intel_vga_set_state(struct drm_i915_private *i915, bool enable_decode) } static unsigned int -intel_vga_set_decode(void *cookie, bool enable_decode) +intel_vga_set_decode(struct pci_dev *pdev, bool 
enable_decode) { - struct drm_i915_private *i915 = cookie; + struct drm_i915_private *i915 = pdev_to_i915(pdev); intel_vga_set_state(i915, enable_decode); @@ -136,6 +139,7 @@ intel_vga_set_decode(void *cookie, bool enable_decode) int intel_vga_register(struct drm_i915_private *i915) { + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); int ret; @@ -147,7 +151,7 @@ int intel_vga_register(struct drm_i915_private *i915) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - ret = vga_client_register(pdev, i915, NULL, intel_vga_set_decode); + ret = vga_client_register(pdev, intel_vga_set_decode); if (ret && ret != -ENODEV) return ret; @@ -158,5 +162,5 @@ void intel_vga_unregister(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - vga_client_register(pdev, NULL, NULL, NULL); + vga_client_unregister(pdev); } diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 394b7bbf48d8..37eabeff8197 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -96,9 +96,8 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, { struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; - struct intel_crtc *intel_crtc = - to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -141,7 +140,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: " "Staged freeing scaler id %d scaler_users = 0x%x\n", - intel_crtc->pipe, scaler_user, *scaler_id, + crtc->pipe, scaler_user, *scaler_id, scaler_state->scaler_users); *scaler_id = -1; } @@ -167,7 +166,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: src %ux%u dst %ux%u " "size is out of scaler range\n", - intel_crtc->pipe, scaler_user, src_w, src_h, + crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h); return -EINVAL; } @@ -176,7 +175,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, scaler_state->scaler_users |= (1 << scaler_user); drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: " "staged scaling request for %ux%u->%ux%u scaler_users = 0x%x\n", - intel_crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h, + crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h, scaler_state->scaler_users); return 0; @@ -295,12 +294,12 @@ int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, return 0; } -static int cnl_coef_tap(int i) +static int glk_coef_tap(int i) { return i % 7; } -static u16 cnl_nearest_filter_coef(int t) +static u16 glk_nearest_filter_coef(int t) { return t == 3 ? 
0x0800 : 0x3000; } @@ -342,29 +341,29 @@ static u16 cnl_nearest_filter_coef(int t) * */ -static void cnl_program_nearest_filter_coefs(struct drm_i915_private *dev_priv, +static void glk_program_nearest_filter_coefs(struct drm_i915_private *dev_priv, enum pipe pipe, int id, int set) { int i; - intel_de_write_fw(dev_priv, CNL_PS_COEF_INDEX_SET(pipe, id, set), + intel_de_write_fw(dev_priv, GLK_PS_COEF_INDEX_SET(pipe, id, set), PS_COEE_INDEX_AUTO_INC); for (i = 0; i < 17 * 7; i += 2) { u32 tmp; int t; - t = cnl_coef_tap(i); - tmp = cnl_nearest_filter_coef(t); + t = glk_coef_tap(i); + tmp = glk_nearest_filter_coef(t); - t = cnl_coef_tap(i + 1); - tmp |= cnl_nearest_filter_coef(t) << 16; + t = glk_coef_tap(i + 1); + tmp |= glk_nearest_filter_coef(t) << 16; - intel_de_write_fw(dev_priv, CNL_PS_COEF_DATA_SET(pipe, id, set), + intel_de_write_fw(dev_priv, GLK_PS_COEF_DATA_SET(pipe, id, set), tmp); } - intel_de_write_fw(dev_priv, CNL_PS_COEF_INDEX_SET(pipe, id, set), 0); + intel_de_write_fw(dev_priv, GLK_PS_COEF_INDEX_SET(pipe, id, set), 0); } static u32 skl_scaler_get_filter_select(enum drm_scaling_filter filter, int set) @@ -387,7 +386,7 @@ static void skl_scaler_setup_filter(struct drm_i915_private *dev_priv, enum pipe case DRM_SCALING_FILTER_DEFAULT: break; case DRM_SCALING_FILTER_NEAREST_NEIGHBOR: - cnl_program_nearest_filter_coefs(dev_priv, pipe, id, set); + glk_program_nearest_filter_coefs(dev_priv, pipe, id, set); break; default: MISSING_CASE(filter); @@ -515,17 +514,17 @@ skl_program_plane_scaler(struct intel_plane *plane, (crtc_w << 16) | crtc_h); } -static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) +static void skl_detach_scaler(struct intel_crtc *crtc, int id) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - intel_de_write_fw(dev_priv, SKL_PS_CTRL(intel_crtc->pipe, id), 0); - intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(intel_crtc->pipe, id), 0); - intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(intel_crtc->pipe, id), 0); + intel_de_write_fw(dev_priv, SKL_PS_CTRL(crtc->pipe, id), 0); + intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(crtc->pipe, id), 0); + intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(crtc->pipe, id), 0); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -535,15 +534,15 @@ static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) */ void skl_detach_scalers(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; int i; /* loop through and disable scalers that aren't in use */ - for (i = 0; i < intel_crtc->num_scalers; i++) { + for (i = 0; i < crtc->num_scalers; i++) { if (!scaler_state->scalers[i].in_use) - skl_detach_scaler(intel_crtc, i); + skl_detach_scaler(crtc, i); } } diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 92a4fd508e92..724e7b04f3b6 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -835,7 +835,7 @@ static u32 skl_plane_ctl_rotate(unsigned int rotate) return 0; } -static u32 cnl_plane_ctl_flip(unsigned int reflect) +static u32 icl_plane_ctl_flip(unsigned int reflect) { switch (reflect) { case 0: @@ -917,8 +917,8 @@ 
static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_tiling(fb->modifier); plane_ctl |= skl_plane_ctl_rotate(rotation & DRM_MODE_ROTATE_MASK); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) - plane_ctl |= cnl_plane_ctl_flip(rotation & + if (DISPLAY_VER(dev_priv) >= 11) + plane_ctl |= icl_plane_ctl_flip(rotation & DRM_MODE_REFLECT_MASK); if (key->flags & I915_SET_COLORKEY_DESTINATION) @@ -926,7 +926,7 @@ static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, else if (key->flags & I915_SET_COLORKEY_SOURCE) plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; - /* Wa_22012358565:adlp */ + /* Wa_22012358565:adl-p */ if (DISPLAY_VER(dev_priv) == 13) plane_ctl |= adlp_plane_ctl_arb_slots(plane_state); @@ -1270,7 +1270,7 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s int pipe_src_w = crtc_state->pipe_src_w; /* - * Display WA #1175: cnl,glk + * Display WA #1175: glk * Planes other than the cursor may cause FIFO underflow and display * corruption if starting less than 4 pixels from the right edge of * the screen. @@ -1828,7 +1828,7 @@ static bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, if (plane_id == PLANE_CURSOR) return false; - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) return true; if (IS_GEMINILAKE(dev_priv)) @@ -1910,11 +1910,11 @@ static bool gen12_plane_supports_mc_ccs(struct drm_i915_private *dev_priv, { /* Wa_14010477008:tgl[a0..c0],rkl[all],dg1[all] */ if (IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv) || - IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_D0)) return false; /* Wa_22011186057 */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) return false; return plane_id < PLANE_SPRITE4; @@ -1938,7 +1938,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC: /* Wa_22011186057 */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) return false; break; default: @@ -1995,7 +1995,7 @@ static const u64 *gen12_get_plane_modifiers(struct drm_i915_private *dev_priv, enum plane_id plane_id) { /* Wa_22011186057 */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) return adlp_step_a_plane_format_modifiers; else if (gen12_plane_supports_mc_ccs(dev_priv, plane_id)) return gen12_plane_format_modifiers_mc_ccs; @@ -2144,7 +2144,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) supported_rotations |= DRM_MODE_REFLECT_X; drm_plane_create_rotation_property(&plane->base, @@ -2174,12 +2174,12 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) >= 12) drm_plane_enable_fb_damage_clips(&plane->base); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) drm_plane_create_scaling_filter_property(&plane->base, BIT(DRM_SCALING_FILTER_DEFAULT) | BIT(DRM_SCALING_FILTER_NEAREST_NEIGHBOR)); - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + intel_plane_helper_add(plane); return plane; @@ -2295,7 +2295,7 @@ 
skl_get_initial_plane_config(struct intel_crtc *crtc, break; } - if ((DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) && val & PLANE_CTL_FLIP_HORIZONTAL) + if (DISPLAY_VER(dev_priv) >= 11 && val & PLANE_CTL_FLIP_HORIZONTAL) plane_config->rotation |= DRM_MODE_REFLECT_X; /* 90/270 degree rotation would require extra work */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 084c9c43b2ed..0ee4ff341e25 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -780,10 +780,9 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - struct drm_crtc *crtc = pipe_config->uapi.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; @@ -1389,7 +1388,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; enum port port; @@ -1397,7 +1396,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, u32 val, tmp; u16 mode_hdisplay; - drm_dbg_kms(&dev_priv->drm, "pipe %c\n", pipe_name(intel_crtc->pipe)); + drm_dbg_kms(&dev_priv->drm, "pipe %c\n", pipe_name(crtc->pipe)); mode_hdisplay = adjusted_mode->crtc_hdisplay; @@ -1424,7 +1423,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, intel_de_write(dev_priv, MIPI_CTRL(port), tmp | READ_REQUEST_PRIORITY_HIGH); } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; tmp = intel_de_read(dev_priv, MIPI_CTRL(port)); tmp &= ~BXT_PIPE_SELECT_MASK; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..f0435c6feb68 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -24,13 +24,11 @@ static void __do_clflush(struct drm_i915_gem_object *obj) i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } -static int clflush_work(struct dma_fence_work *base) +static void clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); __do_clflush(clflush->obj); - - return 0; } static void clflush_release(struct dma_fence_work *base) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c deleted file mode 100644 index 44821d94544f..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ /dev/null @@ -1,355 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_gem_client_blt.h" -#include "i915_gem_object_blt.h" - -struct i915_sleeve { 
- struct i915_vma *vma; - struct drm_i915_gem_object *obj; - struct sg_table *pages; - struct i915_page_sizes page_sizes; -}; - -static int vma_set_pages(struct i915_vma *vma) -{ - struct i915_sleeve *sleeve = vma->private; - - vma->pages = sleeve->pages; - vma->page_sizes = sleeve->page_sizes; - - return 0; -} - -static void vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - vma->pages = NULL; -} - -static void vma_bind(struct i915_address_space *vm, - struct i915_vm_pt_stash *stash, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); -} - -static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) -{ - vm->vma_ops.unbind_vma(vm, vma); -} - -static const struct i915_vma_ops proxy_vma_ops = { - .set_pages = vma_set_pages, - .clear_pages = vma_clear_pages, - .bind_vma = vma_bind, - .unbind_vma = vma_unbind, -}; - -static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, - struct drm_i915_gem_object *obj, - struct sg_table *pages, - struct i915_page_sizes *page_sizes) -{ - struct i915_sleeve *sleeve; - struct i915_vma *vma; - int err; - - sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL); - if (!sleeve) - return ERR_PTR(-ENOMEM); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_free; - } - - vma->private = sleeve; - vma->ops = &proxy_vma_ops; - - sleeve->vma = vma; - sleeve->pages = pages; - sleeve->page_sizes = *page_sizes; - - return sleeve; - -err_free: - kfree(sleeve); - return ERR_PTR(err); -} - -static void destroy_sleeve(struct i915_sleeve *sleeve) -{ - kfree(sleeve); -} - -struct clear_pages_work { - struct dma_fence dma; - struct dma_fence_cb cb; - struct i915_sw_fence wait; - struct work_struct work; - struct irq_work irq_work; - struct i915_sleeve *sleeve; - struct intel_context *ce; - u32 value; -}; - -static const char *clear_pages_work_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *clear_pages_work_timeline_name(struct dma_fence *fence) -{ - return "clear"; -} - -static void clear_pages_work_release(struct dma_fence *fence) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), dma); - - destroy_sleeve(w->sleeve); - - i915_sw_fence_fini(&w->wait); - - BUILD_BUG_ON(offsetof(typeof(*w), dma)); - dma_fence_free(&w->dma); -} - -static const struct dma_fence_ops clear_pages_work_ops = { - .get_driver_name = clear_pages_work_driver_name, - .get_timeline_name = clear_pages_work_timeline_name, - .release = clear_pages_work_release, -}; - -static void clear_pages_signal_irq_worker(struct irq_work *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), irq_work); - - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); -} - -static void clear_pages_dma_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct clear_pages_work *w = container_of(cb, typeof(*w), cb); - - if (fence->error) - dma_fence_set_error(&w->dma, fence->error); - - /* - * Push the signalling of the fence into yet another worker to avoid - * the nightmare locking around the fence spinlock. 
- */ - irq_work_queue(&w->irq_work); -} - -static void clear_pages_worker(struct work_struct *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_gem_object *obj = w->sleeve->vma->obj; - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - int err = w->dma.error; - - if (unlikely(err)) - goto out_signal; - - if (obj->cache_dirty) { - if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(w->sleeve->pages); - obj->cache_dirty = false; - } - obj->read_domains = I915_GEM_GPU_DOMAINS; - obj->write_domain = 0; - - i915_gem_ww_ctx_init(&ww, false); - intel_engine_pm_get(w->ce->engine); -retry: - err = intel_context_pin_ww(w->ce, &ww); - if (err) - goto out_signal; - - batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_ctx; - } - - rq = i915_request_create(w->ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - /* There's no way the fence has signalled */ - if (dma_fence_add_callback(&rq->fence, &w->cb, - clear_pages_dma_fence_cb)) - GEM_BUG_ON(1); - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - /* - * w->dma is already exported via (vma|obj)->resv we need only - * keep track of the GPU activity within this vma/request, and - * propagate the signal from the request to w->dma. - */ - err = __i915_vma_move_to_active(vma, rq); - if (err) - goto out_request; - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -out_request: - if (unlikely(err)) { - i915_request_set_error_once(rq, err); - err = 0; - } - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(w->ce, batch); -out_ctx: - intel_context_unpin(w->ce); -out_signal: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - i915_vma_unpin(w->sleeve->vma); - intel_engine_pm_put(w->ce->engine); - - if (unlikely(err)) { - dma_fence_set_error(&w->dma, err); - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); - } -} - -static int pin_wait_clear_pages_work(struct clear_pages_work *w, - struct intel_context *ce) -{ - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - int err; - - i915_gem_ww_ctx_init(&ww, false); -retry: - err = i915_gem_object_lock(vma->obj, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out; - - err = i915_sw_fence_await_reservation(&w->wait, - vma->obj->base.resv, NULL, - true, 0, I915_FENCE_GFP); - if (err) - goto err_unpin_vma; - - dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); - -err_unpin_vma: - if (err) - i915_vma_unpin(vma); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - return err; -} - -static int __i915_sw_fence_call -clear_pages_work_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), wait); - - switch (state) { - case FENCE_COMPLETE: - schedule_work(&w->work); - break; - - case FENCE_FREE: - dma_fence_put(&w->dma); - break; - } - - return NOTIFY_DONE; -} - -static DEFINE_SPINLOCK(fence_lock); - -/* XXX: better name please */ -int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value) -{ - struct clear_pages_work *work; - struct i915_sleeve *sleeve; - int err; - - sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); - if (IS_ERR(sleeve)) - return PTR_ERR(sleeve); - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - destroy_sleeve(sleeve); - return -ENOMEM; - } - - work->value = value; - work->sleeve = sleeve; - work->ce = ce; - - INIT_WORK(&work->work, clear_pages_worker); - - init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - - dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); - i915_sw_fence_init(&work->wait, clear_pages_work_notify); - - err = pin_wait_clear_pages_work(work, ce); - if (err < 0) - dma_fence_set_error(&work->dma, err); - - dma_fence_get(&work->dma); - i915_sw_fence_commit(&work->wait); - - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_client_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h deleted file mode 100644 index 3dbd28c22ff5..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ -#ifndef __I915_GEM_CLIENT_BLT_H__ -#define __I915_GEM_CLIENT_BLT_H__ - -#include <linux/types.h> - -struct drm_i915_gem_object; -struct i915_page_sizes; -struct intel_context; -struct sg_table; - -int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value); - -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7720b8c22c81..cff72679ad7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,35 +67,32 @@ #include <linux/log2.h> #include <linux/nospec.h> +#include <drm/drm_syncobj.h> + #include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" #include "gt/intel_context_param.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" -#include "gt/intel_execlists_submission.h" /* virtual_engine */ #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_gem_context.h" -#include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static struct i915_global_gem_context { - struct i915_global base; - struct kmem_cache *slab_luts; -} global; +static struct kmem_cache *slab_luts; struct i915_lut_handle *i915_lut_handle_alloc(void) { - return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); + return kmem_cache_alloc(slab_luts, GFP_KERNEL); } void i915_lut_handle_free(struct i915_lut_handle *lut) { - return kmem_cache_free(global.slab_luts, lut); + return kmem_cache_free(slab_luts, lut); } static void lut_close(struct i915_gem_context *ctx) @@ -167,6 +164,577 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } +static int validate_priority(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + 
return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + return 0; +} + +static void proto_context_close(struct i915_gem_proto_context *pc) +{ + int i; + + if (pc->vm) + i915_vm_put(pc->vm); + if (pc->user_engines) { + for (i = 0; i < pc->num_user_engines; i++) + kfree(pc->user_engines[i].siblings); + kfree(pc->user_engines); + } + kfree(pc); +} + +static int proto_context_set_persistence(struct drm_i915_private *i915, + struct i915_gem_proto_context *pc, + bool persist) +{ + if (persist) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915->params.enable_hangcheck) + return -EINVAL; + + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + /* + * If the cancel fails, we then need to reset, cleanly! + * + * If the per-engine reset fails, all hope is lost! We resort + * to a full GPU reset in that unlikely case, but realistically + * if the engine could not reset, the full reset does not fare + * much better. The damage has been done. + * + * However, if we cannot reset an engine by itself, we cannot + * clean up a hanging persistent context without causing + * collateral damage, and we should not pretend we can by + * exposing the interface. + */ + if (!intel_has_reset_engine(&i915->gt)) + return -ENODEV; + + pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); + } + + return 0; +} + +static struct i915_gem_proto_context * +proto_context_create(struct drm_i915_private *i915, unsigned int flags) +{ + struct i915_gem_proto_context *pc, *err; + + pc = kzalloc(sizeof(*pc), GFP_KERNEL); + if (!pc) + return ERR_PTR(-ENOMEM); + + pc->num_user_engines = -1; + pc->user_engines = NULL; + pc->user_flags = BIT(UCONTEXT_BANNABLE) | + BIT(UCONTEXT_RECOVERABLE); + if (i915->params.enable_hangcheck) + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + pc->sched.priority = I915_PRIORITY_NORMAL; + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + if (!HAS_EXECLISTS(i915)) { + err = ERR_PTR(-EINVAL); + goto proto_close; + } + pc->single_timeline = true; + } + + return pc; + +proto_close: + proto_context_close(pc); + return err; +} + +static int proto_context_register_locked(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + void *old; + + lockdep_assert_held(&fpriv->proto_context_lock); + + ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL); + if (xa_is_err(old)) { + xa_erase(&fpriv->context_xa, *id); + return xa_err(old); + } + WARN_ON(old); + + return 0; +} + +static int proto_context_register(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + + mutex_lock(&fpriv->proto_context_lock); + ret = proto_context_register_locked(fpriv, pc, id); + mutex_unlock(&fpriv->proto_context_lock); + + return ret; +} + +static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct i915_address_space *vm; + + if (args->size) + return -EINVAL; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if
(upper_32_bits(args->value)) + return -ENOENT; + + vm = i915_gem_vm_lookup(fpriv, args->value); + if (!vm) + return -ENOENT; + + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = vm; + + return 0; +} + +struct set_proto_ctx_engines { + struct drm_i915_private *i915; + unsigned num_engines; + struct i915_gem_proto_engine *engines; +}; + +static int +set_proto_ctx_engines_balance(struct i915_user_extension __user *base, + void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct intel_engine_cs **siblings; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(i915)) + return -ENODEV; + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, + "Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + if (num_siblings == 0) + return 0; + + siblings = kmalloc_array(num_siblings, sizeof(*siblings), GFP_KERNEL); + if (!siblings) + return -ENOMEM; + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto err_siblings; + } + + siblings[n] = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + drm_dbg(&i915->drm, + "Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto err_siblings; + } + } + + if (num_siblings == 1) { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set->engines[idx].engine = siblings[0]; + kfree(siblings); + } else { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_BALANCED; + set->engines[idx].num_siblings = num_siblings; + set->engines[idx].siblings = siblings; + } + + return 0; + +err_siblings: + kfree(siblings); + + return err; +} + +static int +set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance ci; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, + "Invalid index for virtual engine: %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type == I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); + return -EINVAL; + } + + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_PHYSICAL) { + drm_dbg(&i915->drm, + "Bonding with virtual engines not allowed\n"); + return -EINVAL; + } + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if 
(copy_from_user(&ci, &ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!master) { + drm_dbg(&i915->drm, + "Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (intel_engine_uses_guc(master)) { + DRM_DEBUG("bonding extension not supported with GuC submission"); + return -ENODEV; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + drm_dbg(&i915->drm, + "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + } + + return 0; +} + +static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, +}; + +static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct set_proto_ctx_engines set = { .i915 = i915 }; + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + unsigned int n; + u64 extensions; + int err; + + if (pc->num_user_engines >= 0) { + drm_dbg(&i915->drm, "Cannot set engines twice"); + return -EINVAL; + } + + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) { + drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + set.num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + /* RING_MASK has no shift so we can use it directly here */ + if (set.num_engines > I915_EXEC_RING_MASK + 1) + return -EINVAL; + + set.engines = kmalloc_array(set.num_engines, sizeof(*set.engines), GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + for (n = 0; n < set.num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + kfree(set.engines); + return -EFAULT; + } + + memset(&set.engines[n], 0, sizeof(set.engines[n])); + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) + continue; + + engine = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + drm_dbg(&i915->drm, + "Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + kfree(set.engines); + return -ENOENT; + } + + set.engines[n].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set.engines[n].engine = engine; + } + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_proto_ctx_engines_extensions, + ARRAY_SIZE(set_proto_ctx_engines_extensions), + &set); + if (err) { + kfree(set.engines); + return err; + } + + pc->num_user_engines = set.num_engines; + pc->user_engines = set.engines; + + return 0; +} + +static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct 
drm_i915_gem_context_param_sseu user_sseu; + struct intel_sseu *sseu; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (GRAPHICS_VER(i915) != 11) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + if (!!(user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) != (pc->num_user_engines >= 0)) + return -EINVAL; + + if (pc->num_user_engines >= 0) { + int idx = user_sseu.engine.engine_instance; + struct i915_gem_proto_engine *pe; + + if (idx >= pc->num_user_engines) + return -EINVAL; + + pe = &pc->user_engines[idx]; + + /* Only render engine supports RPCS configuration. */ + if (pe->engine->class != RENDER_CLASS) + return -EINVAL; + + sseu = &pe->sseu; + } else { + /* Only render engine supports RPCS configuration. */ + if (user_sseu.engine.engine_class != I915_ENGINE_CLASS_RENDER) + return -EINVAL; + + /* There is only one render engine */ + if (user_sseu.engine.engine_instance != 0) + return -EINVAL; + + sseu = &pc->legacy_rcs_sseu; + } + + ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu); + if (ret) + return ret; + + args->size = sizeof(user_sseu); + + return 0; +} + +static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + int ret = 0; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_NO_ERROR_CAPTURE); + else + pc->user_flags &= ~BIT(UCONTEXT_NO_ERROR_CAPTURE); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + if (args->size) + ret = -EINVAL; + else if (!capable(CAP_SYS_ADMIN) && !args->value) + ret = -EPERM; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_BANNABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_RECOVERABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + ret = validate_priority(fpriv->dev_priv, args); + if (!ret) + pc->sched.priority = args->value; + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = set_proto_ctx_sseu(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = set_proto_ctx_vm(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = set_proto_ctx_engines(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_PERSISTENCE: + if (args->size) + ret = -EINVAL; + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); + break; + + case I915_CONTEXT_PARAM_NO_ZEROMAP: + case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + default: + ret = -EINVAL; + break; + } + + return ret; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -205,14 +773,16 @@ context_get_vm_rcu(struct i915_gem_context *ctx) } while (1); } -static void intel_context_set_gem(struct intel_context *ce, - struct i915_gem_context *ctx) +static int intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_sseu sseu) { + int ret = 0; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - ce->ring = 
__intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; if (rcu_access_pointer(ctx->vm)) { struct i915_address_space *vm; @@ -225,15 +795,23 @@ static void intel_context_set_gem(struct intel_context *ce, ce->vm = vm; } - GEM_BUG_ON(ce->timeline); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_timeslices(ce->engine)) + intel_engine_has_timeslices(ce->engine) && + intel_engine_has_semaphores(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); - intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); + if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && + ctx->i915->params.request_timeout_ms) { + unsigned int timeout_ms = ctx->i915->params.request_timeout_ms; + + intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); + } + + /* A valid SSEU has no zero fields */ + if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) + ret = intel_context_reconfigure_sseu(ce, sseu); + + return ret; } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -301,11 +879,12 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) return e; } -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, + struct intel_sseu rcs_sseu) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; - struct i915_gem_engines *e; + struct i915_gem_engines *e, *err; enum intel_engine_id id; e = alloc_engines(I915_NUM_ENGINES); @@ -314,6 +893,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + struct intel_sseu sseu = {}; + int ret; if (engine->legacy_idx == INVALID_ENGINE) continue; @@ -323,18 +904,79 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, e->num_engines + 1); - return ERR_CAST(ce); + err = ERR_CAST(ce); + goto free_engines; } - intel_context_set_gem(ce, ctx); - e->engines[engine->legacy_idx] = ce; - e->num_engines = max(e->num_engines, engine->legacy_idx); + e->num_engines = max(e->num_engines, engine->legacy_idx + 1); + + if (engine->class == RENDER_CLASS) + sseu = rcs_sseu; + + ret = intel_context_set_gem(ce, ctx, sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + + } + + return e; + +free_engines: + free_engines(e); + return err; +} + +static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, + unsigned int num_engines, + struct i915_gem_proto_engine *pe) +{ + struct i915_gem_engines *e, *err; + unsigned int n; + + e = alloc_engines(num_engines); + for (n = 0; n < num_engines; n++) { + struct intel_context *ce; + int ret; + + switch (pe[n].type) { + case I915_GEM_ENGINE_TYPE_PHYSICAL: + ce = intel_context_create(pe[n].engine); + break; + + case I915_GEM_ENGINE_TYPE_BALANCED: + ce = intel_engine_create_virtual(pe[n].siblings, + pe[n].num_siblings); + break; + + case I915_GEM_ENGINE_TYPE_INVALID: + default: + GEM_WARN_ON(pe[n].type != I915_GEM_ENGINE_TYPE_INVALID); + continue; + } + + if (IS_ERR(ce)) { + err = ERR_CAST(ce); + goto free_engines; + } + + e->engines[n] = ce; + + ret = intel_context_set_gem(ce, ctx, pe->sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } } - e->num_engines++; + e->num_engines = num_engines; return e; + +free_engines: + free_engines(e); + return err; } void 
i915_gem_context_release(struct kref *ref) @@ -347,9 +989,6 @@ void i915_gem_context_release(struct kref *ref) mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); - if (ctx->timeline) - intel_timeline_put(ctx->timeline); - put_pid(ctx->pid); mutex_destroy(&ctx->mutex); @@ -441,7 +1080,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - if (ban && intel_context_set_banned(ce)) + if (ban && intel_context_ban(ce, NULL)) continue; /* @@ -566,6 +1205,9 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -635,57 +1277,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static struct i915_gem_context * -__create_context(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - int err; - int i; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); - - kref_init(&ctx->ref); - ctx->i915 = i915; - ctx->sched.priority = I915_PRIORITY_NORMAL; - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->link); - - spin_lock_init(&ctx->stale.lock); - INIT_LIST_HEAD(&ctx->stale.engines); - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto err_free; - } - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - - /* NB: Mark all slices as needing a remap so that when the context first - * loads it will restore whatever remap state already exists. If there - * is no remap info, it will be a NOP. */ - ctx->remap_slice = ALL_L3_SLICES(i915); - - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - __context_set_persistence(ctx, true /* cgroup hook? 
*/); - - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) - ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - - return ctx; - -err_free: - kfree(ctx); - return ERR_PTR(err); -} - static inline struct i915_gem_engines * __context_engines_await(const struct i915_gem_context *ctx, bool *user_engines) @@ -714,168 +1305,112 @@ __context_engines_await(const struct i915_gem_context *ctx, return engines; } -static int +static void context_apply_all(struct i915_gem_context *ctx, - int (*fn)(struct intel_context *ce, void *data), + void (*fn)(struct intel_context *ce, void *data), void *data) { struct i915_gem_engines_iter it; struct i915_gem_engines *e; struct intel_context *ce; - int err = 0; e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) { - err = fn(ce, data); - if (err) - break; - } + for_each_gem_engine(ce, e, it) + fn(ce, data); i915_sw_fence_complete(&e->fence); - - return err; -} - -static int __apply_ppgtt(struct intel_context *ce, void *vm) -{ - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(vm); - return 0; -} - -static struct i915_address_space * -__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) -{ - struct i915_address_space *old; - - old = rcu_replace_pointer(ctx->vm, - i915_vm_open(vm), - lockdep_is_held(&ctx->mutex)); - GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - - context_apply_all(ctx, __apply_ppgtt, vm); - - return old; -} - -static void __assign_ppgtt(struct i915_gem_context *ctx, - struct i915_address_space *vm) -{ - if (vm == rcu_access_pointer(ctx->vm)) - return; - - vm = __set_ppgtt(ctx, vm); - if (vm) - i915_vm_close(vm); -} - -static void __set_timeline(struct intel_timeline **dst, - struct intel_timeline *src) -{ - struct intel_timeline *old = *dst; - - *dst = src ? intel_timeline_get(src) : NULL; - - if (old) - intel_timeline_put(old); -} - -static int __apply_timeline(struct intel_context *ce, void *timeline) -{ - __set_timeline(&ce->timeline, timeline); - return 0; -} - -static void __assign_timeline(struct i915_gem_context *ctx, - struct intel_timeline *timeline) -{ - __set_timeline(&ctx->timeline, timeline); - context_apply_all(ctx, __apply_timeline, timeline); -} - -static int __apply_watchdog(struct intel_context *ce, void *timeout_us) -{ - return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); -} - -static int -__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) -{ - int ret; - - ret = context_apply_all(ctx, __apply_watchdog, - (void *)(uintptr_t)timeout_us); - if (!ret) - ctx->watchdog.timeout_us = timeout_us; - - return ret; -} - -static void __set_default_fence_expiry(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int ret; - - if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || - !i915->params.request_timeout_ms) - return; - - /* Default expiry for user fences. */ - ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); - if (ret) - drm_notice(&i915->drm, - "Failed to configure default fence expiry! 
(%d)", - ret); } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, + const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; + struct i915_address_space *vm = NULL; + struct i915_gem_engines *e; + int err; + int i; - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(i915)) - return ERR_PTR(-EINVAL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); - ctx = __create_context(i915); - if (IS_ERR(ctx)) - return ctx; + kref_init(&ctx->ref); + ctx->i915 = i915; + ctx->sched = pc->sched; + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); - if (HAS_FULL_PPGTT(i915)) { + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); + + if (pc->vm) { + vm = i915_vm_get(pc->vm); + } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - context_close(ctx); - return ERR_CAST(ppgtt); + err = PTR_ERR(ppgtt); + goto err_ctx; } + vm = &ppgtt->vm; + } + if (vm) { + RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + /* i915_vm_open() takes a reference */ + i915_vm_put(vm); + } - i915_vm_put(&ppgtt->vm); + mutex_init(&ctx->engines_mutex); + if (pc->num_user_engines >= 0) { + i915_gem_context_set_user_engines(ctx); + e = user_engines(ctx, pc->num_user_engines, pc->user_engines); + } else { + i915_gem_context_clear_user_engines(ctx); + e = default_engines(ctx, pc->legacy_rcs_sseu); + } + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_vm; } + RCU_INIT_POINTER(ctx->engines, e); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct intel_timeline *timeline; + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); - timeline = intel_timeline_create(&i915->gt); - if (IS_ERR(timeline)) { - context_close(ctx); - return ERR_CAST(timeline); - } + /* NB: Mark all slices as needing a remap so that when the context first + * loads it will restore whatever remap state already exists. If there + * is no remap info, it will be a NOP. 
*/ + ctx->remap_slice = ALL_L3_SLICES(i915); - __assign_timeline(ctx, timeline); - intel_timeline_put(timeline); - } + ctx->user_flags = pc->user_flags; - __set_default_fence_expiry(ctx); + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) + ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + + if (pc->single_timeline) { + err = drm_syncobj_create(&ctx->syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) + goto err_engines; + } trace_i915_context_create(ctx); return ctx; + +err_engines: + free_engines(e); +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); +err_ctx: + kfree(ctx); + return ERR_PTR(err); } static void init_contexts(struct i915_gem_contexts *gc) @@ -889,83 +1424,83 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv, - u32 *id) +static void gem_context_register(struct i915_gem_context *ctx, + struct drm_i915_file_private *fpriv, + u32 id) { struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm; - int ret; + void *old; ctx->file_priv = fpriv; - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); - if (vm) - WRITE_ONCE(vm->file, fpriv); /* XXX */ - mutex_unlock(&ctx->mutex); - ctx->pid = get_task_pid(current, PIDTYPE_PID); snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); - if (ret) - goto err_pid; + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); - - return 0; - -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); - return ret; } int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; - u32 id; - xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); + mutex_init(&file_priv->proto_context_lock); + xa_init_flags(&file_priv->proto_context_xa, XA_FLAGS_ALLOC); + + /* 0 reserved for the default context */ + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC1); /* 0 reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) { + err = PTR_ERR(pc); + goto err; + } + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; } - err = gem_context_register(ctx, file_priv, &id); - if (err < 0) - goto err_ctx; + gem_context_register(ctx, file_priv, 0); - GEM_BUG_ON(id); return 0; -err_ctx: - context_close(ctx); err: xa_destroy(&file_priv->vm_xa); xa_destroy(&file_priv->context_xa); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); return err; } void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx; + xa_for_each(&file_priv->proto_context_xa, idx, pc) + proto_context_close(pc); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) 
context_close(ctx); xa_destroy(&file_priv->context_xa); @@ -995,8 +1530,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); - ppgtt->vm.file = file_priv; - if (args->extensions) { err = i915_user_extensions(u64_to_user_ptr(args->extensions), NULL, 0, @@ -1040,120 +1573,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - bool (*skip)(struct intel_context *ce, void *data), - int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct i915_gem_ww_ctx ww; - struct intel_context *ce; - int err = 0; - - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(&cb->base, NULL, cb_retire, 0); - err = i915_active_acquire(&cb->base); - if (err) { - kfree(cb); - return err; - } - - e = __context_engines_await(ctx, NULL); - if (!e) { - i915_active_release(&cb->base); - return -ENOENT; - } - - for_each_gem_engine(ce, e, it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines)) - continue; - - if (skip && skip(ce, data)) - continue; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = intel_context_pin_ww(ce, &ww); - if (err) - goto err; - - if (pin) - err = pin(ce, &ww, data); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_add_request(&cb->base, rq); - - i915_request_add(rq); -err_unpin: - intel_context_unpin(ce); -err: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (err) - break; - } - i915_sw_fence_complete(&e->fence); - - cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1186,211 +1605,6 @@ err_put: return err; } -static void set_ppgtt_barrier(void *data) -{ - struct i915_address_space *old = data; - - if (GRAPHICS_VER(old->i915) < 8) - gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - - i915_vm_close(old); -} - -static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data) -{ - struct i915_address_space *vm = ce->vm; - - if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) - /* ppGTT is not part of the legacy context image */ - return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); - - return 0; -} - -static int emit_ppgtt_update(struct i915_request *rq, void *data) -{ - struct i915_address_space *vm = rq->context->vm; - struct intel_engine_cs *engine = rq->engine; - u32 base = engine->mmio_base; - u32 *cs; - int i; - - if (i915_vm_is_4lvl(vm)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - const dma_addr_t pd_daddr = px_dma(ppgtt->pd); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - *cs++ = lower_32_bits(pd_daddr); - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } - - return 0; -} - -static bool skip_ppgtt_update(struct intel_context *ce, void *data) -{ - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); -} - -static int set_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_address_space *vm, *old; - int err; - - if (args->size) - return -EINVAL; - - if (!rcu_access_pointer(ctx->vm)) - return -ENODEV; - - if (upper_32_bits(args->value)) - return -ENOENT; - - rcu_read_lock(); - vm = xa_load(&file_priv->vm_xa, args->value); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); - if (!vm) - return -ENOENT; - - err = mutex_lock_interruptible(&ctx->mutex); - if (err) - goto out; - - if (i915_gem_context_is_closed(ctx)) { - err = -ENOENT; - goto unlock; - } - - if (vm == rcu_access_pointer(ctx->vm)) - goto unlock; - - old = __set_ppgtt(ctx, vm); - - /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - lut_close(ctx); - - /* - * We need to flush any requests using the current ppgtt before - * we release it as the requests do not hold a reference themselves, - * only indirectly through the context. 
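With the context-barrier machinery and set_ppgtt() being deleted here (the removal continues just below), I915_CONTEXT_PARAM_VM is no longer honoured once a context has been finalized; the address space has to be supplied while the context is still a proto-context, most portably at create time. A minimal userspace sketch of that path, assuming the uapi names from include/uapi/drm/i915_drm.h, an already-open render-node fd, full-PPGTT hardware, and a hypothetical helper name:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* include path may differ (e.g. libdrm) */

/* Hypothetical helper: create a PPGTT and attach it to a brand-new
 * context before the context is ever used.  Returns the context id,
 * or 0 on error. */
static __u32 create_ctx_with_vm(int fd)
{
	struct drm_i915_gem_vm_control vm = {};
	struct drm_i915_gem_context_create_ext_setparam set_vm = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = { .param = I915_CONTEXT_PARAM_VM },
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (__u64)(uintptr_t)&set_vm,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm))
		return 0;

	set_vm.param.value = vm.vm_id;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return 0;

	return create.ctx_id;
}
```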
- */ - err = context_barrier_task(ctx, ALL_ENGINES, - skip_ppgtt_update, - pin_ppgtt_update, - emit_ppgtt_update, - set_ppgtt_barrier, - old); - if (err) { - i915_vm_close(__set_ppgtt(ctx, old)); - i915_vm_close(old); - lut_close(ctx); /* force a rebuild of the old obj:vma cache */ - } - -unlock: - mutex_unlock(&ctx->mutex); -out: - i915_vm_put(vm); - return err; -} - -static int __apply_ringsize(struct intel_context *ce, void *sz) -{ - return intel_context_set_ring_size(ce, (unsigned long)sz); -} - -static int set_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE)) - return -EINVAL; - - if (args->value < I915_GTT_PAGE_SIZE) - return -EINVAL; - - if (args->value > 128 * I915_GTT_PAGE_SIZE) - return -EINVAL; - - return context_apply_all(ctx, - __apply_ringsize, - __intel_context_ring_size(args->value)); -} - -static int __get_ringsize(struct intel_context *ce, void *arg) -{ - long sz; - - sz = intel_context_get_ring_size(ce); - GEM_BUG_ON(sz > INT_MAX); - - return sz; /* stop on first engine */ -} - -static int get_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int sz; - - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - sz = context_apply_all(ctx, __get_ringsize, NULL); - if (sz < 0) - return sz; - - args->value = sz; - return 0; -} - int i915_gem_user_to_context_sseu(struct intel_gt *gt, const struct drm_i915_gem_context_param_sseu *user, @@ -1545,382 +1759,6 @@ out_ce: return ret; } -struct set_engines { - struct i915_gem_context *ctx; - struct i915_gem_engines *engines; -}; - -static int -set_engines__load_balance(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_load_balance __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct intel_engine_cs *stack[16]; - struct intel_engine_cs **siblings; - struct intel_context *ce; - u16 num_siblings, idx; - unsigned int n; - int err; - - if (!HAS_EXECLISTS(i915)) - return -ENODEV; - - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - return -ENODEV; /* not implement yet */ - - if (get_user(idx, &ext->engine_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (set->engines->engines[idx]) { - drm_dbg(&i915->drm, - "Invalid placement[%d], already occupied\n", idx); - return -EEXIST; - } - - if (get_user(num_siblings, &ext->num_siblings)) - return -EFAULT; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - err = check_user_mbz(&ext->mbz64); - if (err) - return err; - - siblings = stack; - if (num_siblings > ARRAY_SIZE(stack)) { - siblings = kmalloc_array(num_siblings, - sizeof(*siblings), - GFP_KERNEL); - if (!siblings) - return -ENOMEM; - } - - for (n = 0; n < num_siblings; n++) { - struct i915_engine_class_instance ci; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { - err = -EFAULT; - goto out_siblings; - } - - siblings[n] = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!siblings[n]) { - drm_dbg(&i915->drm, - "Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, 
ci.engine_class, ci.engine_instance); - err = -EINVAL; - goto out_siblings; - } - } - - ce = intel_execlists_create_virtual(siblings, n); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_siblings; - } - - intel_context_set_gem(ce, set->ctx); - - if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { - intel_context_put(ce); - err = -EEXIST; - goto out_siblings; - } - -out_siblings: - if (siblings != stack) - kfree(siblings); - - return err; -} - -static int -set_engines__bond(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_bond __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct i915_engine_class_instance ci; - struct intel_engine_cs *virtual; - struct intel_engine_cs *master; - u16 idx, num_bonds; - int err, n; - - if (get_user(idx, &ext->virtual_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, - "Invalid index for virtual engine: %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (!set->engines->engines[idx]) { - drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); - return -EINVAL; - } - virtual = set->engines->engines[idx]->engine; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { - err = check_user_mbz(&ext->mbz64[n]); - if (err) - return err; - } - - if (copy_from_user(&ci, &ext->master, sizeof(ci))) - return -EFAULT; - - master = intel_engine_lookup_user(i915, - ci.engine_class, ci.engine_instance); - if (!master) { - drm_dbg(&i915->drm, - "Unrecognised master engine: { class:%u, instance:%u }\n", - ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - if (get_user(num_bonds, &ext->num_bonds)) - return -EFAULT; - - for (n = 0; n < num_bonds; n++) { - struct intel_engine_cs *bond; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) - return -EFAULT; - - bond = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!bond) { - drm_dbg(&i915->drm, - "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - /* - * A non-virtual engine has no siblings to choose between; and - * a submit fence will always be directed to the one engine. 
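The same applies to the engine map: set_engines() and its load-balance/bond extensions are being deleted here (the removal continues below), so the map can only be supplied before the context is finalized, typically through CONTEXT_CREATE_EXT_SETPARAM. A hedged userspace sketch with a plain two-engine map (load balancing left aside), assuming the i915_drm.h uapi names and a hypothetical helper:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: one render + one copy engine.  The slot order
 * chosen here defines the engine indices later used by execbuf. */
static __u32 create_ctx_with_engines(int fd)
{
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
		.engines = {
			{ .engine_class = I915_ENGINE_CLASS_RENDER,
			  .engine_instance = 0 },
			{ .engine_class = I915_ENGINE_CLASS_COPY,
			  .engine_instance = 0 },
		},
	};
	struct drm_i915_gem_context_create_ext_setparam set_engines = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = {
			.param = I915_CONTEXT_PARAM_ENGINES,
			.size = sizeof(engines),
			.value = (__u64)(uintptr_t)&engines,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (__u64)(uintptr_t)&set_engines,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return 0;

	return create.ctx_id;
}
```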
- */ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } - } - - return 0; -} - -static const i915_user_extension_fn set_engines__extensions[] = { - [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, - [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, -}; - -static int -set_engines(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - struct drm_i915_private *i915 = ctx->i915; - struct i915_context_param_engines __user *user = - u64_to_user_ptr(args->value); - struct set_engines set = { .ctx = ctx }; - unsigned int num_engines, n; - u64 extensions; - int err; - - if (!args->size) { /* switch back to legacy user_ring_map */ - if (!i915_gem_context_user_engines(ctx)) - return 0; - - set.engines = default_engines(ctx); - if (IS_ERR(set.engines)) - return PTR_ERR(set.engines); - - goto replace; - } - - BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); - if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size, sizeof(*user->engines))) { - drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", - args->size); - return -EINVAL; - } - - /* - * Note that I915_EXEC_RING_MASK limits execbuf to only using the - * first 64 engines defined here. - */ - num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - set.engines = alloc_engines(num_engines); - if (!set.engines) - return -ENOMEM; - - for (n = 0; n < num_engines; n++) { - struct i915_engine_class_instance ci; - struct intel_engine_cs *engine; - struct intel_context *ce; - - if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { - __free_engines(set.engines, n); - return -EFAULT; - } - - if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && - ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { - set.engines->engines[n] = NULL; - continue; - } - - engine = intel_engine_lookup_user(ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!engine) { - drm_dbg(&i915->drm, - "Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); - __free_engines(set.engines, n); - return -ENOENT; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - __free_engines(set.engines, n); - return PTR_ERR(ce); - } - - intel_context_set_gem(ce, ctx); - - set.engines->engines[n] = ce; - } - set.engines->num_engines = num_engines; - - err = -EFAULT; - if (!get_user(extensions, &user->extensions)) - err = i915_user_extensions(u64_to_user_ptr(extensions), - set_engines__extensions, - ARRAY_SIZE(set_engines__extensions), - &set); - if (err) { - free_engines(set.engines); - return err; - } - -replace: - mutex_lock(&ctx->engines_mutex); - if (i915_gem_context_is_closed(ctx)) { - mutex_unlock(&ctx->engines_mutex); - free_engines(set.engines); - return -ENOENT; - } - if (args->size) - i915_gem_context_set_user_engines(ctx); - else - i915_gem_context_clear_user_engines(ctx); - set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); - mutex_unlock(&ctx->engines_mutex); - - /* Keep track of old engine sets for kill_context() */ - engines_idle_release(ctx, set.engines); - - return 0; -} - -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - bool user_engines; - int err = 0; - - e = __context_engines_await(ctx, &user_engines); - if (!e) - return -ENOENT; - - if (!user_engines) { - 
i915_sw_fence_complete(&e->fence); - args->size = 0; - return 0; - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->uabi_instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - i915_sw_fence_complete(&e->fence); - return err; -} - static int set_persistence(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) @@ -1931,41 +1769,30 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static int __apply_priority(struct intel_context *ce, void *arg) +static void __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; if (!intel_engine_has_timeslices(ce->engine)) - return 0; + return; - if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); - - return 0; } static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { - s64 priority = args->value; - - if (args->size) - return -EINVAL; - - if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - return -ENODEV; - - if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - return -EINVAL; + int err; - if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - return -EPERM; + err = validate_priority(ctx->i915, args); + if (err) + return err; - ctx->sched.priority = priority; + ctx->sched.priority = args->value; context_apply_all(ctx, __apply_priority, ctx); return 0; @@ -1978,15 +1805,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, int ret = 0; switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: if (args->size) ret = -EINVAL; @@ -2024,23 +1842,15 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_sseu(ctx, args); break; - case I915_CONTEXT_PARAM_VM: - ret = set_ppgtt(fpriv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = set_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: ret = set_persistence(ctx, args); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = set_ringsize(ctx, args); - break; - + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_VM: + case I915_CONTEXT_PARAM_ENGINES: default: ret = -EINVAL; break; @@ -2050,7 
+1860,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, } struct create_ext { - struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; struct drm_i915_file_private *fpriv; }; @@ -2065,233 +1875,84 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) if (local.param.ctx_id) return -EINVAL; - return ctx_setparam(arg->fpriv, arg->ctx, &local.param); + return set_proto_ctx_param(arg->fpriv, arg->pc, &local.param); } -static int copy_ring_size(struct intel_context *dst, - struct intel_context *src) +static int invalid_ext(struct i915_user_extension __user *ext, void *data) { - long sz; - - sz = intel_context_get_ring_size(src); - if (sz < 0) - return sz; - - return intel_context_set_ring_size(dst, sz); + return -EINVAL; } -static int clone_engines(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *clone, *e; - bool user_engines; - unsigned long n; - - e = __context_engines_await(src, &user_engines); - if (!e) - return -ENOENT; - - clone = alloc_engines(e->num_engines); - if (!clone) - goto err_unlock; - - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* - * Virtual engines are singletons; they can only exist - * inside a single context, because they embed their - * HW context... As each virtual context implies a single - * timeline (each engine can only dequeue a single request - * at any time), it would be surprising for two contexts - * to use the same engine. So let's create a copy of - * the virtual engine instead. - */ - if (intel_engine_is_virtual(engine)) - clone->engines[n] = - intel_execlists_clone_virtual(engine); - else - clone->engines[n] = intel_context_create(engine); - if (IS_ERR_OR_NULL(clone->engines[n])) { - __free_engines(clone, n); - goto err_unlock; - } - - intel_context_set_gem(clone->engines[n], dst); - - /* Copy across the preferred ringsize */ - if (copy_ring_size(clone->engines[n], e->engines[n])) { - __free_engines(clone, n + 1); - goto err_unlock; - } - } - clone->num_engines = n; - i915_sw_fence_complete(&e->fence); - - /* Serialised by constructor */ - engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); - if (user_engines) - i915_gem_context_set_user_engines(dst); - else - i915_gem_context_clear_user_engines(dst); - return 0; - -err_unlock: - i915_sw_fence_complete(&e->fence); - return -ENOMEM; -} - -static int clone_flags(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->user_flags = src->user_flags; - return 0; -} - -static int clone_schedattr(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->sched = src->sched; - return 0; -} - -static int clone_sseu(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - unsigned long n; - int err; - - /* no locking required; sole access under constructor*/ - clone = __context_engines_static(dst); - if (e->num_engines != clone->num_engines) { - err = -EINVAL; - goto unlock; - } - - for (n = 0; n < e->num_engines; n++) { - struct intel_context *ce = e->engines[n]; - - if (clone->engines[n]->engine->class != ce->engine->class) { - /* Must have compatible engine maps! 
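Unlike the VM and engine set, scheduling priority is still accepted on a live context; set_priority() now simply defers the range and capability checks to validate_priority(), as shown earlier. A small userspace sketch of the corresponding call, assuming the i915_drm.h names and a hypothetical helper:

```c
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: adjust the priority of an existing context.
 * Values range from I915_CONTEXT_MIN_USER_PRIORITY to
 * I915_CONTEXT_MAX_USER_PRIORITY; anything above the default needs
 * CAP_SYS_NICE, and the scheduler must advertise priority support. */
static int set_ctx_priority(int fd, __u32 ctx_id, __s64 prio)
{
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PRIORITY,
		.value = (__u64)prio,
	};

	return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
}
```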
*/ - err = -EINVAL; - goto unlock; - } - - /* serialises with set_sseu */ - err = intel_context_lock_pinned(ce); - if (err) - goto unlock; - - clone->engines[n]->sseu = ce->sseu; - intel_context_unlock_pinned(ce); - } - - err = 0; -unlock: - i915_gem_context_unlock_engines(src); - return err; -} +static const i915_user_extension_fn create_extensions[] = { + [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, + [I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext, +}; -static int clone_timeline(struct i915_gem_context *dst, - struct i915_gem_context *src) +static bool client_is_banned(struct drm_i915_file_private *file_priv) { - if (src->timeline) - __assign_timeline(dst, src->timeline); - - return 0; + return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; } -static int clone_vm(struct i915_gem_context *dst, - struct i915_gem_context *src) +static inline struct i915_gem_context * +__context_lookup(struct drm_i915_file_private *file_priv, u32 id) { - struct i915_address_space *vm; - int err = 0; - - if (!rcu_access_pointer(src->vm)) - return 0; + struct i915_gem_context *ctx; rcu_read_lock(); - vm = context_get_vm_rcu(src); + ctx = xa_load(&file_priv->context_xa, id); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; rcu_read_unlock(); - if (!mutex_lock_interruptible(&dst->mutex)) { - __assign_ppgtt(dst, vm); - mutex_unlock(&dst->mutex); - } else { - err = -EINTR; - } - - i915_vm_put(vm); - return err; + return ctx; } -static int create_clone(struct i915_user_extension __user *ext, void *data) -{ - static int (* const fn[])(struct i915_gem_context *dst, - struct i915_gem_context *src) = { -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y - MAP(ENGINES, clone_engines), - MAP(FLAGS, clone_flags), - MAP(SCHEDATTR, clone_schedattr), - MAP(SSEU, clone_sseu), - MAP(TIMELINE, clone_timeline), - MAP(VM, clone_vm), -#undef MAP - }; - struct drm_i915_gem_context_create_ext_clone local; - const struct create_ext *arg = data; - struct i915_gem_context *dst = arg->ctx; - struct i915_gem_context *src; - int err, bit; +static struct i915_gem_context * +finalize_create_context_locked(struct drm_i915_file_private *file_priv, + struct i915_gem_proto_context *pc, u32 id) +{ + struct i915_gem_context *ctx; + void *old; - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; + lockdep_assert_held(&file_priv->proto_context_lock); - BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != - I915_CONTEXT_CLONE_UNKNOWN); + ctx = i915_gem_create_context(file_priv->dev_priv, pc); + if (IS_ERR(ctx)) + return ctx; - if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) - return -EINVAL; + gem_context_register(ctx, file_priv, id); - if (local.rsvd) - return -EINVAL; + old = xa_erase(&file_priv->proto_context_xa, id); + GEM_BUG_ON(old != pc); + proto_context_close(pc); - rcu_read_lock(); - src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); - rcu_read_unlock(); - if (!src) - return -ENOENT; + /* One for the xarray and one for the caller */ + return i915_gem_context_get(ctx); +} - GEM_BUG_ON(src == dst); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_gem_proto_context *pc; + struct i915_gem_context *ctx; - for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { - if (!(local.flags & BIT(bit))) - continue; + ctx = __context_lookup(file_priv, id); + if (ctx) + return ctx; - err = fn[bit](dst, src); - if (err) - return err; + mutex_lock(&file_priv->proto_context_lock); + /* Try one more time under the lock 
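__context_lookup() above pairs xa_load() with kref_get_unless_zero() under rcu_read_lock() because the entry may be erased and its last reference dropped concurrently; a plain kref_get() could resurrect an object already being freed. A generic sketch of the idiom, with a hypothetical struct foo standing in for the context:

```c
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/xarray.h>

/* Hypothetical object: refcounted with a struct kref and freed via RCU
 * (kfree_rcu() elsewhere), looked up in a struct xarray. */
struct foo {
	struct kref ref;
	struct rcu_head rcu;
};

static struct foo *foo_lookup(struct xarray *xa, unsigned long id)
{
	struct foo *obj;

	rcu_read_lock();
	obj = xa_load(xa, id);
	/*
	 * The entry may be erased and its final reference dropped at any
	 * time; only a conditional get is safe here, since a plain
	 * kref_get() could revive an object that is already being freed.
	 */
	if (obj && !kref_get_unless_zero(&obj->ref))
		obj = NULL;
	rcu_read_unlock();

	return obj;
}
```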
*/ + ctx = __context_lookup(file_priv, id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, id); + if (!pc) + ctx = ERR_PTR(-ENOENT); + else + ctx = finalize_create_context_locked(file_priv, pc, id); } + mutex_unlock(&file_priv->proto_context_lock); - return 0; -} - -static const i915_user_extension_fn create_extensions[] = { - [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, - [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, -}; - -static bool client_is_banned(struct drm_i915_file_private *file_priv) -{ - return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; + return ctx; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, @@ -2321,9 +1982,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ext_data.ctx = i915_gem_create_context(i915, args->flags); - if (IS_ERR(ext_data.ctx)) - return PTR_ERR(ext_data.ctx); + ext_data.pc = proto_context_create(i915, args->flags); + if (IS_ERR(ext_data.pc)) + return PTR_ERR(ext_data.pc); if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { ret = i915_user_extensions(u64_to_user_ptr(args->extensions), @@ -2331,20 +1992,39 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, ARRAY_SIZE(create_extensions), &ext_data); if (ret) - goto err_ctx; + goto err_pc; } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); - if (ret < 0) - goto err_ctx; + if (GRAPHICS_VER(i915) > 12) { + struct i915_gem_context *ctx; + + /* Get ourselves a context ID */ + ret = xa_alloc(&ext_data.fpriv->context_xa, &id, NULL, + xa_limit_32b, GFP_KERNEL); + if (ret) + goto err_pc; + + ctx = i915_gem_create_context(i915, ext_data.pc); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto err_pc; + } + + proto_context_close(ext_data.pc); + gem_context_register(ctx, ext_data.fpriv, id); + } else { + ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); + if (ret < 0) + goto err_pc; + } args->ctx_id = id; drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; -err_ctx: - context_close(ext_data.ctx); +err_pc: + proto_context_close(ext_data.pc); return ret; } @@ -2353,6 +2033,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_context_destroy *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; if (args->pad != 0) @@ -2361,11 +2042,24 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; + /* We need to hold the proto-context lock here to prevent races + * with finalize_create_context_locked(). 
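GEM_CONTEXT_CREATE extensions form a singly linked chain of i915_user_extension nodes walked by i915_user_extensions(), with create_setparam() applied to the new proto-context once per SETPARAM node. A sketch of chaining two such nodes, assuming the i915_drm.h names and a hypothetical helper:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: one GEM_CONTEXT_CREATE call that marks the new
 * context non-recoverable and opts it out of error capture by chaining
 * two SETPARAM extension nodes. */
static __u32 create_ctx_chained(int fd)
{
	struct drm_i915_gem_context_create_ext_setparam no_capture = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = {
			.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE,
			.value = 1,
		},
	};
	struct drm_i915_gem_context_create_ext_setparam no_recover = {
		.base = {
			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
			.next_extension = (__u64)(uintptr_t)&no_capture,
		},
		.param = {
			.param = I915_CONTEXT_PARAM_RECOVERABLE,
			.value = 0,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (__u64)(uintptr_t)&no_recover,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return 0;

	return create.ctx_id;
}
```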
+ */ + mutex_lock(&file_priv->proto_context_lock); ctx = xa_erase(&file_priv->context_xa, args->ctx_id); - if (!ctx) + pc = xa_erase(&file_priv->proto_context_xa, args->ctx_id); + mutex_unlock(&file_priv->proto_context_lock); + + if (!ctx && !pc) return -ENOENT; + GEM_WARN_ON(ctx && pc); + + if (pc) + proto_context_close(pc); + + if (ctx) + context_close(ctx); - context_close(ctx); return 0; } @@ -2433,15 +2127,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; rcu_read_lock(); @@ -2480,20 +2169,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: args->size = 0; args->value = i915_gem_context_is_persistent(ctx); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = get_ringsize(ctx, args); - break; - + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_ENGINES: + case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; break; @@ -2508,16 +2192,32 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; - int ret; + int ret = 0; - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + mutex_lock(&file_priv->proto_context_lock); + ctx = __context_lookup(file_priv, args->ctx_id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, args->ctx_id); + if (pc) { + /* Contexts should be finalized inside + * GEM_CONTEXT_CREATE starting with graphics + * version 13. + */ + WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12); + ret = set_proto_ctx_param(file_priv, pc, args); + } else { + ret = -ENOENT; + } + } + mutex_unlock(&file_priv->proto_context_lock); - ret = ctx_setparam(file_priv, ctx, args); + if (ctx) { + ret = ctx_setparam(file_priv, ctx, args); + i915_gem_context_put(ctx); + } - i915_gem_context_put(ctx); return ret; } @@ -2527,16 +2227,13 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; - int ret; if (args->flags || args->pad) return -EINVAL; - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); - if (!ctx) - goto out; + ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); /* * We opt for unserialised reads here. 
This may result in tearing @@ -2553,10 +2250,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, args->batch_active = atomic_read(&ctx->guilty_count); args->batch_pending = atomic_read(&ctx->active_count); - ret = 0; -out: - rcu_read_unlock(); - return ret; + i915_gem_context_put(ctx); + return 0; } /* GEM context-engines iterator: for_each_gem_engine() */ @@ -2584,27 +2279,16 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) #include "selftests/i915_gem_context.c" #endif -static void i915_global_gem_context_shrink(void) +void i915_gem_context_module_exit(void) { - kmem_cache_shrink(global.slab_luts); + kmem_cache_destroy(slab_luts); } -static void i915_global_gem_context_exit(void) -{ - kmem_cache_destroy(global.slab_luts); -} - -static struct i915_global_gem_context global = { { - .shrink = i915_global_gem_context_shrink, - .exit = i915_global_gem_context_exit, -} }; - -int __init i915_global_gem_context_init(void) +int __init i915_gem_context_module_init(void) { - global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); - if (!global.slab_luts) + slab_luts = KMEM_CACHE(i915_lut_handle, 0); + if (!slab_luts) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index b5c908f3f4f2..18060536b0c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -133,6 +133,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { @@ -221,6 +224,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); for (i915_gem_engines_iter_init(&(it), (engines)); \ ((ce) = i915_gem_engines_iter_next(&(it)));) +void i915_gem_context_module_exit(void); +int i915_gem_context_module_init(void); + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 340473aa70de..94c03a97cb77 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -30,33 +30,187 @@ struct i915_address_space; struct intel_timeline; struct intel_ring; +/** + * struct i915_gem_engines - A set of engines + */ struct i915_gem_engines { union { + /** @link: Link in i915_gem_context::stale::engines */ struct list_head link; + + /** @rcu: RCU to use when freeing */ struct rcu_head rcu; }; + + /** @fence: Fence used for delayed destruction of engines */ struct i915_sw_fence fence; + + /** @ctx: i915_gem_context backpointer */ struct i915_gem_context *ctx; + + /** @num_engines: Number of engines in this set */ unsigned int num_engines; + + /** @engines: Array of engines */ struct intel_context *engines[]; }; +/** + * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set + */ struct i915_gem_engines_iter { + /** @idx: Index into i915_gem_engines::engines */ unsigned int idx; + + /** @engines: Engine set being iterated */ const struct i915_gem_engines *engines; }; /** + * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine + */ +enum i915_gem_engine_type { + /** 
@I915_GEM_ENGINE_TYPE_INVALID: An invalid engine */ + I915_GEM_ENGINE_TYPE_INVALID = 0, + + /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */ + I915_GEM_ENGINE_TYPE_PHYSICAL, + + /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ + I915_GEM_ENGINE_TYPE_BALANCED, +}; + +/** + * struct i915_gem_proto_engine - prototype engine + * + * This struct describes an engine that a context may contain. Engines + * have three types: + * + * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they + * show up as a NULL in i915_gem_engines::engines[i] and any attempt to + * use them by the user results in -EINVAL. They are also useful during + * proto-context construction because the client may create invalid + * engines and then set them up later as virtual engines. + * + * - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by + * i915_gem_proto_engine::engine. + * + * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described by + * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. + */ +struct i915_gem_proto_engine { + /** @type: Type of this engine */ + enum i915_gem_engine_type type; + + /** @engine: Engine, for physical */ + struct intel_engine_cs *engine; + + /** @num_siblings: Number of balanced siblings */ + unsigned int num_siblings; + + /** @siblings: Balanced siblings */ + struct intel_engine_cs **siblings; + + /** @sseu: Client-set SSEU parameters */ + struct intel_sseu sseu; +}; + +/** + * struct i915_gem_proto_context - prototype context + * + * The struct i915_gem_proto_context represents the creation parameters for + * a struct i915_gem_context. This is used to gather parameters provided + * either through creation flags or via SET_CONTEXT_PARAM so that, when we + * create the final i915_gem_context, those parameters can be immutable. + * + * The context uAPI allows for two methods of setting context parameters: + * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is + * allowed to be called at any time while the latter happens as part of + * GEM_CONTEXT_CREATE. When these were initially added, + * everything settable via one was settable via the other. While some + * params are fairly simple and setting them on a live context is harmless, + * such as the context priority, others are far trickier such as the VM or the + * set of engines. To avoid some truly nasty race conditions, we don't + * allow setting the VM or the set of engines on live contexts. + * + * The way we dealt with this without breaking older userspace that sets + * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of + * the actual context until after the client is done configuring it with + * SET_CONTEXT_PARAM. From the perspective of the client, it has the same + * u32 context ID the whole time. From the perspective of i915, however, + * it's an i915_gem_proto_context right up until the point where we attempt + * to do something which the proto-context can't handle, at which point the + * real context gets created. + * + * This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE + * is called, we create a proto-context, reserve a slot in context_xa but + * leave it NULL, and put the proto-context in the corresponding slot in + * proto_context_xa. Then, whenever we go to look up a context, we first + * check context_xa. If it's there, we return the i915_gem_context and + * we're done.
If it's not, we look in proto_context_xa and, if we find it + * there, we create the actual context and kill the proto-context. + * + * At the time we made this change (April 2021), we did a fairly complete + * audit of existing userspace to ensure this wouldn't break anything: + * + * - Mesa/i965 didn't use the engines or VM APIs at all + * + * - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and + * didn't use the VM API. + * + * - Mesa/iris didn't use the engines or VM APIs at all + * + * - The open-source compute-runtime didn't yet use the engines API but + * did use the VM API via SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM + * was always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE. + * + * - The media driver sets engines and bonding/balancing via + * SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM to set the VM was + * always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE, and setting engines immediately followed that. + * + * In order for this dance to work properly, any modification to an + * i915_gem_proto_context that is exposed to the client via + * drm_i915_file_private::proto_context_xa must be guarded by + * drm_i915_file_private::proto_context_lock. The exception is when a + * proto-context has not yet been exposed, such as when handling + * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. + */ +struct i915_gem_proto_context { + /** @vm: See &i915_gem_context.vm */ + struct i915_address_space *vm; + + /** @user_flags: See &i915_gem_context.user_flags */ + unsigned long user_flags; + + /** @sched: See &i915_gem_context.sched */ + struct i915_sched_attr sched; + + /** @num_user_engines: Number of user-specified engines or -1 */ + int num_user_engines; + + /** @user_engines: User-specified engines */ + struct i915_gem_proto_engine *user_engines; + + /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */ + struct intel_sseu legacy_rcs_sseu; + + /** @single_timeline: See &i915_gem_context.syncobj */ + bool single_timeline; +}; + +/** * struct i915_gem_context - client state * * The struct i915_gem_context represents the combined view of the driver and * logical hardware state for a particular client. */ struct i915_gem_context { - /** i915: i915 device backpointer */ + /** @i915: i915 device backpointer */ struct drm_i915_private *i915; - /** file_priv: owning file descriptor */ + /** @file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; /** @@ -81,9 +235,23 @@ struct i915_gem_context { * CONTEXT_USER_ENGINES flag is set). */ struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ - struct intel_timeline *timeline; + /** @engines_mutex: guards writes to engines */ + struct mutex engines_mutex; + + /** + * @syncobj: Shared timeline syncobj + * + * When the SINGLE_TIMELINE flag is set on context creation, we + * emulate a single timeline across all engines using this syncobj. + * For every execbuffer2 call, this syncobj is used as both an in- + * and out-fence. Unlike the real intel_timeline, this doesn't + * provide perfect atomic in-order guarantees if the client races + * with itself by calling execbuffer2 twice concurrently. However, + * if userspace races with itself, that's not likely to yield well- + * defined results anyway, so we choose not to care.
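From userspace nothing changes: I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE is requested exactly as before, it is merely emulated with the syncobj described above rather than a shared intel_timeline. A minimal sketch, assuming the i915_drm.h names, an execlists-capable device, and a hypothetical helper:

```c
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: all engines of the new context share one
 * (syncobj-emulated) timeline. */
static __u32 create_single_timeline_ctx(int fd)
{
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return 0;

	return create.ctx_id;
}
```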
+ */ + struct drm_syncobj *syncobj; /** * @vm: unique address space (GTT) @@ -106,7 +274,7 @@ struct i915_gem_context { */ struct pid *pid; - /** link: place with &drm_i915_private.context_list */ + /** @link: place with &drm_i915_private.context_list */ struct list_head link; /** @@ -129,7 +297,6 @@ struct i915_gem_context { * @user_flags: small set of booleans controlled by the user */ unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 @@ -142,11 +309,13 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; + /** @sched: scheduler parameters */ struct i915_sched_attr sched; - /** guilty_count: How many times this context has caused a GPU hang. */ + /** @guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** * @active_count: How many times this context was active during a GPU @@ -154,25 +323,23 @@ struct i915_gem_context { */ atomic_t active_count; - struct { - u64 timeout_us; - } watchdog; - /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ unsigned long hang_timestamp[2]; #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - /** remap_slice: Bitmask of cache lines that need remapping */ + /** @remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; /** - * handles_vma: rbtree to look up our context specific obj/vma for + * @handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** @lut_mutex: Locks handles_vma */ struct mutex lut_mutex; /** @@ -184,8 +351,11 @@ struct i915_gem_context { */ char name[TASK_COMM_LEN + 8]; + /** @stale: tracks stale engines to be destroyed */ struct { + /** @lock: guards engines */ spinlock_t lock; + /** @engines: list of stale engines */ struct list_head engines; } stale; }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 548ddf39d853..23fee13a3384 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -11,13 +11,14 @@ #include "i915_trace.h" #include "i915_user_extensions.h" -static u32 object_max_page_size(struct drm_i915_gem_object *obj) +static u32 object_max_page_size(struct intel_memory_region **placements, + unsigned int n_placements) { u32 max_page_size = 0; int i; - for (i = 0; i < obj->mm.n_placements; i++) { - struct intel_memory_region *mr = obj->mm.placements[i]; + for (i = 0; i < n_placements; i++) { + struct intel_memory_region *mr = placements[i]; GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); max_page_size = max_t(u32, max_page_size, mr->min_page_size); @@ -27,10 +28,13 @@ static u32 object_max_page_size(struct drm_i915_gem_object *obj) return max_page_size; } -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) +static int object_set_placements(struct drm_i915_gem_object *obj, + struct intel_memory_region **placements, + unsigned int n_placements) { + struct intel_memory_region **arr; + unsigned int i; + GEM_BUG_ON(!n_placements); /* @@ -44,9 +48,20 @@ static void object_set_placements(struct drm_i915_gem_object *obj, 
obj->mm.placements = &i915->mm.regions[mr->id]; obj->mm.n_placements = 1; } else { - obj->mm.placements = placements; + arr = kmalloc_array(n_placements, + sizeof(struct intel_memory_region *), + GFP_KERNEL); + if (!arr) + return -ENOMEM; + + for (i = 0; i < n_placements; i++) + arr[i] = placements[i]; + + obj->mm.placements = arr; obj->mm.n_placements = n_placements; } + + return 0; } static int i915_gem_publish(struct drm_i915_gem_object *obj, @@ -67,40 +82,67 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) +/** + * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT + * @i915: i915 private + * @size: size of the buffer, in bytes + * @placements: possible placement regions, in priority order + * @n_placements: number of possible placement regions + * + * This function is exposed primarily for selftests and does very little + * error checking. It is assumed that the set of placement regions has + * already been verified to be valid. + */ +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements) { - struct intel_memory_region *mr = obj->mm.placements[0]; + struct intel_memory_region *mr = placements[0]; + struct drm_i915_gem_object *obj; unsigned int flags; int ret; - size = round_up(size, object_max_page_size(obj)); + i915_gem_flush_free_objects(i915); + + size = round_up(size, object_max_page_size(placements, n_placements)); if (size == 0) - return -EINVAL; + return ERR_PTR(-EINVAL); /* For most of the ABI (e.g. mmap) we think in system pages */ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); if (i915_gem_object_size_2big(size)) - return -E2BIG; + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + ret = object_set_placements(obj, placements, n_placements); + if (ret) + goto object_free; /* - * For now resort to CPU based clearing for device local-memory, in the - * near future this will use the blitter engine for accelerated, GPU - * based clearing. + * I915_BO_ALLOC_USER will make sure the object is cleared before + * any user access. 
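In the CREATE_EXT path, the placement array consumed by __i915_gem_object_create_user() ultimately comes from the I915_GEM_CREATE_EXT_MEMORY_REGIONS extension of DRM_IOCTL_I915_GEM_CREATE_EXT. A hedged userspace sketch preferring device-local memory with a system-memory fallback; the helper name is hypothetical, and the memory instances would normally come from DRM_I915_QUERY_MEMORY_REGIONS rather than being hard-coded:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: ask for device-local memory first, with system
 * memory as a fallback.  Instance 0 is assumed here. */
static __u32 create_lmem_bo(int fd, __u64 size)
{
	struct drm_i915_gem_memory_class_instance regions[] = {
		{ .memory_class = I915_MEMORY_CLASS_DEVICE, .memory_instance = 0 },
		{ .memory_class = I915_MEMORY_CLASS_SYSTEM, .memory_instance = 0 },
	};
	struct drm_i915_gem_create_ext_memory_regions ext = {
		.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
		.num_regions = 2,
		.regions = (__u64)(uintptr_t)regions,
	};
	struct drm_i915_gem_create_ext create = {
		.size = size,	/* rounded up to the largest min_page_size */
		.extensions = (__u64)(uintptr_t)&ext,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
		return 0;

	return create.handle;
}
```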
*/ - flags = 0; - if (mr->type == INTEL_MEMORY_LOCAL) - flags = I915_BO_ALLOC_CPU_CLEAR; + flags = I915_BO_ALLOC_USER; - ret = mr->ops->init_object(mr, obj, size, flags); + ret = mr->ops->init_object(mr, obj, size, 0, flags); if (ret) - return ret; + goto object_free; GEM_BUG_ON(size != obj->base.size); trace_i915_gem_object_create(obj); - return 0; + return obj; + +object_free: + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + i915_gem_object_free(obj); + return ERR_PTR(ret); } int @@ -113,7 +155,6 @@ i915_gem_dumb_create(struct drm_file *file, enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; - int ret; switch (cpp) { case 1: @@ -146,22 +187,13 @@ i915_gem_dumb_create(struct drm_file *file, if (HAS_LMEM(to_i915(dev))) mem_type = INTEL_MEMORY_LOCAL; - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - mr = intel_memory_region_by_type(to_i915(dev), mem_type); - object_set_placements(obj, &mr, 1); - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(to_i915(dev), args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } /** @@ -178,31 +210,20 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; struct drm_i915_gem_object *obj; struct intel_memory_region *mr; - int ret; - - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - object_set_placements(obj, &mr, 1); - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } struct create_ext { struct drm_i915_private *i915; - struct drm_i915_gem_object *vanilla_object; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; + unsigned int n_placements; }; static void repr_placements(char *buf, size_t size, @@ -233,8 +254,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, struct drm_i915_private *i915 = ext_data->i915; struct drm_i915_gem_memory_class_instance __user *uregions = u64_to_user_ptr(args->regions); - struct drm_i915_gem_object *obj = ext_data->vanilla_object; - struct intel_memory_region **placements; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; u32 mask; int i, ret = 0; @@ -248,6 +268,8 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ret = -EINVAL; } + BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements)); + BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != ARRAY_SIZE(placements)); if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) { drm_dbg(&i915->drm, "num_regions is too large\n"); ret = -EINVAL; @@ -256,21 +278,13 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, if (ret) return ret; - placements = kmalloc_array(args->num_regions, - sizeof(struct intel_memory_region *), - GFP_KERNEL); - if (!placements) - return -ENOMEM; - mask = 0; for (i = 0; i < args->num_regions; i++) { struct drm_i915_gem_memory_class_instance region; struct intel_memory_region *mr; - if (copy_from_user(&region, uregions, sizeof(region))) { - ret = -EFAULT; - goto out_free; - } + if
(copy_from_user(&region, uregions, sizeof(region))) + return -EFAULT; mr = intel_memory_region_lookup(i915, region.memory_class, @@ -296,14 +310,14 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ++uregions; } - if (obj->mm.placements) { + if (ext_data->n_placements) { ret = -EINVAL; goto out_dump; } - object_set_placements(obj, placements, args->num_regions); - if (args->num_regions == 1) - kfree(placements); + ext_data->n_placements = args->num_regions; + for (i = 0; i < args->num_regions; i++) + ext_data->placements[i] = placements[i]; return 0; @@ -311,11 +325,11 @@ out_dump: if (1) { char buf[256]; - if (obj->mm.placements) { + if (ext_data->n_placements) { repr_placements(buf, sizeof(buf), - obj->mm.placements, - obj->mm.n_placements); + ext_data->placements, + ext_data->n_placements); drm_dbg(&i915->drm, "Placements were already set in previous EXT. Existing placements: %s\n", buf); @@ -325,8 +339,6 @@ out_dump: drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf); } -out_free: - kfree(placements); return ret; } @@ -361,44 +373,30 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create_ext *args = data; struct create_ext ext_data = { .i915 = i915 }; - struct intel_memory_region **placements_ext; struct drm_i915_gem_object *obj; int ret; if (args->flags) return -EINVAL; - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - - ext_data.vanilla_object = obj; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), &ext_data); - placements_ext = obj->mm.placements; if (ret) - goto object_free; + return ret; - if (!placements_ext) { - struct intel_memory_region *mr = + if (!ext_data.n_placements) { + ext_data.placements[0] = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - - object_set_placements(obj, &mr, 1); + ext_data.n_placements = 1; } - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, + ext_data.placements, + ext_data.n_placements); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - if (obj->mm.n_placements > 1) - kfree(placements_ext); - i915_gem_object_free(obj); - return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 616c3a2f1baf..afa34111de02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -12,6 +12,8 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" +I915_SELFTEST_DECLARE(static bool force_different_devices;) + static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { return to_intel_bo(buf->priv); @@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages_unlocked(obj); - if (ret) - goto err; - /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { ret = -ENOMEM; - goto err_unpin_pages; + goto err; } ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); @@ -58,8 +56,6 @@ err_free_sg: sg_free_table(st); err_free: kfree(st); -err_unpin_pages: - i915_gem_object_unpin_pages(obj); err: return ERR_PTR(ret); } @@ -68,13 +64,9 @@ static void
i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); - - i915_gem_object_unpin_pages(obj); } static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map) @@ -168,7 +160,46 @@ retry: return err; } +static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + struct i915_gem_ww_ctx ww; + int err; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) + return -EOPNOTSUPP; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM); + if (err) + continue; + + err = i915_gem_object_wait_migration(obj, 0); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + } + + return err; +} + +static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + + i915_gem_object_unpin_pages(obj); +} + static const struct dma_buf_ops i915_dmabuf_ops = { + .attach = i915_gem_dmabuf_attach, + .detach = i915_gem_dmabuf_detach, .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, @@ -204,6 +235,8 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) struct sg_table *pages; unsigned int sg_page_sizes; + assert_object_held(obj); + pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) @@ -241,7 +274,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, if (dma_buf->ops == &i915_dmabuf_ops) { obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ - if (obj->base.dev == dev) { + if (obj->base.dev == dev && + !I915_SELFTEST_ONLY(force_different_devices)) { /* * Importing dmabuf exported from out own gem increases * refcount on gem itself instead of f_count of dmabuf. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..b684a62bf3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = 0; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (!obj) { @@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, enum i915_cache_level level; int ret = 0; + if (IS_DGFX(i915)) + return -ENODEV; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; @@ -375,7 +381,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - /* Frame buffer must be in LMEM (no migration yet) */ + /* Frame buffer must be in LMEM */ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); @@ -484,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, u32 write_domain = args->write_domain; int err; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + /* Only handle setting domains to types used by the CPU. 
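The new dma-buf attach hook above leans on the for_i915_gem_ww() helper: the loop body is re-run after a -EDEADLK backoff and exits on success or any other error. A stripped-down sketch of the same idiom (hypothetical function name, only the lock-and-pin steps kept):

```c
#include "gem/i915_gem_object.h"
#include "i915_gem_ww.h"

/* Hypothetical helper mirroring the attach hook above: lock and pin an
 * object, letting for_i915_gem_ww() handle -EDEADLK backoff and retry. */
static int lock_and_pin(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	for_i915_gem_ww(&ww, err, true) {	/* true: interruptible waits */
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;	/* -EDEADLK retries, others exit */

		err = i915_gem_object_pin_pages(obj);
	}

	return err;
}
```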
*/ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4a6419d7be93..1aa249908b64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -277,18 +277,9 @@ struct i915_execbuffer { bool has_llc : 1; bool has_fence : 1; bool needs_unfenced : 1; - - struct i915_request *rq; - u32 *rq_cmd; - unsigned int rq_size; - struct intel_gt_buffer_pool_node *pool; } reloc_cache; - struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ - struct intel_context *reloc_context; - u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u64 batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ @@ -539,9 +530,6 @@ eb_validate_vma(struct i915_execbuffer *eb, entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; } - if (!(entry->flags & EXEC_OBJECT_PINNED)) - entry->flags |= eb->context_flags; - return 0; } @@ -741,17 +729,13 @@ static int eb_select_context(struct i915_execbuffer *eb) struct i915_gem_context *ctx; ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); - if (unlikely(!ctx)) - return -ENOENT; + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); eb->gem_context = ctx; if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - eb->context_flags = 0; - if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) - eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; - return 0; } @@ -920,21 +904,38 @@ err: return err; } -static int eb_validate_vmas(struct i915_execbuffer *eb) +static int eb_lock_vmas(struct i915_execbuffer *eb) { unsigned int i; int err; - INIT_LIST_HEAD(&eb->unbound); - for (i = 0; i < eb->buffer_count; i++) { - struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; err = i915_gem_object_lock(vma->obj, &eb->ww); if (err) return err; + } + + return 0; +} + +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + err = eb_lock_vmas(eb); + if (err) + return err; + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; err = eb_pin_vma(eb, entry, ev); if (err == -EDEADLK) @@ -992,7 +993,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr) +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -1006,11 +1007,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release eb_unreserve_vma(ev); - if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) { - ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT; - i915_gem_object_userptr_submit_fini(vma->obj); - } - if (final) i915_vma_put(vma); } @@ -1020,8 +1016,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release static void eb_destroy(const struct i915_execbuffer *eb) { - GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -1033,14 +1027,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return 
gen8_canonical_addr((int)reloc->delta + target->node.start); } -static void reloc_cache_clear(struct reloc_cache *cache) -{ - cache->rq = NULL; - cache->rq_cmd = NULL; - cache->pool = NULL; - cache->rq_size = 0; -} - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -1053,7 +1039,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->graphics_ver < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -1075,48 +1060,10 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - if (!cache->pool) - return; - - /* - * This is a bit nasty, normally we keep objects locked until the end - * of execbuffer, but we already submit this, and have to unlock before - * dropping the reference. Fortunately we can only hold 1 pool node at - * a time, so this should be harmless. - */ - i915_gem_ww_unlock_single(cache->pool->obj); - intel_gt_buffer_pool_put(cache->pool); - cache->pool = NULL; -} - -static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - struct drm_i915_gem_object *obj = cache->rq->batch->obj; - - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - intel_gt_chipset_flush(cache->rq->engine->gt); - - i915_request_add(cache->rq); - reloc_cache_put_pool(eb, cache); - reloc_cache_clear(cache); - - eb->reloc_pool = NULL; -} - static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(eb, cache); - if (!cache->vaddr) return; @@ -1298,295 +1245,6 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } -static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - int err; - - assert_vma_held(vma); - - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - obj->write_domain = 0; - - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - - return err; -} - -static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct intel_engine_cs *engine, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; - struct i915_request *rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - if (!pool) { - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, - cache->has_llc ? 
- I915_MAP_WB : - I915_MAP_WC); - if (IS_ERR(pool)) - return PTR_ERR(pool); - } - eb->reloc_pool = NULL; - - err = i915_gem_object_lock(pool->obj, &eb->ww); - if (err) - goto err_pool; - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err_pool; - } - intel_gt_buffer_pool_mark_used(pool); - - memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); - - batch = i915_vma_instance(pool->obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_unmap; - } - - err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto err_unmap; - - if (engine == eb->context->engine) { - rq = i915_request_create(eb->context); - } else { - struct intel_context *ce = eb->reloc_context; - - if (!ce) { - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; - } - - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); - eb->reloc_context = ce; - } - - err = intel_context_pin_ww(ce, &eb->ww); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - intel_context_unpin(ce); - } - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err) - goto err_request; - - err = reloc_move_to_gpu(rq, vma); - if (err) - goto err_request; - - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->graphics_ver > 5 ? 0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - - assert_vma_held(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - rq->batch = batch; - i915_vma_unpin(batch); - - cache->rq = rq; - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->pool = pool; - - /* Return with batch mapping (cmd) still pinned */ - return 0; - -skip_request: - i915_request_set_error_once(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(batch); -err_unmap: - i915_gem_object_unpin_map(pool->obj); -err_pool: - eb->reloc_pool = pool; - return err; -} - -static bool reloc_can_use_engine(const struct intel_engine_cs *engine) -{ - return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6; -} - -static u32 *reloc_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(eb, cache); - - if (unlikely(!cache->rq)) { - int err; - struct intel_engine_cs *engine = eb->engine; - - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - - if (!reloc_can_use_engine(engine)) { - engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; - if (!engine) - return ERR_PTR(-ENODEV); - } - - err = __reloc_gpu_alloc(eb, engine, vma, len); - if (unlikely(err)) - return ERR_PTR(err); - } - - cmd = cache->rq_cmd + cache->rq_size; - cache->rq_size += len; - - return cmd; -} - -static inline bool use_reloc_gpu(struct i915_vma *vma) -{ - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) - return true; - - if (DBG_FORCE_RELOC) - return false; - - return !dma_resv_test_signaled(vma->resv, true); -} - -static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) -{ - struct page *page; - unsigned long addr; - - GEM_BUG_ON(vma->pages != vma->obj->mm.pages); - - page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); - addr = 
PFN_PHYS(page_to_pfn(page)); - GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ - - return addr + offset_in_page(offset); -} - -static int __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - const unsigned int ver = eb->reloc_cache.graphics_ver; - unsigned int len; - u32 *batch; - u64 addr; - - if (ver >= 8) - len = offset & 7 ? 8 : 5; - else if (ver >= 4) - len = 4; - else - len = 3; - - batch = reloc_gpu(eb, vma, len); - if (batch == ERR_PTR(-EDEADLK)) - return -EDEADLK; - else if (IS_ERR(batch)) - return false; - - addr = gen8_canonical_addr(vma->node.start + offset); - if (ver >= 8) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_addr); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - *batch++ = upper_32_bits(target_addr); - } - } else if (ver >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (IS_I965G(eb->i915)) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } else if (ver >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (ver >= 3 && - !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_addr; - } else { - *batch++ = MI_STORE_DWORD_IMM; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } - - return true; -} - -static int reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - if (eb->reloc_cache.vaddr) - return false; - - if (!use_reloc_gpu(vma)) - return false; - - return __reloc_entry_gpu(eb, vma, offset, target_addr); -} - static u64 relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, @@ -1595,32 +1253,25 @@ relocate_entry(struct i915_vma *vma, { u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; - int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); - - if (reloc_gpu < 0) - return reloc_gpu; - - if (!reloc_gpu) { - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, eb, - offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_addr), - eb->reloc_cache.vaddr); - - if (wide) { - offset += sizeof(u32); - target_addr >>= 32; - wide = false; - goto repeat; - } + vaddr = reloc_vaddr(vma->obj, eb, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; } return target->node.start | UPDATE; @@ -1992,7 +1643,7 @@ repeat: } /* We may process 
another execbuffer during the unlock... */ - eb_release_vmas(eb, false, true); + eb_release_vmas(eb, false); i915_gem_ww_ctx_fini(&eb->ww); if (rq) { @@ -2061,9 +1712,7 @@ repeat_validate: list_for_each_entry(ev, &eb->relocs, reloc_link) { if (!have_copy) { - pagefault_disable(); err = eb_relocate_vma(eb, ev); - pagefault_enable(); if (err) break; } else { @@ -2096,7 +1745,7 @@ repeat_validate: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto repeat_validate; @@ -2193,7 +1842,7 @@ retry: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto retry; @@ -2270,7 +1919,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) #ifdef CONFIG_MMU_NOTIFIER if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { - spin_lock(&eb->i915->mm.notifier_lock); + read_lock(&eb->i915->mm.notifier_lock); /* * count is always at least 1, otherwise __EXEC_USERPTR_USED @@ -2288,7 +1937,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) break; } - spin_unlock(&eb->i915->mm.notifier_lock); + read_unlock(&eb->i915->mm.notifier_lock); } #endif @@ -2782,7 +2431,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n) while (n--) { drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); dma_fence_put(fences[n].dma_fence); - kfree(fences[n].chain_fence); + dma_fence_chain_free(fences[n].chain_fence); } kvfree(fences); } @@ -2896,9 +2545,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb, return -EINVAL; } - f->chain_fence = - kmalloc(sizeof(*f->chain_fence), - GFP_KERNEL); + f->chain_fence = dma_fence_chain_alloc(); if (!f->chain_fence) { drm_syncobj_put(syncobj); dma_fence_put(fence); @@ -3158,8 +2805,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.exec = exec; eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); eb.vma[0].vma = NULL; - eb.reloc_pool = eb.batch_pool = NULL; - eb.reloc_context = NULL; + eb.batch_pool = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -3234,7 +2880,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_lookup_vmas(&eb); if (err) { - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); goto err_engine; } @@ -3257,9 +2903,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, batch = eb.batch->vma; - /* All GPU relocation batches must be submitted prior to the user rq */ - GEM_BUG_ON(eb.reloc_cache.rq); - /* Allocate a request for this batch buffer nice and early. 
*/ eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { @@ -3267,11 +2910,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (unlikely(eb.gem_context->syncobj)) { + struct dma_fence *fence; + + fence = drm_syncobj_fence_get(eb.gem_context->syncobj); + err = i915_request_await_dma_fence(eb.request, fence); + dma_fence_put(fence); + if (err) + goto err_ext; + } + if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, - in_fence, - eb.engine->bond_execute); + in_fence); else err = i915_request_await_dma_fence(eb.request, in_fence); @@ -3324,10 +2976,16 @@ err_request: fput(out_fence->file); } } + + if (unlikely(eb.gem_context->syncobj)) { + drm_syncobj_replace_fence(eb.gem_context->syncobj, + &eb.request->fence); + } + i915_request_put(eb.request); err_vma: - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); WARN_ON(err == -EDEADLK); @@ -3335,10 +2993,6 @@ err_vma: if (eb.batch_pool) intel_gt_buffer_pool_put(eb.batch_pool); - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); err_engine: eb_put_engine(&eb); err_context: @@ -3452,7 +3106,3 @@ end:; kvfree(exec2_list); return err; } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_execbuffer.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ce6b664b10aa..13b217f75055 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -177,8 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 3b4aa28a076d..eb345305dc52 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -4,74 +4,10 @@ */ #include "intel_memory_region.h" -#include "intel_region_ttm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_lmem.h" #include "i915_drv.h" -static void lmem_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - obj->mm.dirty = false; - sg_free_table(pages); - kfree(pages); -} - -static int lmem_get_pages(struct drm_i915_gem_object *obj) -{ - unsigned int flags; - struct sg_table *pages; - - flags = I915_ALLOC_MIN_PAGE_SIZE; - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; - - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); - - /* Range manager is always contigous */ - if (obj->mm.region->is_range_manager) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); - if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - return PTR_ERR(pages); - } - - __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); - - if 
(obj->flags & I915_BO_ALLOC_CPU_CLEAR) { - void __iomem *vaddr = - i915_gem_object_lmem_io_map(obj, 0, obj->base.size); - - if (!vaddr) { - struct sg_table *pages = - __i915_gem_object_unset_pages(obj); - - if (!IS_ERR_OR_NULL(pages)) - lmem_put_pages(obj, pages); - } - - memset_io(vaddr, 0, obj->base.size); - io_mapping_unmap(vaddr); - } - - return 0; -} - -const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { - .name = "i915_gem_object_lmem", - .flags = I915_GEM_OBJECT_HAS_IOMEM, - - .get_pages = lmem_get_pages, - .put_pages = lmem_put_pages, - .release = i915_gem_object_release_memory_region, -}; - void __iomem * i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, unsigned long n, @@ -87,39 +23,91 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } +/** + * i915_gem_object_is_lmem - Whether the object is resident in + * lmem + * @obj: The object to check. + * + * Even if an object is allowed to migrate and change memory region, + * this function checks whether it will always be present in lmem when + * valid *or* if that's not the case, whether it's currently resident in lmem. + * For migratable and evictable objects, the latter only makes sense when + * the object is locked. + * + * Return: Whether the object migratable but resident in lmem, or not + * migratable and will be present in lmem when valid. + */ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) { - struct intel_memory_region *mr = obj->mm.region; + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); +#ifdef CONFIG_LOCKDEP + if (i915_gem_object_migratable(obj) && + i915_gem_object_evictable(obj)) + assert_object_held(obj); +#endif return mr && (mr->type == INTEL_MEMORY_LOCAL || mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_is_lmem - Whether the object is resident in + * lmem while in the fence signaling critical path. + * @obj: The object to check. + * + * This function is intended to be called from within the fence signaling + * path where the fence keeps the object from being migrated. For example + * during gpu reset or similar. + * + * Return: Whether the object is resident in lmem. + */ +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + +#ifdef CONFIG_LOCKDEP + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true)); +#endif + return mr && (mr->type == INTEL_MEMORY_LOCAL || + mr->type == INTEL_MEMORY_STOLEN_LOCAL); +} + +/** + * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the + * minimum page size for the backing pages. + * @i915: The i915 instance. + * @size: The size in bytes for the object. Note that we need to round the size + * up depending on the @page_size. The final object size can be fished out from + * the drm GEM object. + * @page_size: The requested minimum page size in bytes for this object. This is + * useful if we need something bigger than the regions min_page_size due to some + * hw restriction, or in some very specialised cases where it needs to be + * smaller, where the internal fragmentation cost is too great when rounding up + * the object size. + * @flags: The optional BO allocation flags. + * + * Note that this interface assumes you know what you are doing when forcing the + * @page_size. If this is smaller than the regions min_page_size then it can + * never be inserted into any GTT, otherwise it might lead to undefined + * behaviour. 
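+ *
+ * A minimal usage sketch (illustrative only, not part of this patch): the
+ * 64K page size and the contiguous flag below are example choices for some
+ * assumed hw restriction, not requirements of the interface.
+ *
+ *	obj = __i915_gem_object_create_lmem_with_ps(i915, size, SZ_64K,
+ *						    I915_BO_ALLOC_CONTIGUOUS);
+ *	if (IS_ERR(obj))
+ *		return PTR_ERR(obj);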
+ * + * Return: The object pointer, which might be an ERR_PTR in the case of failure. + */ struct drm_i915_gem_object * -i915_gem_object_create_lmem(struct drm_i915_private *i915, - resource_size_t size, - unsigned int flags) +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], - size, flags); + size, page_size, flags); } -int __i915_gem_lmem_object_init(struct intel_memory_region *mem, - struct drm_i915_gem_object *obj, - resource_size_t size, - unsigned int flags) +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags) { - static struct lock_class_key lock_class; - struct drm_i915_private *i915 = mem->i915; - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class, flags); - - obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - - i915_gem_object_init_memory_region(obj, mem); - - return 0; + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, 0, flags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index fac6bc5a5ebb..4ee81fc66302 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,14 +21,16 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags); -int __i915_gem_lmem_object_init(struct intel_memory_region *mem, - struct drm_i915_gem_object *obj, - resource_size_t size, - unsigned int flags); - #endif /* !__I915_GEM_LMEM_H */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 215326764606..5130e8ed9564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -19,6 +19,7 @@ #include "i915_gem_mman.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_gem_ttm.h" #include "i915_vma.h" static inline bool @@ -624,6 +625,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo; int err; + GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops); + mmo = lookup_mmo(obj, mmap_type); if (mmo) goto out; @@ -642,7 +645,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + NULL); if (err) goto err; @@ -666,40 +670,58 @@ err: } static int -__assign_mmap_offset(struct drm_file *file, - u32 handle, +__assign_mmap_offset(struct drm_i915_gem_object *obj, enum i915_mmap_type mmap_type, - u64 *offset) + u64 *offset, struct drm_file *file) { - struct drm_i915_gem_object *obj; struct i915_mmap_offset *mmo; - int err; - obj = i915_gem_object_lookup(file, handle); - if (!obj) - return 
-ENOENT; + if (i915_gem_object_never_mmap(obj)) + return -ENODEV; - if (i915_gem_object_never_mmap(obj)) { - err = -ENODEV; - goto out; + if (obj->ops->mmap_offset) { + if (mmap_type != I915_MMAP_TYPE_FIXED) + return -ENODEV; + + *offset = obj->ops->mmap_offset(obj); + return 0; } + if (mmap_type == I915_MMAP_TYPE_FIXED) + return -ENODEV; + if (mmap_type != I915_MMAP_TYPE_GTT && !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) { - err = -ENODEV; - goto out; - } + !i915_gem_object_has_iomem(obj)) + return -ENODEV; mmo = mmap_offset_attach(obj, mmap_type, file); - if (IS_ERR(mmo)) { - err = PTR_ERR(mmo); - goto out; - } + if (IS_ERR(mmo)) + return PTR_ERR(mmo); *offset = drm_vma_node_offset_addr(&mmo->vma_node); - err = 0; -out: + return 0; +} + +static int +__assign_mmap_offset_handle(struct drm_file *file, + u32 handle, + enum i915_mmap_type mmap_type, + u64 *offset) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + err = __assign_mmap_offset(obj, mmap_type, offset, file); + i915_gem_object_unlock(obj); +out_put: i915_gem_object_put(obj); return err; } @@ -712,14 +734,16 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, { enum i915_mmap_type mmap_type; - if (boot_cpu_has(X86_FEATURE_PAT)) + if (HAS_LMEM(to_i915(dev))) + mmap_type = I915_MMAP_TYPE_FIXED; + else if (boot_cpu_has(X86_FEATURE_PAT)) mmap_type = I915_MMAP_TYPE_WC; else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) return -ENODEV; else mmap_type = I915_MMAP_TYPE_GTT; - return __assign_mmap_offset(file, handle, mmap_type, offset); + return __assign_mmap_offset_handle(file, handle, mmap_type, offset); } /** @@ -783,11 +807,15 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, type = I915_MMAP_TYPE_UC; break; + case I915_MMAP_OFFSET_FIXED: + type = I915_MMAP_TYPE_FIXED; + break; + default: return -EINVAL; } - return __assign_mmap_offset(file, args->handle, type, &args->offset); + return __assign_mmap_offset_handle(file, args->handle, type, &args->offset); } static void vm_open(struct vm_area_struct *vma) @@ -891,8 +919,18 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) * destroyed and will be invalid when the vma manager lock * is released. */ - mmo = container_of(node, struct i915_mmap_offset, vma_node); - obj = i915_gem_object_get_rcu(mmo->obj); + if (!node->driver_private) { + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + + GEM_BUG_ON(obj && obj->ops->mmap_ops); + } else { + obj = i915_gem_object_get_rcu + (container_of(node, struct drm_i915_gem_object, + base.vma_node)); + + GEM_BUG_ON(obj && !obj->ops->mmap_ops); + } } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); rcu_read_unlock(); @@ -913,8 +951,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return PTR_ERR(anon); } - vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_private_data = mmo; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; /* * We keep the ref on mmo->obj, not vm_file, but we require @@ -928,6 +965,15 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) /* Drop the initial creation reference, the vma is now holding one. 
*/ fput(anon); + if (obj->ops->mmap_ops) { + vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = obj->ops->mmap_ops; + vma->vm_private_data = node->driver_private; + return 0; + } + + vma->vm_private_data = mmo; + switch (mmo->mmap_type) { case I915_MMAP_TYPE_WC: vma->vm_page_prot = @@ -935,6 +981,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &vm_ops_cpu; break; + case I915_MMAP_TYPE_FIXED: + GEM_WARN_ON(1); + fallthrough; case I915_MMAP_TYPE_WB: vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &vm_ops_cpu; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 5706d471692d..6fb9afb65034 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -30,14 +30,10 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" -#include "i915_globals.h" #include "i915_memcpy.h" #include "i915_trace.h" -static struct i915_global_object { - struct i915_global base; - struct kmem_cache *slab_objects; -} global; +static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; @@ -45,7 +41,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; - obj = kmem_cache_zalloc(global.slab_objects, GFP_KERNEL); + obj = kmem_cache_zalloc(slab_objects, GFP_KERNEL); if (!obj) return NULL; obj->base.funcs = &i915_gem_object_funcs; @@ -55,7 +51,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) void i915_gem_object_free(struct drm_i915_gem_object *obj) { - return kmem_cache_free(global.slab_objects, obj); + return kmem_cache_free(slab_objects, obj); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -172,7 +168,7 @@ static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *f } } -static void __i915_gem_free_object_rcu(struct rcu_head *head) +void __i915_gem_free_object_rcu(struct rcu_head *head) { struct drm_i915_gem_object *obj = container_of(head, typeof(*obj), rcu); @@ -208,59 +204,69 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) } } -static void __i915_gem_free_objects(struct drm_i915_private *i915, - struct llist_node *freed) +void __i915_gem_free_object(struct drm_i915_gem_object *obj) { - struct drm_i915_gem_object *obj, *on; + trace_i915_gem_object_destroy(obj); - llist_for_each_entry_safe(obj, on, freed, freed) { - trace_i915_gem_object_destroy(obj); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); - if (!list_empty(&obj->vma.list)) { - struct i915_vma *vma; + __i915_vma_put(vma); - /* - * Note that the vma keeps an object reference while - * it is active, so it *should* not sleep while we - * destroy it. Our debug code errs insits it *might*. - * For the moment, play along. 
- */ spin_lock(&obj->vma.lock); - while ((vma = list_first_entry_or_null(&obj->vma.list, - struct i915_vma, - obj_link))) { - GEM_BUG_ON(vma->obj != obj); - spin_unlock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); + } - __i915_vma_put(vma); + __i915_gem_object_free_mmaps(obj); - spin_lock(&obj->vma.lock); - } - spin_unlock(&obj->vma.lock); - } + GEM_BUG_ON(!list_empty(&obj->lut_list)); - __i915_gem_object_free_mmaps(obj); + atomic_set(&obj->mm.pages_pin_count, 0); + __i915_gem_object_put_pages(obj); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + bitmap_free(obj->bit_17); - GEM_BUG_ON(!list_empty(&obj->lut_list)); + if (obj->base.import_attach) + drm_prime_gem_destroy(&obj->base, NULL); - atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - bitmap_free(obj->bit_17); + drm_gem_free_mmap_offset(&obj->base); - if (obj->base.import_attach) - drm_prime_gem_destroy(&obj->base, NULL); + if (obj->ops->release) + obj->ops->release(obj); - drm_gem_free_mmap_offset(&obj->base); + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); - if (obj->ops->release) - obj->ops->release(obj); + if (obj->shares_resv_from) + i915_vm_resv_put(obj->shares_resv_from); +} - if (obj->mm.n_placements > 1) - kfree(obj->mm.placements); +static void __i915_gem_free_objects(struct drm_i915_private *i915, + struct llist_node *freed) +{ + struct drm_i915_gem_object *obj, *on; - if (obj->shares_resv_from) - i915_vm_resv_put(obj->shares_resv_from); + llist_for_each_entry_safe(obj, on, freed, freed) { + might_sleep(); + if (obj->ops->delayed_free) { + obj->ops->delayed_free(obj); + continue; + } + __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ call_rcu(&obj->rcu, __i915_gem_free_object_rcu); @@ -318,6 +324,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) * worker and performing frees directly from subsequent allocations for * crude but effective memory throttling. */ + if (llist_add(&obj->freed, &i915->mm.free_list)) queue_work(i915->wq, &i915->mm.free_work); } @@ -410,34 +417,254 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, return 0; } -void i915_gem_init__objects(struct drm_i915_private *i915) +/** + * i915_gem_object_evictable - Whether object is likely evictable after unbind. + * @obj: The object to check + * + * This function checks whether the object is likely unvictable after unbind. + * If the object is not locked when checking, the result is only advisory. + * If the object is locked when checking, and the function returns true, + * then an eviction should indeed be possible. But since unlocked vma + * unpinning and unbinding is currently possible, the object can actually + * become evictable even if this function returns false. + * + * Return: true if the object may be evictable. False otherwise. 
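+ *
+ * As a usage note, the migration helpers added later in this patch use this
+ * as a pre-check before attempting to move an object, roughly:
+ *
+ *	if (!i915_gem_object_evictable(obj))
+ *		return false;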
+ */ +bool i915_gem_object_evictable(struct drm_i915_gem_object *obj) { - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + struct i915_vma *vma; + int pin_count = atomic_read(&obj->mm.pages_pin_count); + + if (!pin_count) + return true; + + spin_lock(&obj->vma.lock); + list_for_each_entry(vma, &obj->vma.list, obj_link) { + if (i915_vma_is_pinned(vma)) { + spin_unlock(&obj->vma.lock); + return false; + } + if (atomic_read(&vma->pages_count)) + pin_count--; + } + spin_unlock(&obj->vma.lock); + GEM_WARN_ON(pin_count < 0); + + return pin_count == 0; +} + +/** + * i915_gem_object_migratable - Whether the object is migratable out of the + * current region. + * @obj: Pointer to the object. + * + * Return: Whether the object is allowed to be resident in other + * regions than the current while pages are present. + */ +bool i915_gem_object_migratable(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + + if (!mr) + return false; + + return obj->mm.n_placements > 1; } -static void i915_global_objects_shrink(void) +/** + * i915_gem_object_has_struct_page - Whether the object is page-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the page backing may change under the caller. + * + * Return: True if page-backed, false otherwise. + */ +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - kmem_cache_shrink(global.slab_objects); +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE; } -static void i915_global_objects_exit(void) +/** + * i915_gem_object_has_iomem - Whether the object is iomem-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the iomem backing may change under the caller. + * + * Return: True if iomem-backed, false otherwise. + */ +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) { - kmem_cache_destroy(global.slab_objects); +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_IOMEM; } -static struct i915_global_object global = { { - .shrink = i915_global_objects_shrink, - .exit = i915_global_objects_exit, -} }; +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate objects and might be slightly less permissive + * than i915_gem_object_migrate() when it comes to objects with the + * I915_BO_ALLOC_USER flag set. + * + * Return: true if migration is possible, false otherwise. 
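+ *
+ * A usage sketch (illustrative only; it mirrors the dma-buf attach path
+ * added earlier in this patch, with error handling trimmed):
+ *
+ *	struct i915_gem_ww_ctx ww;
+ *	int err;
+ *
+ *	if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM))
+ *		return -EOPNOTSUPP;
+ *
+ *	for_i915_gem_ww(&ww, err, true) {
+ *		err = i915_gem_object_lock(obj, &ww);
+ *		if (err)
+ *			continue;
+ *		err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
+ *		if (err)
+ *			continue;
+ *		err = i915_gem_object_wait_migration(obj, 0);
+ *	}
+ *	return err;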
+ */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + mr = i915->mm.regions[id]; + if (!mr) + return false; + + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!obj->ops->migrate) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * Note: the @ww parameter is not used yet, but included to make sure + * callers put some effort into obtaining a valid ww ctx if one is + * available. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. + */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + GEM_BUG_ON(!mr); + + if (!i915_gem_object_can_migrate(obj, id)) + return -EINVAL; + + if (!obj->ops->migrate) { + if (GEM_WARN_ON(obj->mm.region != mr)) + return -EINVAL; + return 0; + } + + return obj->ops->migrate(obj, mr); +} + +/** + * i915_gem_object_placement_possible - Check whether the object can be + * placed at certain memory type + * @obj: Pointer to the object + * @type: The memory type to check + * + * Return: True if the object can be placed in @type. False otherwise. 
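+ *
+ * Illustrative example (not taken from this patch): a caller that needs the
+ * object in system memory at some point could reject objects that can never
+ * be placed there, e.g.:
+ *
+ *	if (!i915_gem_object_placement_possible(obj, INTEL_MEMORY_SYSTEM))
+ *		return -EINVAL;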
+ */ +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type) +{ + unsigned int i; + + if (!obj->mm.n_placements) { + switch (type) { + case INTEL_MEMORY_LOCAL: + return i915_gem_object_has_iomem(obj); + case INTEL_MEMORY_SYSTEM: + return i915_gem_object_has_pages(obj); + default: + /* Ignore stolen for now */ + GEM_BUG_ON(1); + return false; + } + } + + for (i = 0; i < obj->mm.n_placements; i++) { + if (obj->mm.placements[i]->type == type) + return true; + } + + return false; +} + +void i915_gem_init__objects(struct drm_i915_private *i915) +{ + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); +} + +void i915_objects_module_exit(void) +{ + kmem_cache_destroy(slab_objects); +} -int __init i915_global_objects_init(void) +int __init i915_objects_module_init(void) { - global.slab_objects = - KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); - if (!global.slab_objects) + slab_objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); + if (!slab_objects) return -ENOMEM; - i915_global_register(&global.base); return 0; } @@ -450,6 +677,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" +#include "selftests/i915_gem_migrate.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7c0eb425cb3b..48112b9d76df 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -12,10 +12,14 @@ #include <drm/drm_device.h> #include "display/intel_frontbuffer.h" +#include "intel_memory_region.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" +#include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. Let's document @@ -44,6 +48,9 @@ static inline bool i915_gem_object_size_2big(u64 size) void i915_gem_init__objects(struct drm_i915_private *i915); +void i915_objects_module_exit(void); +int i915_objects_module_init(void); + struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); @@ -57,6 +64,10 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, const void *data, resource_size_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; @@ -147,7 +158,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) /* * If more than one potential simultaneous locker, assert held. */ -static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj) { /* * Note mm list lookup is protected by @@ -169,13 +180,17 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, else ret = dma_resv_lock(obj->base.resv, ww ? 
&ww->ctx : NULL); - if (!ret && ww) + if (!ret && ww) { + i915_gem_object_get(obj); list_add_tail(&obj->obj_link, &ww->obj_list); + } if (ret == -EALREADY) ret = 0; - if (ret == -EDEADLK) + if (ret == -EDEADLK) { + i915_gem_object_get(obj); ww->contended = obj; + } return ret; } @@ -200,6 +215,9 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { + if (obj->ops->adjust_lru) + obj->ops->adjust_lru(obj); + dma_resv_unlock(obj->base.resv); } @@ -258,17 +276,9 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, return obj->ops->flags & flags; } -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; -} +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj); -static inline bool -i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); -} +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) @@ -339,22 +349,22 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset, bool allow_alloc); + unsigned int *offset, bool dma); static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc); + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false); } static inline struct scatterlist * i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc); + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true); } struct page * @@ -587,6 +597,27 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); +void __i915_gem_free_object_rcu(struct rcu_head *head); + +void __i915_gem_free_object(struct drm_i915_gem_object *obj); + +bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); + +bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); + +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id); + +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id); + +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags); + +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) @@ -596,14 +627,12 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj); -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj); #else static inline bool i915_gem_object_is_userptr(struct 
drm_i915_gem_object *obj) { return false; } static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } -static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); } static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c deleted file mode 100644 index 3e28c68fda3e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_buffer_pool.h" -#include "gt/intel_ring.h" -#include "i915_gem_clflush.h" -#include "i915_gem_object_blt.h" - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 offset; - u64 count; - u64 rem; - u32 size; - u32 *cmd; - int err; - - GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(vma->size, block_size), block_size); - size = (1 + 8 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = vma->size; - offset = vma->node.start; - - do { - u32 size = min_t(u64, rem, block_size); - - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = offset; - *cmd++ = value; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) -{ - int err; - - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, 0); - if (unlikely(err)) - return err; - - return intel_gt_buffer_pool_mark_active(vma->private, rq); -} - -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) -{ - i915_vma_unpin(vma); - intel_gt_buffer_pool_put(vma->private); - intel_engine_pm_put(ce->engine); -} - -static int -move_obj_to_gpu(struct drm_i915_gem_object *obj, - struct i915_request *rq, - bool write) -{ - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - - return i915_request_await_object(rq, obj, write); -} - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value) -{ - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, ce->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(obj, &ww); - if (err) - goto out; - - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - batch = intel_emit_vma_fill_blt(ce, vma, &ww, value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_vma; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - err = move_obj_to_gpu(vma->obj, rq, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (unlikely(err)) - goto out_request; - - if (ce->engine->emit_init_breadcrumb) - err = ce->engine->emit_init_breadcrumb(rq); - - if (likely(!err)) - err = ce->engine->emit_bb_start(rq, - batch->node.start, - batch->node.size, - 0); -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_vma: - i915_vma_unpin(vma); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -/* Wa_1209644611:icl,ehl */ -static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) -{ - u32 height = size >> PAGE_SHIFT; - - if (GRAPHICS_VER(i915) != 11) - return false; - - return height % 4 == 3 && height <= 8; -} - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 src_offset, dst_offset; - u64 count, rem; - u32 size, *cmd; - int err; - - GEM_BUG_ON(src->size != dst->size); - - 
GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(dst->size, block_size), block_size); - size = (1 + 11 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = src->size; - src_offset = src->node.start; - dst_offset = dst->node.start; - - do { - size = min_t(u64, rem, block_size); - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 9 && - !wa_1209644611_applies(i915, size)) { - *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); - *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else { - *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; - *cmd++ = dst_offset; - *cmd++ = PAGE_SIZE; - *cmd++ = src_offset; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - src_offset += size; - dst_offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce) -{ - struct i915_address_space *vm = ce->vm; - struct i915_vma *vma[2], *batch; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - int err, i; - - vma[0] = i915_vma_instance(src, vm, NULL); - if (IS_ERR(vma[0])) - return PTR_ERR(vma[0]); - - vma[1] = i915_vma_instance(dst, vm, NULL); - if (IS_ERR(vma[1])) - return PTR_ERR(vma[1]); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(src, &ww); - if (!err) - err = i915_gem_object_lock(dst, &ww); - if (!err) - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_unpin_src; - - batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unpin_dst; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - err = move_obj_to_gpu(vma[i]->obj, rq, i); - if (unlikely(err)) - goto out_request; - } - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - unsigned int flags = i ? 
EXEC_OBJECT_WRITE : 0; - - err = i915_vma_move_to_active(vma[i], rq, flags); - if (unlikely(err)) - goto out_request; - } - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_unpin_dst: - i915_vma_unpin(vma[1]); -out_unpin_src: - i915_vma_unpin(vma[0]); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_object_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h deleted file mode 100644 index 2409fdcccf0e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_OBJECT_BLT_H__ -#define __I915_GEM_OBJECT_BLT_H__ - -#include <linux/types.h> - -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_vma.h" - -struct drm_i915_gem_object; -struct i915_gem_ww_ctx; - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value); - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst); - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value); - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce); - -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d047ea126029..2471f36aaff3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -18,6 +18,7 @@ struct drm_i915_gem_object; struct intel_fronbuffer; +struct intel_memory_region; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -33,10 +34,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) -#define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_MMAP BIT(4) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_NO_MMAP BIT(3) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -61,13 +61,117 @@ struct drm_i915_gem_object_ops { const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); + u64 (*mmap_offset)(struct drm_i915_gem_object *obj); int (*dmabuf_export)(struct drm_i915_gem_object *obj); + + /** + * adjust_lru - notify that the madvise value was updated + * @obj: The gem object + * + * The madvise value may have been updated, or object was recently + * referenced so act accordingly (Perhaps changing an LRU list etc). + */ + void (*adjust_lru)(struct drm_i915_gem_object *obj); + + /** + * delayed_free - Override the default delayed free implementation + */ + void (*delayed_free)(struct drm_i915_gem_object *obj); + + /** + * migrate - Migrate object to a different region either for + * pinning or for as long as the object lock is held. + */ + int (*migrate)(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr); + void (*release)(struct drm_i915_gem_object *obj); + const struct vm_operations_struct *mmap_ops; const char *name; /* friendly name for debug, e.g. lockdep classes */ }; +/** + * enum i915_cache_level - The supported GTT caching values for system memory + * pages. + * + * These translate to some special GTT PTE bits when binding pages into some + * address space. It also determines whether an object, or rather its pages are + * coherent with the GPU, when also reading or writing through the CPU cache + * with those pages. + * + * Userspace can also control this through struct drm_i915_gem_caching. + */ +enum i915_cache_level { + /** + * @I915_CACHE_NONE: + * + * GPU access is not coherent with the CPU cache. If the cache is dirty + * and we need the underlying pages to be coherent with some later GPU + * access then we need to manually flush the pages. + * + * On shared LLC platforms reads and writes through the CPU cache are + * still coherent even with this setting. See also + * &drm_i915_gem_object.cache_coherent for more details. Due to this we + * should only ever use uncached for scanout surfaces, otherwise we end + * up over-flushing in some places. + * + * This is the default on non-LLC platforms. + */ + I915_CACHE_NONE = 0, + /** + * @I915_CACHE_LLC: + * + * GPU access is coherent with the CPU cache. If the cache is dirty, + * then the GPU will ensure that access remains coherent, when both + * reading and writing through the CPU cache. GPU writes can dirty the + * CPU cache. + * + * Not used for scanout surfaces. + * + * Applies to both platforms with shared LLC(HAS_LLC), and snooping + * based platforms(HAS_SNOOP). + * + * This is the default on shared LLC platforms. The only exception is + * scanout objects, where the display engine is not coherent with the + * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is + * automatically applied by the kernel in pin_for_display, if userspace + * has not done so already. + */ + I915_CACHE_LLC, + /** + * @I915_CACHE_L3_LLC: + * + * Explicitly enable the Gfx L3 cache, with coherent LLC. + * + * The Gfx L3 sits between the domain specific caches, e.g + * sampler/render caches, and the larger LLC. LLC is coherent with the + * GPU, but L3 is only visible to the GPU, so likely needs to be flushed + * when the workload completes. + * + * Not used for scanout surfaces. + * + * Only exposed on some gen7 + GGTT. More recent hardware has dropped + * this explicit setting, where it should now be enabled by default. 
+ */ + I915_CACHE_L3_LLC, + /** + * @I915_CACHE_WT: + * + * Write-through. Used for scanout surfaces. + * + * The GPU can utilise the caches, while still having the display engine + * be coherent with GPU writes, as a result we don't need to flush the + * CPU caches when moving out of the render domain. This is the default + * setting chosen by the kernel, if supported by the HW, otherwise we + * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU + * cache still need to be flushed, to remain coherent with the display + * engine. + */ + I915_CACHE_WT, +}; + enum i915_map_type { I915_MAP_WB = 0, I915_MAP_WC, @@ -81,6 +185,7 @@ enum i915_mmap_type { I915_MMAP_TYPE_WC, I915_MMAP_TYPE_WB, I915_MMAP_TYPE_UC, + I915_MMAP_TYPE_FIXED, }; struct i915_mmap_offset { @@ -185,23 +290,138 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) -#define I915_BO_ALLOC_CPU_CLEAR BIT(3) +#define I915_BO_ALLOC_CPU_CLEAR BIT(2) +#define I915_BO_ALLOC_USER BIT(3) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ - I915_BO_ALLOC_STRUCT_PAGE | \ - I915_BO_ALLOC_CPU_CLEAR) + I915_BO_ALLOC_CPU_CLEAR | \ + I915_BO_ALLOC_USER) #define I915_BO_READONLY BIT(4) #define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit + /** + * @mem_flags - Mutable placement-related flags + * + * These are flags that indicate specifics of the memory region + * the object is currently in. As such they are only stable + * either under the object lock or if the object is pinned. + */ + unsigned int mem_flags; +#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ +#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ + /** + * @cache_level: The desired GTT caching level. + * + * See enum i915_cache_level for possible values, along with what + * each does. */ unsigned int cache_level:3; - unsigned int cache_coherent:2; + /** + * @cache_coherent: + * + * Track whether the pages are coherent with the GPU if reading or + * writing through the CPU caches. The largely depends on the + * @cache_level setting. + * + * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom + * platforms, coherency must be explicitly requested with some special + * GTT caching bits(see enum i915_cache_level). When enabling coherency + * it does come at a performance and power cost on such platforms. On + * the flip side the kernel does not need to manually flush any buffers + * which need to be coherent with the GPU, if the object is not coherent + * i.e @cache_coherent is zero. + * + * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory + * access will automatically snoop the CPU caches(even with CACHE_NONE). + * The one exception is when dealing with the display engine, like with + * scanout surfaces. To handle this the kernel will always flush the + * surface out of the CPU caches when preparing it for scanout. Also + * note that since scanout surfaces are only ever read by the display + * engine we only need to care about flushing any writes through the CPU + * cache, reads on the other hand will always be coherent. + * + * Something strange here is why @cache_coherent is not a simple + * boolean, i.e coherent vs non-coherent. 
The reasoning for this goes back + * to the display engine not being fully coherent. As a result scanout + * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT. + * In the case of seeing I915_CACHE_NONE the kernel makes the assumption + * that this is likely a scanout surface, and will set @cache_coherent + * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared + * LLC. The kernel uses this to always flush writes through the CPU + * cache as early as possible, where it can, in effect keeping + * @cache_dirty clean, so we can potentially avoid stalling when + * flushing the surface just before doing the scanout. This does mean + * we might unnecessarily flush non-scanout objects in some places, but + * the default assumption is that all normal objects should be using + * I915_CACHE_LLC, at least on platforms with the shared LLC. + * + * Supported values: + * + * I915_BO_CACHE_COHERENT_FOR_READ: + * + * On shared LLC platforms, we use this for special scanout surfaces, + * where the display engine is not coherent with the CPU cache. As such + * we need to ensure we flush any writes before doing the scanout. As an + * optimisation we try to flush any writes as early as possible to avoid + * stalling later. + * + * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC + * platforms, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ + * + * While for normal objects that are fully coherent, including special + * scanout surfaces marked as I915_CACHE_WT, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ | + * I915_BO_CACHE_COHERENT_FOR_WRITE + * + * And then for objects that are not coherent at all we use: + * + * cache_coherent = 0 + * + * I915_BO_CACHE_COHERENT_FOR_WRITE: + * + * When writing through the CPU cache, the GPU is still coherent. Note + * that this also implies I915_BO_CACHE_COHERENT_FOR_READ. + */ #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) + unsigned int cache_coherent:2; + + /** + * @cache_dirty: + * + * Track if we are dirty with writes through the CPU cache for this + * object. As a result reading directly from main memory might yield + * stale data. + * + * This also ties into whether the kernel is tracking the object as + * coherent with the GPU, as per @cache_coherent, as it determines if + * flushing might be needed at various points. + * + * Another part of @cache_dirty is managing flushing when first + * acquiring the pages for system memory, at this point the pages are + * considered foreign, so the default assumption is that the cache is + * dirty, for example the page zeroing done by the kernel might leave + * writes through the CPU cache, or swapping-in, while the actual data in + * main memory is potentially stale. Note that this is a potential + * security issue when dealing with userspace objects and zeroing. Now, + * whether we actually need to apply the big sledgehammer of flushing all + * the pages on acquire depends on whether @cache_coherent is marked as + * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be coherent + * for both reads and writes through the CPU cache. + * + * Note that on shared LLC platforms we still apply the heavy flush for + * I915_CACHE_NONE objects, under the assumption that this is going to + * be used for scanout.
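In code, the tracking described above usually reduces to checking both fields together, as move_obj_to_gpu() does earlier in this series; a hedged restatement as a stand-alone helper (name illustrative only):

static void flush_before_gpu_access_sketch(struct drm_i915_gem_object *obj)
{
	/*
	 * Flush only if there may be dirty CPU cache lines (@cache_dirty)
	 * that the upcoming GPU access would not snoop by itself, i.e. the
	 * relevant @cache_coherent bit is clear.
	 */
	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);
}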
+ * + * Update: On some hardware there is now also the 'Bypass LLC' MOCS + * entry, which defeats our @cache_coherent tracking, since userspace + * can freely bypass the CPU cache when touching the pages with the GPU, + * where the kernel is completely unaware. On such platform we need + * apply the sledgehammer-on-acquire regardless of the @cache_coherent. + */ unsigned int cache_dirty:1; /** @@ -247,9 +467,10 @@ struct drm_i915_gem_object { struct intel_memory_region *region; /** - * Memory manager node allocated for this object. + * Memory manager resource allocated for this object. Only + * needed for the mock region. */ - void *st_mm_node; + struct ttm_resource *res; /** * Element within memory_region->objects or region->purgeable @@ -310,6 +531,12 @@ struct drm_i915_gem_object { bool dirty:1; } mm; + struct { + struct sg_table *cached_io_st; + struct i915_gem_object_page_iter get_io_page; + bool created:1; + } ttm; + /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 6444e097016d..8eb1c3a6fc9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, dma_addr_t addr; void *vaddr; - if (type != I915_MAP_WC) - return ERR_PTR(-ENODEV); + GEM_BUG_ON(type != I915_MAP_WC); if (n_pfn > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ @@ -351,7 +350,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, int err; if (!i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO); assert_object_held(obj); @@ -374,6 +373,34 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + /* + * For discrete our CPU mappings needs to be consistent in order to + * function correctly on !x86. When mapping things through TTM, we use + * the same rules to determine the caching type. + * + * The caching rules, starting from DG1: + * + * - If the object can be placed in device local-memory, then the + * pages should be allocated and mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, + * with the guarantee that everything is also coherent with the + * GPU. + * + * Internal users of lmem are already expected to get this right, so no + * fudging needed there. 
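Given these rules, CPU access on discrete parts is expected to go through a write-combined mapping; a hedged usage sketch, assuming obj is already locked and can be placed in local memory:

	void *vaddr;

	/* On DGFX, lmem-capable objects may only be mapped write-combined. */
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memset(vaddr, 0, obj->base.size);	/* CPU access through the WC map */

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);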
+ */ + if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) { + if (type != I915_MAP_WC && !obj->mm.n_placements) { + ptr = ERR_PTR(-ENODEV); + goto err_unpin; + } + + type = I915_MAP_WC; + } else if (IS_DGFX(to_i915(obj->base.dev))) { + type = I915_MAP_WB; + } + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { @@ -467,9 +494,8 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, unsigned int *offset, - bool allow_alloc) + bool dma) { - const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; unsigned int idx, count; @@ -490,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (n < READ_ONCE(iter->sg_idx)) goto lookup; - if (!allow_alloc) - goto manual_lookup; - mutex_lock(&iter->lock); /* We prefer to reuse the last sg so that repeated lookup of this @@ -542,16 +565,7 @@ scan: if (unlikely(n < idx)) /* insertion completed by another thread */ goto lookup; - goto manual_walk; - -manual_lookup: - idx = 0; - sg = obj->mm.pages->sgl; - count = __sg_page_count(sg); - -manual_walk: - /* - * In case we failed to insert the entry into the radixtree, we need + /* In case we failed to insert the entry into the radixtree, we need * to look beyond the current sg. */ while (idx + count <= n) { @@ -598,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - sg = i915_gem_object_get_sg(obj, n, &offset, true); + sg = i915_gem_object_get_sg(obj, n, &offset); return nth_page(sg_page(sg), offset); } @@ -624,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg_dma(obj, n, &offset, true); + sg = i915_gem_object_get_sg_dma(obj, n, &offset); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index be72ad0634ba..7986612f48fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,7 +76,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); /* We're no longer struct page backed */ - obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; + obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index f25e6646c5b7..1f557b2178ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -13,16 +13,8 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, { obj->mm.region = intel_memory_region_get(mem); - if (obj->base.size <= mem->min_page_size) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - mutex_lock(&mem->objects.lock); - - if (obj->flags & I915_BO_ALLOC_VOLATILE) - list_add(&obj->mm.region_link, &mem->objects.purgeable); - else - list_add(&obj->mm.region_link, &mem->objects.list); - + list_add(&obj->mm.region_link, &mem->objects.list); mutex_unlock(&mem->objects.lock); } @@ -40,9 +32,11 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct 
drm_i915_gem_object *obj; + resource_size_t default_page_size; int err; /* @@ -56,7 +50,14 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!mem) return ERR_PTR(-ENODEV); - size = round_up(size, mem->min_page_size); + default_page_size = mem->min_page_size; + if (page_size) + default_page_size = page_size; + + GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); + GEM_BUG_ON(default_page_size < PAGE_SIZE); + + size = round_up(size, default_page_size); GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); @@ -68,7 +69,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!obj) return ERR_PTR(-ENOMEM); - err = mem->ops->init_object(mem, obj, size, flags); + err = mem->ops->init_object(mem, obj, size, page_size, flags); if (err) goto err_object_free; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 84fcb3297400..1008e580a89a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -19,6 +19,7 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d16c4462fda..11f072193f3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -182,6 +182,24 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); + /* + * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it + * possible for userspace to bypass the GTT caching bits set by the + * kernel, as per the given object cache_level. This is troublesome + * since the heavy flush we apply when first gathering the pages is + * skipped if the kernel thinks the object is coherent with the GPU. As + * a result it might be possible to bypass the cache and read the + * contents of the page directly, which could be stale data. If it's + * just a case of userspace shooting themselves in the foot then so be + * it, but since i915 takes the stance of always zeroing memory before + * handing it to userspace, we need to prevent this. + * + * By setting cache_dirty here we make the clflush in set_pages + * unconditional on such platforms. 
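Tying the i915_gem_object_create_region() change in this hunk together, a hedged caller-side sketch (mem is an assumed struct intel_memory_region pointer; passing 0 for page_size keeps the region's min_page_size behaviour):

	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_region(mem, SZ_2M, 0, I915_BO_ALLOC_USER);
	if (IS_ERR(obj))
		return PTR_ERR(obj);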
+ */ + if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER) + obj->cache_dirty = true; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -302,6 +320,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ struct pagevec pvec; struct page *page; + GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -444,7 +463,7 @@ shmem_pread(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { - if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + if (i915_gem_object_has_struct_page(obj)) i915_gem_object_release_memory_region(obj); fput(obj->base.filp); @@ -489,6 +508,7 @@ static int __create_shmem(struct drm_i915_private *i915, static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -513,9 +533,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -548,7 +567,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], - size, 0); + size, 0, 0); } /* Allocate a new GEM object and fill it with the supplied data */ @@ -561,6 +580,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, resource_size_t offset; int err; + GEM_WARN_ON(IS_DGFX(dev_priv)); obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b0c3a7dc60d1..ddd37ccb1362 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -447,7 +447,6 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) break; case 8: case 9: - case 10: if (IS_LP(i915)) chv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); @@ -670,6 +669,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_private *i915 = mem->i915; @@ -708,7 +708,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.stolen_region, size, 0); + return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0); } static int init_stolen_smem(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c new file mode 100644 index 000000000000..771eb2963123 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> + +#include "i915_drv.h" +#include 
"intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_object.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_mman.h" + +#include "gt/intel_migrate.h" +#include "gt/intel_engine_pm.h" + +#define I915_PL_LMEM0 TTM_PL_PRIV +#define I915_PL_SYSTEM TTM_PL_SYSTEM +#define I915_PL_STOLEN TTM_PL_VRAM +#define I915_PL_GGTT TTM_PL_TT + +#define I915_TTM_PRIO_PURGE 0 +#define I915_TTM_PRIO_NO_PAGES 1 +#define I915_TTM_PRIO_HAS_PAGES 2 + +/* + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs + */ +#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN + +/** + * struct i915_ttm_tt - TTM page vector with additional private information + * @ttm: The base TTM page vector. + * @dev: The struct device used for dma mapping and unmapping. + * @cached_st: The cached scatter-gather table. + * + * Note that DMA may be going on right up to the point where the page- + * vector is unpopulated in delayed destroy. Hence keep the + * scatter-gather table mapped and cached up to that point. This is + * different from the cached gem object io scatter-gather table which + * doesn't have an associated dma mapping. + */ +struct i915_ttm_tt { + struct ttm_tt ttm; + struct device *dev; + struct sg_table *cached_st; +}; + +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, +}; + +static struct ttm_placement i915_sys_placement = { + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +static int i915_ttm_err_to_gem(int err) +{ + /* Fastpath */ + if (likely(!err)) + return 0; + + switch (err) { + case -EBUSY: + /* + * TTM likes to convert -EDEADLK to -EBUSY, and wants us to + * restart the operation, since we don't record the contending + * lock. We use -EAGAIN to restart. + */ + return -EAGAIN; + case -ENOSPC: + /* + * Memory type / region is full, and we can't evict. + * Except possibly system, that returns -ENOMEM; + */ + return -ENXIO; + default: + break; + } + + return err; +} + +static bool gpu_binds_iomem(struct ttm_resource *mem) +{ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static bool cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + +static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); + +static enum ttm_caching +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) +{ + /* + * Objects only allowed in system get cached cpu-mappings. + * Other objects get WC mapping for now. Even if in system. 
+ */ + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && + obj->mm.n_placements <= 1) + return ttm_cached; + + return ttm_write_combined; +} + +static void +i915_ttm_place_from_region(const struct intel_memory_region *mr, + struct ttm_place *place, + unsigned int flags) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = intel_region_to_ttm_type(mr); + + if (flags & I915_BO_ALLOC_CONTIGUOUS) + place->flags = TTM_PL_FLAG_CONTIGUOUS; +} + +static void +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, + struct ttm_place *requested, + struct ttm_place *busy, + struct ttm_placement *placement) +{ + unsigned int num_allowed = obj->mm.n_placements; + unsigned int flags = obj->flags; + unsigned int i; + + placement->num_placement = 1; + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : + obj->mm.region, requested, flags); + + /* Cache this on object? */ + placement->num_busy_placement = num_allowed; + for (i = 0; i < placement->num_busy_placement; ++i) + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); + + if (num_allowed == 0) { + *busy = *requested; + placement->num_busy_placement = 1; + } + + placement->placement = requested; + placement->busy_placement = busy; +} + +static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, + uint32_t page_flags) +{ + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct i915_ttm_tt *i915_tt; + int ret; + + i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); + if (!i915_tt) + return NULL; + + if (obj->flags & I915_BO_ALLOC_CPU_CLEAR && + man->use_tt) + page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; + + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, + i915_ttm_select_tt_caching(obj)); + if (ret) { + kfree(i915_tt); + return NULL; + } + + i915_tt->dev = obj->base.dev->dev; + + return &i915_tt->ttm; +} + +static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + if (i915_tt->cached_st) { + dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st, + DMA_BIDIRECTIONAL, 0); + sg_free_table(i915_tt->cached_st); + kfree(i915_tt->cached_st); + i915_tt->cached_st = NULL; + } + ttm_pool_free(&bdev->pool, ttm); +} + +static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + ttm_tt_destroy_common(bdev, ttm); + ttm_tt_fini(ttm); + kfree(i915_tt); +} + +static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, + const struct ttm_place *place) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + /* Will do for now. 
Our pinned objects are still on TTM's LRU lists */ + return i915_gem_object_evictable(obj); +} + +static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, + struct ttm_placement *placement) +{ + *placement = i915_sys_placement; +} + +static int i915_ttm_move_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret; + + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (ret) + return ret; + + ret = __i915_gem_object_put_pages(obj); + if (ret) + return ret; + + return 0; +} + +static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + if (!obj->ttm.cached_io_st) + return; + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &obj->ttm.get_io_page.radix, &iter, 0) + radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index); + rcu_read_unlock(); + + sg_free_table(obj->ttm.cached_io_st); + kfree(obj->ttm.cached_io_st); + obj->ttm.cached_io_st = NULL; +} + +static void +i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + +static void i915_ttm_purge(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement place = {}; + int ret; + + if (obj->mm.madv == __I915_MADV_PURGED) + return; + + /* TTM's purge interface. Note that we might be reentering. 
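Once i915_ttm_adjust_gem_after_move() has updated @mem_flags, callers can tell how the backing store must be accessed; a hedged one-line helper restating what the i915_gem_object_has_iomem() check used elsewhere in this series boils down to (name illustrative):

static bool object_is_iomem_backed_sketch(const struct drm_i915_gem_object *obj)
{
	/* Set for iomem placements such as lmem, cleared again for system memory. */
	return obj->mem_flags & I915_BO_FLAG_IOMEM;
}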
*/ + ret = ttm_bo_validate(bo, &place, &ctx); + if (!ret) { + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); + i915_ttm_free_cached_io_st(obj); + obj->mm.madv = __I915_MADV_PURGED; + } +} + +static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret = i915_ttm_move_notify(bo); + + GEM_WARN_ON(ret); + GEM_WARN_ON(obj->ttm.cached_io_st); + if (!ret && obj->mm.madv != I915_MADV_WILLNEED) + i915_ttm_purge(obj); +} + +static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + if (likely(obj)) { + /* This releases all gem object bindings to the backend. */ + i915_ttm_free_cached_io_st(obj); + __i915_gem_free_object(obj); + } +} + +static struct intel_memory_region * +i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + + /* There's some room for optimization here... */ + GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && + ttm_mem_type < I915_PL_LMEM0); + if (ttm_mem_type == I915_PL_SYSTEM) + return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, + 0); + + return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, + ttm_mem_type - I915_PL_LMEM0); +} + +static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + struct scatterlist *sg; + struct sg_table *st; + int ret; + + if (i915_tt->cached_st) + return i915_tt->cached_st; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + sg = __sg_alloc_table_from_pages + (st, ttm->pages, ttm->num_pages, 0, + (unsigned long)ttm->num_pages << PAGE_SHIFT, + i915_sg_segment_size(), NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { + kfree(st); + return ERR_CAST(sg); + } + + ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); + if (ret) { + sg_free_table(st); + kfree(st); + return ERR_PTR(ret); + } + + i915_tt->cached_st = st; + return st; +} + +static struct sg_table * +i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, + struct ttm_resource *res) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (!gpu_binds_iomem(res)) + return i915_ttm_tt_get_st(bo->ttm); + + /* + * If CPU mapping differs, we need to add the ttm_tt pages to + * the resulting st. Might make sense for GGTT. 
+ */ + GEM_WARN_ON(!cpu_maps_iomem(res)); + return intel_region_ttm_resource_to_st(obj->mm.region, res); +} + +static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct ttm_resource_manager *src_man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct sg_table *src_st; + struct i915_request *rq; + struct ttm_tt *ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; + int ret; + + if (!i915->gt.migrate.context) + return -EINVAL; + + dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); + if (!ttm || !ttm_tt_is_populated(ttm)) { + if (bo->type == ttm_bo_type_kernel) + return -EINVAL; + + if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + return 0; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), + 0, &rq); + + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } else { + src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) : + obj->ttm.cached_io_st; + + src_level = i915_ttm_cache_level(i915, bo->resource, ttm); + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_copy(i915->gt.migrate.context, + NULL, src_st->sgl, src_level, + gpu_binds_iomem(bo->resource), + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), + &rq); + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } + + return ret; +} + +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct intel_memory_region *dst_reg, *src_reg; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + struct sg_table *dst_st; + int ret; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + /* Sync for now. We could do the actual copy async. */ + ret = ttm_bo_wait_ctx(bo, ctx); + if (ret) + return ret; + + ret = i915_ttm_move_notify(bo); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + i915_ttm_purge(obj); + ttm_resource_free(bo, &dst_mem); + return 0; + } + + /* Populate ttm with pages if needed. Typically system memory. */ + if (bo->ttm && (dst_man->use_tt || + (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ret) + return ret; + } + + dst_st = i915_ttm_resource_get_st(obj, dst_mem); + if (IS_ERR(dst_st)) + return PTR_ERR(dst_st); + + ret = i915_ttm_accel_move(bo, dst_mem, dst_st); + if (ret) { + /* If we start mapping GGTT, we can no longer use man::use_tt here. */ + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? 
+ ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } + /* Below dst_mem becomes bo->resource. */ + ttm_bo_move_sync_cleanup(bo, dst_mem); + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_free_cached_io_st(obj); + + if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { + obj->ttm.cached_io_st = dst_st; + obj->ttm.get_io_page.sg_pos = dst_st->sgl; + obj->ttm.get_io_page.sg_idx = 0; + } + + i915_ttm_adjust_gem_after_move(obj); + return 0; +} + +static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) +{ + if (!cpu_maps_iomem(mem)) + return 0; + + mem->bus.caching = ttm_write_combined; + mem->bus.is_iomem = true; + + return 0; +} + +static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start; + struct scatterlist *sg; + unsigned int ofs; + + GEM_WARN_ON(bo->ttm); + + sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); + + return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; +} + +static struct ttm_device_funcs i915_ttm_bo_driver = { + .ttm_tt_create = i915_ttm_tt_create, + .ttm_tt_unpopulate = i915_ttm_tt_unpopulate, + .ttm_tt_destroy = i915_ttm_tt_destroy, + .eviction_valuable = i915_ttm_eviction_valuable, + .evict_flags = i915_ttm_evict_flags, + .move = i915_ttm_move, + .swap_notify = i915_ttm_swap_notify, + .delete_mem_notify = i915_ttm_delete_mem_notify, + .io_mem_reserve = i915_ttm_io_mem_reserve, + .io_mem_pfn = i915_ttm_io_mem_pfn, +}; + +/** + * i915_ttm_driver - Return a pointer to the TTM device funcs + * + * Return: Pointer to statically allocated TTM device funcs. + */ +struct ttm_device_funcs *i915_ttm_driver(void) +{ + return &i915_ttm_bo_driver; +} + +static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct sg_table *st; + int real_num_busy; + int ret; + + /* First try only the requested placement. No eviction. */ + real_num_busy = fetch_and_zero(&placement->num_busy_placement); + ret = ttm_bo_validate(bo, placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + /* + * Anything that wants to restart the operation gets to + * do that. + */ + if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS || + ret == -EAGAIN) + return ret; + + /* + * If the initial attempt fails, allow all accepted placements, + * evicting if necessary. + */ + placement->num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, placement, &ctx); + if (ret) + return i915_ttm_err_to_gem(ret); + } + + i915_ttm_adjust_lru(obj); + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + } + + if (!i915_gem_object_has_pages(obj)) { + /* Object either has a page vector or is an iomem object */ + st = bo->ttm ? 
i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; + if (IS_ERR(st)) + return PTR_ERR(st); + + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + } + + return ret; +} + +static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +{ + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; + + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + + /* Move to the requested placement. */ + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + return __i915_ttm_get_pages(obj, &placement); +} + +/** + * DOC: Migration vs eviction + * + * GEM migration may not be the same as TTM migration / eviction. If + * the TTM core decides to evict an object it may be evicted to a + * TTM memory type that is not in the object's allowable GEM regions, or + * in fact theoretically to a TTM memory type that doesn't correspond to + * a GEM memory region. In that case the object's GEM region is not + * updated, and the data is migrated back to the GEM region at + * get_pages time. TTM may however set up CPU ptes to the object even + * when it is evicted. + * Gem forced migration using the i915_ttm_migrate() op, is allowed even + * to regions that are not in the object's list of allowable placements. + */ +static int i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr) +{ + struct ttm_place requested; + struct ttm_placement placement; + int ret; + + i915_ttm_place_from_region(mr, &requested, obj->flags); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + ret = __i915_ttm_get_pages(obj, &placement); + if (ret) + return ret; + + /* + * Reinitialize the region bindings. This is primarily + * required for objects where the new region is not in + * its allowable placements. + */ + if (obj->mm.region != mr) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + } + + return 0; +} + +static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) +{ + /* + * We're currently not called from a shrinker, so put_pages() + * typically means the object is about to destroyed, or called + * from move_notify(). So just avoid doing much for now. + * If the object is not destroyed next, The TTM eviction logic + * and shrinkers will move it out if needed. + */ + + i915_ttm_adjust_lru(obj); +} + +static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + /* + * Don't manipulate the TTM LRUs while in TTM bo destruction. + * We're called through i915_ttm_delete_mem_notify(). + */ + if (!kref_read(&bo->kref)) + return; + + /* + * Put on the correct LRU list depending on the MADV status + */ + spin_lock(&bo->bdev->lru_lock); + if (obj->mm.madv != I915_MADV_WILLNEED) { + bo->priority = I915_TTM_PRIO_PURGE; + } else if (!i915_gem_object_has_pages(obj)) { + if (bo->priority < I915_TTM_PRIO_HAS_PAGES) + bo->priority = I915_TTM_PRIO_HAS_PAGES; + } else { + if (bo->priority > I915_TTM_PRIO_NO_PAGES) + bo->priority = I915_TTM_PRIO_NO_PAGES; + } + + ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); + spin_unlock(&bo->bdev->lru_lock); +} + +/* + * TTM-backed gem object destruction requires some clarification. + * Basically we have two possibilities here. We can either rely on the + * i915 delayed destruction and put the TTM object when the object + * is idle. 
This would be detected by TTM which would bypass the + * TTM delayed destroy handling. The other approach is to put the TTM + * object early and rely on the TTM destroyed handling, and then free + * the leftover parts of the GEM object once TTM's destroyed list handling is + * complete. For now, we rely on the latter for two reasons: + * a) TTM can evict an object even when it's on the delayed destroy list, + * which in theory allows for complete eviction. + * b) There is work going on in TTM to allow freeing an object even when + * it's not idle, and using the TTM destroyed list handling could help us + * benefit from that. + */ +static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +{ + if (obj->ttm.created) { + ttm_bo_put(i915_gem_to_ttm(obj)); + } else { + __i915_gem_free_object(obj); + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } +} + +static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) +{ + struct vm_area_struct *area = vmf->vma; + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(area->vm_private_data); + + /* Sanity check that we allow writing into this object */ + if (unlikely(i915_gem_object_is_readonly(obj) && + area->vm_flags & VM_WRITE)) + return VM_FAULT_SIGBUS; + + return ttm_bo_vm_fault(vmf); +} + +static int +vm_access_ttm(struct vm_area_struct *area, unsigned long addr, + void *buf, int len, int write) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(area->vm_private_data); + + if (i915_gem_object_is_readonly(obj) && write) + return -EACCES; + + return ttm_bo_vm_access(area, addr, buf, len, write); +} + +static void ttm_vm_open(struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(vma->vm_private_data); + + GEM_BUG_ON(!obj); + i915_gem_object_get(obj); +} + +static void ttm_vm_close(struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(vma->vm_private_data); + + GEM_BUG_ON(!obj); + i915_gem_object_put(obj); +} + +static const struct vm_operations_struct vm_ops_ttm = { + .fault = vm_fault_ttm, + .access = vm_access_ttm, + .open = ttm_vm_open, + .close = ttm_vm_close, +}; + +static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) +{ + /* The ttm_bo must be allocated with I915_BO_ALLOC_USER */ + GEM_BUG_ON(!drm_mm_node_allocated(&obj->base.vma_node.vm_node)); + + return drm_vma_node_offset_addr(&obj->base.vma_node); +} + +static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { + .name = "i915_gem_object_ttm", + + .get_pages = i915_ttm_get_pages, + .put_pages = i915_ttm_put_pages, + .truncate = i915_ttm_purge, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, + .migrate = i915_ttm_migrate, + .mmap_offset = i915_ttm_mmap_offset, + .mmap_ops = &vm_ops_ttm, +}; + +void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + i915_gem_object_release_memory_region(obj); + mutex_destroy(&obj->ttm.get_io_page.lock); + if (obj->ttm.created) + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); +} + +/** + * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object + * @mem: The initial memory region for the object. + * @obj: The gem object. + * @size: Object size in bytes. + * @flags: gem object flags. + * + * Return: 0 on success, negative error code on failure. 
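Because TTM manages the vma node itself, the GEM mmap-offset path can simply defer to the new mmap_offset hook when the backend provides one; a hedged sketch of that dispatch (the surrounding ioctl plumbing is omitted):

	u64 offset = 0;

	if (obj->ops->mmap_offset)
		/* One fixed offset serves every mmap mode (I915_MMAP_TYPE_FIXED). */
		offset = obj->ops->mmap_offset(obj);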
+ */ +int __i915_gem_ttm_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + enum ttm_bo_type bo_type; + int ret; + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); + i915_gem_object_init_memory_region(obj, mem); + i915_gem_object_make_unshrinkable(obj); + INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock); + bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : + ttm_bo_type_kernel; + + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + + /* Forcing the page size is kernel internal only */ + GEM_BUG_ON(page_size && obj->mm.n_placements); + + /* + * If this function fails, it will call the destructor, but + * our caller still owns the object. So no freeing in the + * destructor until obj->ttm.created is true. + * Similarly, in delayed_destroy, we can't call ttm_bo_put() + * until successful initialization. + */ + ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + bo_type, &i915_sys_placement, + page_size >> PAGE_SHIFT, + &ctx, NULL, NULL, i915_ttm_bo_destroy); + if (ret) + return i915_ttm_err_to_gem(ret); + + obj->ttm.created = true; + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + i915_gem_object_unlock(obj); + + return 0; +} + +static const struct intel_memory_region_ops ttm_system_region_ops = { + .init_object = __i915_gem_ttm_object_init, +}; + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance) +{ + struct intel_memory_region *mr; + + mr = intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + type, instance, + &ttm_system_region_ops); + if (IS_ERR(mr)) + return mr; + + intel_memory_region_set_name(mr, "system-ttm"); + return mr; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h new file mode 100644 index 000000000000..40927f67b6d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_GEM_TTM_H_ +#define _I915_GEM_TTM_H_ + +#include "gem/i915_gem_object_types.h" + +/** + * i915_gem_to_ttm - Convert a struct drm_i915_gem_object to a + * struct ttm_buffer_object. + * @obj: Pointer to the gem object. + * + * Return: Pointer to the embedded struct ttm_buffer_object. + */ +static inline struct ttm_buffer_object * +i915_gem_to_ttm(struct drm_i915_gem_object *obj) +{ + return &obj->__do_not_access; +} + +/* + * i915 ttm gem object destructor. Internal use only. + */ +void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); + +/** + * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding + * struct drm_i915_gem_object. + * + * Return: Pointer to the embedding struct ttm_buffer_object, or NULL + * if the object was not an i915 ttm object. 
+ */ +static inline struct drm_i915_gem_object * +i915_ttm_to_gem(struct ttm_buffer_object *bo) +{ + if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy)) + return NULL; + + return container_of(bo, struct drm_i915_gem_object, __do_not_access); +} + +int __i915_gem_ttm_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 7487bab11f0b..468a7a617fbf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -67,11 +67,11 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, if (!mmu_notifier_range_blockable(range)) return false; - spin_lock(&i915->mm.notifier_lock); + write_lock(&i915->mm.notifier_lock); mmu_interval_set_seq(mni, cur_seq); - spin_unlock(&i915->mm.notifier_lock); + write_unlock(&i915->mm.notifier_lock); /* * We don't wait when the process is exiting. This is valid @@ -107,16 +107,15 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj) static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct page **pvec = NULL; - spin_lock(&i915->mm.notifier_lock); + assert_object_held_shared(obj); + if (!--obj->userptr.page_ref) { pvec = obj->userptr.pvec; obj->userptr.pvec = NULL; } GEM_BUG_ON(obj->userptr.page_ref < 0); - spin_unlock(&i915->mm.notifier_lock); if (pvec) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; @@ -128,7 +127,6 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; @@ -141,16 +139,13 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (!st) return -ENOMEM; - spin_lock(&i915->mm.notifier_lock); - if (GEM_WARN_ON(!obj->userptr.page_ref)) { - spin_unlock(&i915->mm.notifier_lock); - ret = -EFAULT; + if (!obj->userptr.page_ref) { + ret = -EAGAIN; goto err_free; } obj->userptr.page_ref++; pvec = obj->userptr.pvec; - spin_unlock(&i915->mm.notifier_lock); alloc_table: sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, @@ -241,7 +236,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, i915_gem_object_userptr_drop_ref(obj); } -static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages) +static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -259,15 +254,11 @@ static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool if (!IS_ERR_OR_NULL(pages)) i915_gem_userptr_put_pages(obj, pages); - if (get_pages) - err = ____i915_gem_object_get_pages(obj); - return err; } int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; struct page **pvec; unsigned int gup_flags = 0; @@ -277,39 +268,22 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (obj->userptr.notifier.mm != current->mm) return -EFAULT; + notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); + ret = 
i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; - /* optimistically try to preserve current pages while unlocked */ - if (i915_gem_object_has_pages(obj) && - !mmu_interval_check_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - spin_lock(&i915->mm.notifier_lock); - if (obj->userptr.pvec && - !mmu_interval_read_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - obj->userptr.page_ref++; - - /* We can keep using the current binding, this is the fastpath */ - ret = 1; - } - spin_unlock(&i915->mm.notifier_lock); + if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) { + i915_gem_object_unlock(obj); + return 0; } - if (!ret) { - /* Make sure userptr is unbound for next attempt, so we don't use stale pages. */ - ret = i915_gem_object_userptr_unbind(obj, false); - } + ret = i915_gem_object_userptr_unbind(obj); i915_gem_object_unlock(obj); - if (ret < 0) + if (ret) return ret; - if (ret > 0) - return 0; - - notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); - pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); if (!pvec) return -ENOMEM; @@ -329,7 +303,9 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) } ret = 0; - spin_lock(&i915->mm.notifier_lock); + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; if (mmu_interval_read_retry(&obj->userptr.notifier, !obj->userptr.page_ref ? notifier_seq : @@ -341,12 +317,14 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (!obj->userptr.page_ref++) { obj->userptr.pvec = pvec; obj->userptr.notifier_seq = notifier_seq; - pvec = NULL; + ret = ____i915_gem_object_get_pages(obj); } + obj->userptr.page_ref--; + out_unlock: - spin_unlock(&i915->mm.notifier_lock); + i915_gem_object_unlock(obj); out: if (pvec) { @@ -369,11 +347,6 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) return 0; } -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) -{ - i915_gem_object_userptr_drop_ref(obj); -} - int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { int err; @@ -396,7 +369,6 @@ int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) i915_gem_object_unlock(obj); } - i915_gem_object_userptr_submit_fini(obj); return err; } @@ -450,6 +422,34 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { #endif +static int +probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) +{ + const unsigned long end = addr + len; + struct vm_area_struct *vma; + int ret = -EFAULT; + + mmap_read_lock(mm); + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + /* Check for holes, note that we also update the addr below */ + if (vma->vm_start > addr) + break; + + if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) + break; + + if (vma->vm_end >= end) { + ret = 0; + break; + } + + addr = vma->vm_end; + } + mmap_read_unlock(mm); + + return ret; +} + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. 
@@ -505,7 +505,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, } if (args->flags & ~(I915_USERPTR_READ_ONLY | - I915_USERPTR_UNSYNCHRONIZED)) + I915_USERPTR_UNSYNCHRONIZED | + I915_USERPTR_PROBE)) return -EINVAL; if (i915_gem_object_size_2big(args->user_size)) @@ -532,14 +533,24 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENODEV; } + if (args->flags & I915_USERPTR_PROBE) { + /* + * Check that the range pointed to represents real struct + * pages and not iomappings (at this moment in time!) + */ + ret = probe_range(current->mm, args->user_ptr, args->user_size); + if (ret) + return ret; + } + #ifdef CONFIG_MMU_NOTIFIER obj = i915_gem_object_alloc(); if (obj == NULL) return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); + obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); @@ -572,7 +583,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { #ifdef CONFIG_MMU_NOTIFIER - spin_lock_init(&dev_priv->mm.notifier_lock); + rwlock_init(&dev_priv->mm.notifier_lock); #endif return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1e97520c62b2..f909aaa09d9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -104,8 +104,8 @@ static void fence_set_priority(struct dma_fence *fence, engine = rq->engine; rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) - engine->schedule(rq, attr); + if (engine->sched_engine->schedule) + engine->sched_engine->schedule(rq, attr); rcu_read_unlock(); } @@ -290,3 +290,22 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); return ret; } + +/** + * i915_gem_object_wait_migration - Sync an accelerated migration operation + * @obj: The migrating object. + * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE. + * + * Wait for any pending async migration operation on the object, + * whether it's explicitly (i915_gem_object_migrate()) or implicitly + * (swapin, initial clearing) initiated. + * + * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting. + */ +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + might_sleep(); + /* NOP for now. 
*/ + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 0c8ecfdf5405..f963b8e1e37b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -114,8 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &huge_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dadd485bc52f..a094f3ce1a90 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -167,9 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; @@ -497,7 +496,8 @@ static int igt_mock_memory_region_huge_pages(void *arg) int i; for (i = 0; i < ARRAY_SIZE(flags); ++i) { - obj = i915_gem_object_create_region(mem, page_size, + obj = i915_gem_object_create_region(mem, + page_size, page_size, flags[i]); if (IS_ERR(obj)) { err = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 176e6b22f87f..ecbcbb86ae1e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,7 @@ #include "i915_selftest.h" +#include "gt/intel_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gpu_commands.h" @@ -16,118 +17,6 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int __igt_client_fill(struct intel_engine_cs *engine) -{ - struct intel_context *ce = engine->kernel_context; - struct drm_i915_gem_object *obj; - I915_RND_STATE(prng); - IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; - - intel_engine_pm_get(engine); - do { - const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); - u32 i; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * XXX: The goal is move this to get_pages, so try to dirty the - * CPU cache first to check that we do the required clflush - * before scheduling the blt for !llc platforms. This matches - * some version of reality where at get_pages the pages - * themselves may not yet be coherent with the GPU(swap-in). 
If - * we are missing the flush then we should see the stale cache - * values after we do the set_to_cpu_domain and pick it up as a - * test failure. - */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages, - &obj->mm.page_sizes, - val); - if (err) - goto err_unpin; - - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - intel_engine_pm_put(engine); - - return err; -} - -static int igt_client_fill(void *arg) -{ - int inst = 0; - - do { - struct intel_engine_cs *engine; - int err; - - engine = intel_engine_lookup_user(arg, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - err = __igt_client_fill(engine); - if (err == -ENOMEM) - err = 0; - if (err) - return err; - } while (1); -} - #define WIDTH 512 #define HEIGHT 32 @@ -693,7 +582,6 @@ static int igt_client_tiled_blits(void *arg) int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(igt_client_fill), SUBTEST(igt_client_tiled_blits), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index dbcfa28a9d91..8eb5050f8cb3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -680,7 +680,7 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, NULL); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; @@ -813,16 +813,12 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, ctx_vm(parent)); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; } - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, ctx_vm(parent)); - mutex_unlock(&ctx->mutex); - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); @@ -1875,125 +1871,6 @@ out_file: return err; } -static bool skip_unused_engines(struct intel_context *ce, void *data) -{ - return !ce->state; -} - -static void mock_barrier_task(void *data) -{ - unsigned int *counter = data; - - ++*counter; -} - -static int mock_context_barrier(void *arg) -{ -#undef pr_fmt -#define pr_fmt(x) "context_barrier_task():" # x - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct i915_request *rq; - unsigned int counter; - int err; - - /* - * The context barrier provides us with a callback after it emits - * a request; useful for retiring old state after loading new. 
- */ - - ctx = mock_context(i915, "mock"); - if (!ctx) - return -ENOMEM; - - counter = 0; - err = context_barrier_task(ctx, 0, NULL, NULL, NULL, - mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately with 0 engines\n"); - err = -EINVAL; - goto out; - } - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately for all unused engines\n"); - err = -EINVAL; - goto out; - } - - rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]); - if (IS_ERR(rq)) { - pr_err("Request allocation failed!\n"); - goto out; - } - i915_request_add(rq); - - counter = 0; - context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, - mock_barrier_task, &counter); - context_barrier_inject_fault = 0; - if (err == -ENXIO) - err = 0; - else - pr_err("Did not hit fault injection!\n"); - if (counter != 0) { - pr_err("Invoked callback on error!\n"); - err = -EIO; - } - if (err) - goto out; - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - mock_device_flush(i915); - if (counter == 0) { - pr_err("Did not retire on each active engines\n"); - err = -EINVAL; - goto out; - } - -out: - mock_context_close(ctx); - return err; -#undef pr_fmt -#define pr_fmt(x) x -} - -int i915_gem_context_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_context_barrier), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - mock_destroy_device(i915); - return err; -} - int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index dd74bc09ec88..ffae7df5e4d7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -35,7 +35,7 @@ static int igt_dmabuf_export(void *arg) static int igt_dmabuf_import_self(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj, *import_obj; struct drm_gem_object *import; struct dma_buf *dmabuf; int err; @@ -65,10 +65,19 @@ static int igt_dmabuf_import_self(void *arg) err = -EINVAL; goto out_import; } + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + i915_gem_object_unlock(import_obj); + if (err) { + pr_err("Same object dma-buf get_pages failed!\n"); + goto out_import; + } err = 0; out_import: - i915_gem_object_put(to_intel_bo(import)); + i915_gem_object_put(import_obj); out_dmabuf: dma_buf_put(dmabuf); out: @@ -76,6 +85,180 @@ out: return err; } +static int igt_dmabuf_import_same_driver_lmem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM]; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + if (!lmem) + return 0; + + 
force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + /* + * We expect an import of an LMEM-only object to fail with + * -EOPNOTSUPP because it can't be migrated to SMEM. + */ + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (!IS_ERR(import)) { + drm_gem_object_put(import); + pr_err("i915_gem_prime_import succeeded when it shouldn't have\n"); + err = -EINVAL; + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + } + + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, + struct intel_memory_region **regions, + unsigned int num_regions) +{ + struct drm_i915_gem_object *obj, *import_obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct dma_buf_attachment *import_attach; + struct sg_table *st; + long timeout; + int err; + + force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + regions, num_regions); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import == &obj->base) { + pr_err("i915_gem_prime_import reused gem object!\n"); + err = -EINVAL; + goto out_import; + } + + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + if (err) { + pr_err("Different objects dma-buf get_pages failed!\n"); + i915_gem_object_unlock(import_obj); + goto out_import; + } + + /* + * If the exported object is not in system memory, something + * weird is going on. TODO: When p2p is supported, this is no + * longer considered weird. + */ + if (obj->mm.region != i915->mm.regions[INTEL_REGION_SMEM]) { + pr_err("Exported dma-buf is not in system memory\n"); + err = -EINVAL; + } + + i915_gem_object_unlock(import_obj); + + /* Now try a fake an importer */ + import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev); + if (IS_ERR(import_attach)) { + err = PTR_ERR(import_attach); + goto out_import; + } + + st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(st)) { + err = PTR_ERR(st); + goto out_detach; + } + + timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + if (!timeout) { + pr_err("dmabuf wait for exclusive fence timed out.\n"); + timeout = -ETIME; + } + err = timeout > 0 ? 
0 : timeout; + dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL); +out_detach: + dma_buf_detach(dmabuf, import_attach); +out_import: + i915_gem_object_put(import_obj); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM]; + + return igt_dmabuf_import_same_driver(i915, &smem, 1); +} + +static int igt_dmabuf_import_same_driver_lmem_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *regions[2]; + + if (!i915->mm.regions[INTEL_REGION_LMEM]) + return 0; + + regions[0] = i915->mm.regions[INTEL_REGION_LMEM]; + regions[1] = i915->mm.regions[INTEL_REGION_SMEM]; + return igt_dmabuf_import_same_driver(i915, regions, 2); +} + static int igt_dmabuf_import(void *arg) { struct drm_i915_private *i915 = arg; @@ -286,6 +469,9 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_same_driver_lmem), + SUBTEST(igt_dmabuf_import_same_driver_smem), + SUBTEST(igt_dmabuf_import_same_driver_lmem_smem), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c new file mode 100644 index 000000000000..28a700f08b49 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2021 Intel Corporation + */ + +#include "gt/intel_migrate.h" + +static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, + bool fill) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int i, count = obj->base.size / sizeof(u32); + enum i915_map_type map_type = + i915_coherent_map_type(i915, obj, false); + u32 *cur; + int err = 0; + + assert_object_held(obj); + cur = i915_gem_object_pin_map(obj, map_type); + if (IS_ERR(cur)) + return PTR_ERR(cur); + + if (fill) + for (i = 0; i < count; ++i) + *cur++ = i; + else + for (i = 0; i < count; ++i) + if (*cur++ != i) { + pr_err("Object content mismatch at location %d of %d\n", i, count); + err = -EINVAL; + break; + } + + i915_gem_object_unpin_map(obj); + + return err; +} + +static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, + enum intel_region_id dst) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_memory_region *src_mr = i915->mm.regions[src]; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + GEM_BUG_ON(!src_mr); + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, dst); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, src)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + err = i915_gem_object_wait_migration(obj, true); + if (err) + continue; + + err = igt_fill_check_buffer(obj, false); + } + i915_gem_object_put(obj); + + return err; +} + +static int igt_smem_create_migrate(void *arg) +{ + return 
igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM); +} + +static int igt_lmem_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM); +} + +static int igt_same_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM); +} + +static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_lock(obj, ww); + if (err) + return err; + + if (i915_gem_object_is_lmem(obj)) { + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); + if (err) { + pr_err("Object failed migration to smem\n"); + if (err) + return err; + } + + if (i915_gem_object_is_lmem(obj)) { + pr_err("object still backed by lmem\n"); + err = -EINVAL; + } + + if (!i915_gem_object_has_struct_page(obj)) { + pr_err("object not backed by struct page\n"); + err = -EINVAL; + } + + } else { + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM); + if (err) { + pr_err("Object failed migration to lmem\n"); + if (err) + return err; + } + + if (i915_gem_object_has_struct_page(obj)) { + pr_err("object still backed by struct page\n"); + err = -EINVAL; + } + + if (!i915_gem_object_is_lmem(obj)) { + pr_err("object not backed by lmem\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_lmem_pages_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + struct i915_request *rq; + int err; + int i; + + /* From LMEM to shmem and back again */ + + obj = i915_gem_object_create_lmem(i915, SZ_2M, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + /* Initial GPU fill, sync, CPU initialization. */ + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = ____i915_gem_object_get_pages(obj); + if (err) + continue; + + err = intel_migrate_clear(>->migrate, &ww, NULL, + obj->mm.pages->sgl, obj->cache_level, + i915_gem_object_is_lmem(obj), + 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } + if (err) + continue; + + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, + 5 * HZ); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + } + if (err) + goto out_put; + + /* + * Migrate to and from smem without explicitly syncing. + * Finalize with data in smem for fast readout. + */ + for (i = 1; i <= 5; ++i) { + for_i915_gem_ww(&ww, err, true) + err = lmem_pages_migrate_one(&ww, obj); + if (err) + goto out_put; + } + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + + /* Finally sync migration and check content. 
*/ + err = i915_gem_object_wait_migration(obj, true); + if (err) + goto out_unlock; + + err = igt_fill_check_buffer(obj, false); + +out_unlock: + i915_gem_object_unlock(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_smem_create_migrate), + SUBTEST(igt_lmem_create_migrate), + SUBTEST(igt_same_create_migrate), + SUBTEST(igt_lmem_pages_migrate), + }; + + if (!HAS_LMEM(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5575172c66f5..b20f5621f62b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -573,21 +573,30 @@ err: return 0; } +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) +{ + if (HAS_LMEM(i915)) + return I915_MMAP_TYPE_FIXED; + + return I915_MMAP_TYPE_GTT; +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) { struct drm_i915_gem_object *obj; - struct i915_mmap_offset *mmo; + u64 offset; + int ret; obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) - return false; + return expected && expected == PTR_ERR(obj); - mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); i915_gem_object_put(obj); - return PTR_ERR_OR_ZERO(mmo) == expected; + return ret == expected; } static void disable_retire_worker(struct drm_i915_private *i915) @@ -622,8 +631,8 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; struct drm_i915_gem_object *obj; struct drm_mm_node *hole, *next; - struct i915_mmap_offset *mmo; int loop, err = 0; + u64 offset; /* Disable background reaper */ disable_retire_worker(i915); @@ -684,13 +693,13 @@ static int igt_mmap_offset_exhaustion(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); + pr_err("Unable to create object for reclaimed hole\n"); goto out; } - mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); - if (IS_ERR(mmo)) { + err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); + if (err) { pr_err("Unable to insert object into reclaimed hole\n"); - err = PTR_ERR(mmo); goto err_obj; } @@ -830,34 +839,25 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - if (type == I915_MMAP_TYPE_GTT && - !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) - return false; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + bool no_map; - if (type != I915_MMAP_TYPE_GTT && - !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + if (HAS_LMEM(i915)) + return type == I915_MMAP_TYPE_FIXED; + else if (type == I915_MMAP_TYPE_FIXED) return false; - return true; -} - -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) -{ - GEM_BUG_ON(!n_placements); + if (type == I915_MMAP_TYPE_GTT && + !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + return false; - if (n_placements == 1) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mr = placements[0]; + 
i915_gem_object_lock(obj, NULL); + no_map = (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_has_iomem(obj)); + i915_gem_object_unlock(obj); - obj->mm.placements = &i915->mm.regions[mr->id]; - obj->mm.n_placements = 1; - } else { - obj->mm.placements = placements; - obj->mm.n_placements = n_placements; - } + return !no_map; } #define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) @@ -865,10 +865,10 @@ static int __igt_mmap(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; struct vm_area_struct *area; unsigned long addr; int err, i; + u64 offset; if (!can_mmap(obj, type)) return 0; @@ -879,11 +879,11 @@ static int __igt_mmap(struct drm_i915_private *i915, if (err) return err; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -897,13 +897,6 @@ static int __igt_mmap(struct drm_i915_private *i915, goto out_unmap; } - if (area->vm_private_data != mmo) { - pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n", - obj->mm.region->name); - err = -EINVAL; - goto out_unmap; - } - for (i = 0; i < obj->base.size / sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; @@ -961,18 +954,18 @@ static int igt_mmap(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, sizes[i], 0); + obj = __i915_gem_object_create_user(i915, sizes[i], &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -990,26 +983,33 @@ static const char *repr_mmap_type(enum i915_mmap_type type) case I915_MMAP_TYPE_WB: return "wb"; case I915_MMAP_TYPE_WC: return "wc"; case I915_MMAP_TYPE_UC: return "uc"; + case I915_MMAP_TYPE_FIXED: return "fixed"; default: return "unknown"; } } -static bool can_access(const struct drm_i915_gem_object *obj) +static bool can_access(struct drm_i915_gem_object *obj) { - return i915_gem_object_has_struct_page(obj) || - i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); + bool access; + + i915_gem_object_lock(obj, NULL); + access = i915_gem_object_has_struct_page(obj) || + i915_gem_object_has_iomem(obj); + i915_gem_object_unlock(obj); + + return access; } static int __igt_mmap_access(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; unsigned long __user *ptr; unsigned long A, B; unsigned long x, y; unsigned long addr; int err; + u64 offset; memset(&A, 0xAA, sizeof(A)); memset(&B, 0xBB, sizeof(B)); @@ -1017,11 +1017,11 @@ static int __igt_mmap_access(struct drm_i915_private *i915, if (!can_mmap(obj, type) || !can_access(obj)) return 0; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, 
obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; ptr = (unsigned long __user *)addr; @@ -1081,15 +1081,13 @@ static int igt_mmap_access(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); @@ -1097,6 +1095,8 @@ static int igt_mmap_access(void *arg) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1111,11 +1111,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, enum i915_mmap_type type) { struct intel_engine_cs *engine; - struct i915_mmap_offset *mmo; unsigned long addr; u32 __user *ux; u32 bbe; int err; + u64 offset; /* * Verify that the mmap access into the backing store aligns with @@ -1132,11 +1132,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, if (err) return err; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -1226,18 +1226,18 @@ static int igt_mmap_gpu(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1303,18 +1303,18 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; unsigned long addr; int err; + u64 offset; if (!can_mmap(obj, type)) return 0; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -1350,10 +1350,20 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, } } - err = check_absent(addr, obj->base.size); - if (err) { - pr_err("%s: was not absent\n", obj->mm.region->name); - goto out_unmap; + if (!obj->ops->mmap_ops) { + err = check_absent(addr, obj->base.size); + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); + goto out_unmap; + } + } else { + /* ttm allows access to evicted regions by design */ + + err = check_present(addr, obj->base.size); + if (err) { + pr_err("%s: was not present\n", obj->mm.region->name); + goto out_unmap; + } } out_unmap: @@ -1371,18 +1381,18 @@ static int igt_mmap_revoke(void *arg) struct drm_i915_gem_object *obj; int err; - obj = 
i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c deleted file mode 100644 index 8c335d1a8406..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ /dev/null @@ -1,597 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include <linux/sort.h> - -#include "gt/intel_gt.h" -#include "gt/intel_engine_user.h" - -#include "i915_selftest.h" - -#include "gem/i915_gem_context.h" -#include "selftests/igt_flush_test.h" -#include "selftests/i915_random.h" -#include "selftests/mock_drm.h" -#include "huge_gem_object.h" -#include "mock_context.h" - -static int wrap_ktime_compare(const void *A, const void *B) -{ - const ktime_t *a = A, *b = B; - - return ktime_compare(*a, *b); -} - -static int __perf_fill_blt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_fill_blt(obj, ce, 0); - if (err) - break; - - err = i915_gem_object_wait(obj, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", - engine->name, - obj->base.size >> 10, - div64_u64(mul_u32_u32(4 * obj->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_fill_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = __perf_fill_blt(obj); - i915_gem_object_put(obj); - if (err) - return err; - } - - return 0; -} - -static int __perf_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst) -{ - struct drm_i915_private *i915 = to_i915(src->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err = 0; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - break; - - err = i915_gem_object_wait(dst, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - 
t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", - engine->name, - src->base.size >> 10, - div64_u64(mul_u32_u32(4 * src->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_copy_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *src, *dst; - int err; - - src = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(src)) - return PTR_ERR(src); - - dst = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_src; - } - - err = __perf_copy_blt(src, dst); - - i915_gem_object_put(dst); -err_src: - i915_gem_object_put(src); - if (err) - return err; - } - - return 0; -} - -struct igt_thread_arg { - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct file *file; - struct rnd_state prng; - unsigned int n_cpus; -}; - -static int igt_fill_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! */ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * Make sure the potentially async clflush does its job, if - * required. 
- */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_object_fill_blt(obj, ce, val); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) { - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_copy_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *src, *dst; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! 
*/ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - src = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(src)) { - err = PTR_ERR(src); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_src; - } - - memset32(vaddr, val, - huge_gem_object_phys_size(src) / sizeof(u32)); - - i915_gem_object_unpin_map(src); - - if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - src->cache_dirty = true; - - dst = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_put_src; - } - - vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_dst; - } - - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(dst) / sizeof(u32)); - - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - dst->cache_dirty = true; - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) { - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(dst); - - i915_gem_object_put(src); - i915_gem_object_put(dst); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(dst); -err_put_dst: - i915_gem_object_put(dst); -err_put_src: - i915_gem_object_put(src); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_threaded_blt(struct intel_engine_cs *engine, - int (*blt_fn)(void *arg), - unsigned int flags) -#define SINGLE_CTX BIT(0) -{ - struct igt_thread_arg *thread; - struct task_struct **tsk; - unsigned int n_cpus, i; - I915_RND_STATE(prng); - int err = 0; - - n_cpus = num_online_cpus() + 1; - - tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); - if (!tsk) - return 0; - - thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); - if (!thread) - goto out_tsk; - - thread[0].file = mock_file(engine->i915); - if (IS_ERR(thread[0].file)) { - err = PTR_ERR(thread[0].file); - goto out_thread; - } - - if (flags & SINGLE_CTX) { - thread[0].ctx = live_context_for_engine(engine, thread[0].file); - if (IS_ERR(thread[0].ctx)) { - err = PTR_ERR(thread[0].ctx); - goto out_file; - } - } - - for (i = 0; i < n_cpus; ++i) { - thread[i].engine = engine; - thread[i].file = thread[0].file; - thread[i].ctx = thread[0].ctx; - thread[i].n_cpus = n_cpus; - thread[i].prng = - I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); - - tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); - if (IS_ERR(tsk[i])) { - err = PTR_ERR(tsk[i]); - break; - } - - get_task_struct(tsk[i]); - } - - yield(); /* start all threads before we kthread_stop() */ - - for (i = 0; i < n_cpus; ++i) { - int status; - - if 
(IS_ERR_OR_NULL(tsk[i])) - continue; - - status = kthread_stop(tsk[i]); - if (status && !err) - err = status; - - put_task_struct(tsk[i]); - } - -out_file: - fput(thread[0].file); -out_thread: - kfree(thread); -out_tsk: - kfree(tsk); - return err; -} - -static int test_copy_engines(struct drm_i915_private *i915, - int (*fn)(void *arg), - unsigned int flags) -{ - struct intel_engine_cs *engine; - int ret; - - for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) { - ret = igt_threaded_blt(engine, fn, flags); - if (ret) - return ret; - } - - return 0; -} - -static int igt_fill_blt(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, 0); -} - -static int igt_fill_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX); -} - -static int igt_copy_blt(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, 0); -} - -static int igt_copy_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX); -} - -int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_fill_blt), - SUBTEST(igt_fill_blt_ctx0), - SUBTEST(igt_copy_blt), - SUBTEST(igt_copy_blt_ctx0), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} - -int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(perf_fill_blt), - SUBTEST(perf_copy_blt), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 3a6ce87f8b52..d43d8dae0f69 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,14 @@ static int mock_phys_object(void *arg) goto out; } + i915_gem_object_lock(obj, NULL); if (!i915_gem_object_has_struct_page(obj)) { + i915_gem_object_unlock(obj); err = -EINVAL; pr_err("shmem has no struct page\n"); goto out_obj; } - i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); i915_gem_object_unlock(obj); if (err) { diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 51b5a3421b40..fee070df1c97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -14,6 +14,7 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; + struct intel_sseu null_sseu = {}; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,15 +31,6 @@ mock_context(struct drm_i915_private *i915, i915_gem_context_set_persistence(ctx); - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) - goto err_free; - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - if (name) { struct i915_ppgtt *ppgtt; @@ -46,25 +38,29 @@ mock_context(struct drm_i915_private *i915, ppgtt = mock_ppgtt(i915, name); if (!ppgtt) - goto err_put; - - mutex_lock(&ctx->mutex); - __set_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + goto err_free; + ctx->vm = i915_vm_open(&ppgtt->vm); i915_vm_put(&ppgtt->vm); } + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx, null_sseu); + if (IS_ERR(e)) + goto err_vm; + RCU_INIT_POINTER(ctx->engines, e); + + 
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); + return ctx; +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); err_free: kfree(ctx); return NULL; - -err_put: - i915_gem_context_set_closed(ctx); - i915_gem_context_put(ctx); - return NULL; } void mock_context_close(struct i915_gem_context *ctx) @@ -80,20 +76,29 @@ void mock_init_contexts(struct drm_i915_private *i915) struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file) { + struct drm_i915_file_private *fpriv = to_drm_file(file)->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; u32 id; - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; i915_gem_context_set_no_error_capture(ctx); - err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); + err = xa_alloc(&fpriv->context_xa, &id, NULL, xa_limit_32b, GFP_KERNEL); if (err < 0) goto err_ctx; + gem_context_register(ctx, fpriv, id); + return ctx; err_ctx: @@ -106,6 +111,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) { struct i915_gem_engines *engines; struct i915_gem_context *ctx; + struct intel_sseu null_sseu = {}; struct intel_context *ce; engines = alloc_engines(1); @@ -124,7 +130,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) return ERR_CAST(ce); } - intel_context_set_gem(ce, ctx); + intel_context_set_gem(ce, ctx, null_sseu); engines->engines[0] = ce; engines->num_engines = 1; @@ -139,11 +145,24 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) } struct i915_gem_context * -kernel_context(struct drm_i915_private *i915) +kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm) { struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; + + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + if (vm) { + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = i915_vm_get(vm); + } - ctx = i915_gem_create_context(i915, 0); + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 2a6121d33352..7a02fd9b5866 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -10,6 +10,7 @@ struct file; struct drm_i915_private; struct intel_engine_cs; +struct i915_address_space; void mock_init_contexts(struct drm_i915_private *i915); @@ -25,7 +26,8 @@ live_context(struct drm_i915_private *i915, struct file *file); struct i915_gem_context * live_context_for_engine(struct intel_engine_cs *engine, struct file *file); -struct i915_gem_context *kernel_context(struct drm_i915_private *i915); +struct i915_gem_context *kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm); void kernel_context_close(struct i915_gem_context *ctx); #endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index 4270b5a34a83..d6f5836396f8 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -437,20 +437,20 @@ static int frequency_show(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(i915) ? 
rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -500,7 +500,7 @@ static int llc_show(struct seq_file *m, void *data) min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq /= GEN9_FREQ_SCALER; max_gpu_freq /= GEN9_FREQ_SCALER; @@ -518,7 +518,7 @@ static int llc_show(struct seq_file *m, void *data) intel_gpu_freq(rps, (gpu_freq * (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 94e0a5669f90..461844dffd7e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_B0)) + if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0)) dc_flush_wa = true; } @@ -208,7 +208,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* Wa_1409600907:tgl */ + /* Wa_1409600907:tgl,adl-p */ flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; @@ -279,7 +279,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) if (mode & EMIT_INVALIDATE) aux_inv = rq->engine->mask & ~BIT(BCS0); if (aux_inv) - cmd += 2 * hweight8(aux_inv) + 2; + cmd += 2 * hweight32(aux_inv) + 2; cs = intel_ring_begin(rq, cmd); if (IS_ERR(cs)) @@ -313,9 +313,8 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) struct intel_engine_cs *engine; unsigned int tmp; - *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); - for_each_engine_masked(engine, rq->engine->gt, - aux_inv, tmp) { + *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv)); + for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) { *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); *cs++ = AUX_INV; } @@ -506,7 +505,8 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); @@ -598,7 +598,8 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = 
MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = gen12_emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index da4f5eb43ac2..6e0e52eeb87a 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -358,6 +358,54 @@ static void gen8_ppgtt_alloc(struct i915_address_space *vm, &start, start + length, vm->top); } +static void __gen8_ppgtt_foreach(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 *start, u64 end, int lvl, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data) +{ + unsigned int idx, len; + + len = gen8_pd_range(*start, end, lvl--, &idx); + + spin_lock(&pd->lock); + do { + struct i915_page_table *pt = pd->entry[idx]; + + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + if (lvl) { + __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl, + fn, data); + } else { + fn(vm, pt, data); + *start += gen8_pt_count(*start, end); + } + + spin_lock(&pd->lock); + atomic_dec(&pt->used); + } while (idx++, --len); + spin_unlock(&pd->lock); +} + +static void gen8_ppgtt_foreach(struct i915_address_space *vm, + u64 start, u64 length, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data) +{ + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + + __gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top, + fn, data); +} + static __always_inline u64 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, @@ -552,6 +600,24 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, } } +static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + u64 idx = offset >> GEN8_PTE_SHIFT; + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + struct i915_page_directory *pd = + i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + gen8_pte_t *vaddr; + + vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); +} + static int gen8_init_scratch(struct i915_address_space *vm) { u32 pte_flags; @@ -731,8 +797,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; + ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; + ppgtt->vm.foreach = gen8_ppgtt_foreach; ppgtt->vm.pte_encode = gen8_pte_encode; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 38cc42783dfb..209cf265bf74 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -15,28 +15,14 @@ #include "intel_gt_pm.h" #include "intel_gt_requests.h" -static bool irq_enable(struct intel_engine_cs *engine) +static bool irq_enable(struct intel_breadcrumbs *b) { - if (!engine->irq_enable) - return false; - - /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->gt->irq_lock); - - 
return true; + return intel_engine_irq_enable(b->irq_engine); } -static void irq_disable(struct intel_engine_cs *engine) +static void irq_disable(struct intel_breadcrumbs *b) { - if (!engine->irq_disable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->gt->irq_lock); + intel_engine_irq_disable(b->irq_engine); } static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) @@ -57,7 +43,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) WRITE_ONCE(b->irq_armed, true); /* Requests may have completed before we could enable the interrupt. */ - if (!b->irq_enabled++ && irq_enable(b->irq_engine)) + if (!b->irq_enabled++ && b->irq_enable(b)) irq_work_queue(&b->irq_work); } @@ -76,7 +62,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(b->irq_engine); + b->irq_disable(b); WRITE_ONCE(b->irq_armed, false); intel_gt_pm_put_async(b->irq_engine->gt); @@ -259,6 +245,9 @@ static void signal_irq_work(struct irq_work *work) llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; + if (rq->engine->sched_engine->retire_inflight_request_prio) + rq->engine->sched_engine->retire_inflight_request_prio(rq); + spin_lock(&rq->lock); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); @@ -281,7 +270,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) if (!b) return NULL; - b->irq_engine = irq_engine; + kref_init(&b->ref); spin_lock_init(&b->signalers_lock); INIT_LIST_HEAD(&b->signalers); @@ -290,6 +279,10 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) spin_lock_init(&b->irq_lock); init_irq_work(&b->irq_work, signal_irq_work); + b->irq_engine = irq_engine; + b->irq_enable = irq_enable; + b->irq_disable = irq_disable; + return b; } @@ -303,9 +296,9 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_enabled) - irq_enable(b->irq_engine); + b->irq_enable(b); else - irq_disable(b->irq_engine); + b->irq_disable(b); spin_unlock_irqrestore(&b->irq_lock, flags); } @@ -325,11 +318,14 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) } } -void intel_breadcrumbs_free(struct intel_breadcrumbs *b) +void intel_breadcrumbs_free(struct kref *kref) { + struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref); + irq_work_sync(&b->irq_work); GEM_BUG_ON(!list_empty(&b->signalers)); GEM_BUG_ON(b->irq_armed); + kfree(b); } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h index 3ce5ce270b04..be0d4f379a85 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -9,7 +9,7 @@ #include <linux/atomic.h> #include <linux/irq_work.h> -#include "intel_engine_types.h" +#include "intel_breadcrumbs_types.h" struct drm_printer; struct i915_request; @@ -17,7 +17,7 @@ struct intel_breadcrumbs; struct intel_breadcrumbs * intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); -void intel_breadcrumbs_free(struct intel_breadcrumbs *b); +void intel_breadcrumbs_free(struct kref *kref); void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); void __intel_breadcrumbs_park(struct intel_breadcrumbs *b); @@ -48,4 +48,16 @@ void i915_request_cancel_breadcrumb(struct i915_request *request); void intel_context_remove_breadcrumbs(struct intel_context *ce, struct 
intel_breadcrumbs *b); +static inline struct intel_breadcrumbs * +intel_breadcrumbs_get(struct intel_breadcrumbs *b) +{ + kref_get(&b->ref); + return b; +} + +static inline void intel_breadcrumbs_put(struct intel_breadcrumbs *b) +{ + kref_put(&b->ref, intel_breadcrumbs_free); +} + #endif /* __INTEL_BREADCRUMBS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 3a084ce8ff5e..72dfd3748c4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -7,10 +7,13 @@ #define __INTEL_BREADCRUMBS_TYPES__ #include <linux/irq_work.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/types.h> +#include "intel_engine_types.h" + /* * Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the @@ -29,6 +32,7 @@ * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { + struct kref ref; atomic_t active; spinlock_t signalers_lock; /* protects the list of signalers */ @@ -42,7 +46,10 @@ struct intel_breadcrumbs { bool irq_armed; /* Not all breadcrumbs are attached to physical HW */ + intel_engine_mask_t engine_mask; struct intel_engine_cs *irq_engine; + bool (*irq_enable)(struct intel_breadcrumbs *b); + void (*irq_disable)(struct intel_breadcrumbs *b); }; #endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4033184f13b9..745e84c72c90 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -7,28 +7,26 @@ #include "gem/i915_gem_pm.h" #include "i915_drv.h" -#include "i915_globals.h" +#include "i915_trace.h" #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_ring.h" -static struct i915_global_context { - struct i915_global base; - struct kmem_cache *slab_ce; -} global; +static struct kmem_cache *slab_ce; static struct intel_context *intel_context_alloc(void) { - return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); + return kmem_cache_zalloc(slab_ce, GFP_KERNEL); } static void rcu_context_free(struct rcu_head *rcu) { struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); - kmem_cache_free(global.slab_ce, ce); + trace_intel_context_free(ce); + kmem_cache_free(slab_ce, ce); } void intel_context_free(struct intel_context *ce) @@ -46,6 +44,7 @@ intel_context_create(struct intel_engine_cs *engine) return ERR_PTR(-ENOMEM); intel_context_init(ce, engine); + trace_intel_context_create(ce); return ce; } @@ -80,7 +79,7 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) return 0; /* Preallocate tracking nodes */ @@ -268,6 +267,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce, GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! 
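
/*
 * A minimal standalone model (not the driver's API) of the refcounting
 * pattern the intel_breadcrumbs_get()/intel_breadcrumbs_put() helpers
 * above introduce: intel_breadcrumbs_free() becomes the kref release
 * callback, so the object is only freed on the final put.  C11 atomics
 * stand in for struct kref; all names here are illustrative.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct breadcrumbs {
	atomic_int ref;				/* plays the role of struct kref */
};

static struct breadcrumbs *breadcrumbs_create(void)
{
	struct breadcrumbs *b = calloc(1, sizeof(*b));

	if (b)
		atomic_init(&b->ref, 1);	/* kref_init() */
	return b;
}

static struct breadcrumbs *breadcrumbs_get(struct breadcrumbs *b)
{
	atomic_fetch_add(&b->ref, 1);		/* kref_get() */
	return b;
}

static void breadcrumbs_put(struct breadcrumbs *b)
{
	if (atomic_fetch_sub(&b->ref, 1) == 1)	/* last put runs the release */
		free(b);
}

int main(void)
{
	struct breadcrumbs *b = breadcrumbs_create();

	if (!b)
		return 1;
	breadcrumbs_get(b);	/* e.g. a second user sharing the breadcrumbs */
	breadcrumbs_put(b);
	breadcrumbs_put(b);	/* final put frees */
	return 0;
}
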
*/ + trace_intel_context_do_pin(ce); + err_unlock: mutex_unlock(&ce->pin_mutex); err_post_unpin: @@ -306,9 +307,9 @@ retry: return err; } -void intel_context_unpin(struct intel_context *ce) +void __intel_context_do_unpin(struct intel_context *ce, int sub) { - if (!atomic_dec_and_test(&ce->pin_count)) + if (!atomic_sub_and_test(sub, &ce->pin_count)) return; CE_TRACE(ce, "unpin\n"); @@ -323,6 +324,7 @@ void intel_context_unpin(struct intel_context *ce) */ intel_context_get(ce); intel_context_active_release(ce); + trace_intel_context_do_unpin(ce); intel_context_put(ce); } @@ -360,6 +362,12 @@ static int __intel_context_active(struct i915_active *active) return 0; } +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, + enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -371,7 +379,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->engine = engine; ce->ops = engine->cops; ce->sseu = engine->sseu; - ce->ring = __intel_context_ring_size(SZ_4K); + ce->ring = NULL; + ce->ring_size = SZ_4K; ewma_runtime_init(&ce->runtime.avg); @@ -383,6 +392,22 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) mutex_init(&ce->pin_mutex); + spin_lock_init(&ce->guc_state.lock); + INIT_LIST_HEAD(&ce->guc_state.fences); + + spin_lock_init(&ce->guc_active.lock); + INIT_LIST_HEAD(&ce->guc_active.requests); + + ce->guc_id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id_link); + + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a pending schedule disable outstanding. + */ + i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_blocked); + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); } @@ -397,28 +422,17 @@ void intel_context_fini(struct intel_context *ce) i915_active_fini(&ce->active); } -static void i915_global_context_shrink(void) -{ - kmem_cache_shrink(global.slab_ce); -} - -static void i915_global_context_exit(void) +void i915_context_module_exit(void) { - kmem_cache_destroy(global.slab_ce); + kmem_cache_destroy(slab_ce); } -static struct i915_global_context global = { { - .shrink = i915_global_context_shrink, - .exit = i915_global_context_exit, -} }; - -int __init i915_global_context_init(void) +int __init i915_context_module_init(void) { - global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); - if (!global.slab_ce) + slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); + if (!slab_ce) return -ENOMEM; - i915_global_register(&global.base); return 0; } @@ -499,6 +513,26 @@ retry: return rq; } +struct i915_request *intel_context_find_active_request(struct intel_context *ce) +{ + struct i915_request *rq, *active = NULL; + unsigned long flags; + + GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); + + spin_lock_irqsave(&ce->guc_active.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + + return active; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index f83a73a2b39f..c41098950746 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -16,6 +16,7 @@ #include "intel_engine_types.h" #include 
"intel_ring_types.h" #include "intel_timeline_types.h" +#include "i915_trace.h" #define CE_TRACE(ce, fmt, ...) do { \ const struct intel_context *ce__ = (ce); \ @@ -30,6 +31,9 @@ void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); +void i915_context_module_exit(void); +int i915_context_module_init(void); + struct intel_context * intel_context_create(struct intel_engine_cs *engine); @@ -69,6 +73,13 @@ intel_context_is_pinned(struct intel_context *ce) return atomic_read(&ce->pin_count); } +static inline void intel_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + GEM_BUG_ON(!ce->ops->cancel_request); + return ce->ops->cancel_request(ce, rq); +} + /** * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status * @ce - the context @@ -113,7 +124,32 @@ static inline void __intel_context_pin(struct intel_context *ce) atomic_inc(&ce->pin_count); } -void intel_context_unpin(struct intel_context *ce); +void __intel_context_do_unpin(struct intel_context *ce, int sub); + +static inline void intel_context_sched_disable_unpin(struct intel_context *ce) +{ + __intel_context_do_unpin(ce, 2); +} + +static inline void intel_context_unpin(struct intel_context *ce) +{ + if (!ce->ops->sched_disable) { + __intel_context_do_unpin(ce, 1); + } else { + /* + * Move ownership of this pin to the scheduling disable which is + * an async operation. When that operation completes the above + * intel_context_sched_disable_unpin is called potentially + * unpinning the context. + */ + while (!atomic_add_unless(&ce->pin_count, -1, 1)) { + if (atomic_cmpxchg(&ce->pin_count, 1, 2) == 1) { + ce->ops->sched_disable(ce); + break; + } + } + } +} void intel_context_enter_engine(struct intel_context *ce); void intel_context_exit_engine(struct intel_context *ce); @@ -175,10 +211,8 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce); -static inline struct intel_ring *__intel_context_ring_size(u64 sz) -{ - return u64_to_ptr(struct intel_ring, sz); -} +struct i915_request * +intel_context_find_active_request(struct intel_context *ce); static inline bool intel_context_is_barrier(const struct intel_context *ce) { @@ -220,6 +254,18 @@ static inline bool intel_context_set_banned(struct intel_context *ce) return test_and_set_bit(CONTEXT_BANNED, &ce->flags); } +static inline bool intel_context_ban(struct intel_context *ce, + struct i915_request *rq) +{ + bool ret = intel_context_set_banned(ce); + + trace_intel_context_ban(ce); + if (ce->ops->ban) + ce->ops->ban(ce, rq); + + return ret; +} + static inline bool intel_context_force_single_submission(const struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c deleted file mode 100644 index 65dcd090245d..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_context_param.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_active.h" -#include "intel_context.h" -#include "intel_context_param.h" -#include "intel_ring.h" - -int intel_context_set_ring_size(struct intel_context *ce, long sz) -{ - int err; - - if (intel_context_lock_pinned(ce)) - return -EINTR; - - err = i915_active_wait(&ce->active); - if (err < 0) - goto unlock; - - if (intel_context_is_pinned(ce)) { - err = -EBUSY; /* In active use, come back later! 
*/ - goto unlock; - } - - if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - struct intel_ring *ring; - - /* Replace the existing ringbuffer */ - ring = intel_engine_create_ring(ce->engine, sz); - if (IS_ERR(ring)) { - err = PTR_ERR(ring); - goto unlock; - } - - intel_ring_put(ce->ring); - ce->ring = ring; - - /* Context image will be updated on next pin */ - } else { - ce->ring = __intel_context_ring_size(sz); - } - -unlock: - intel_context_unlock_pinned(ce); - return err; -} - -long intel_context_get_ring_size(struct intel_context *ce) -{ - long sz = (unsigned long)READ_ONCE(ce->ring); - - if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - if (intel_context_lock_pinned(ce)) - return -EINTR; - - sz = ce->ring->size; - intel_context_unlock_pinned(ce); - } - - return sz; -} diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h index 3ecacc675f41..0c69cb42d075 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.h +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -10,14 +10,10 @@ #include "intel_context.h" -int intel_context_set_ring_size(struct intel_context *ce, long sz); -long intel_context_get_ring_size(struct intel_context *ce); - -static inline int +static inline void intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us) { ce->watchdog.timeout_us = timeout_us; - return 0; } #endif /* INTEL_CONTEXT_PARAM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ed8c447a7346..e54351a170e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,12 +13,14 @@ #include <linux/types.h> #include "i915_active_types.h" +#include "i915_sw_fence.h" #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" -#define CONTEXT_REDZONE POISON_INUSE +#include "uc/intel_guc_fwif.h" +#define CONTEXT_REDZONE POISON_INUSE DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; @@ -35,16 +37,29 @@ struct intel_context_ops { int (*alloc)(struct intel_context *ce); + void (*ban)(struct intel_context *ce, struct i915_request *rq); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); void (*post_unpin)(struct intel_context *ce); + void (*cancel_request)(struct intel_context *ce, + struct i915_request *rq); + void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); + void (*sched_disable)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); + + /* virtual engine/context interface */ + struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, + unsigned int count); + struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, + unsigned int sibling); }; struct intel_context { @@ -82,6 +97,7 @@ struct intel_context { spinlock_t signal_lock; /* protects signals, the list of requests */ struct i915_vma *state; + u32 ring_size; struct intel_ring *ring; struct intel_timeline *timeline; @@ -95,6 +111,7 @@ struct intel_context { #define CONTEXT_BANNED 6 #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 +#define CONTEXT_LRCA_DIRTY 9 struct { u64 timeout_us; @@ -136,6 +153,51 @@ struct intel_context { struct intel_sseu sseu; u8 wa_bb_page; /* if set, page num reserved for context workarounds */ + + struct { + /** lock: protects everything in guc_state */ + spinlock_t 
lock; + /** + * sched_state: scheduling state of this context using GuC + * submission + */ + u16 sched_state; + /* + * fences: maintains of list of requests that have a submit + * fence related to GuC submission + */ + struct list_head fences; + } guc_state; + + struct { + /** lock: protects everything in guc_active */ + spinlock_t lock; + /** requests: active requests on this context */ + struct list_head requests; + } guc_active; + + /* GuC scheduling state flags that do not require a lock. */ + atomic_t guc_sched_state_no_lock; + + /* GuC LRC descriptor ID */ + u16 guc_id; + + /* GuC LRC descriptor reference count */ + atomic_t guc_id_ref; + + /* + * GuC ID link - in list when unpinned but guc_id still valid in GuC + */ + struct list_head guc_id_link; + + /* GuC context blocked fence */ + struct i915_sw_fence guc_blocked; + + /* + * GuC priority management + */ + u8 guc_prio; + u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 8d9184920c51..87579affb952 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,7 +19,9 @@ #include "intel_workarounds.h" struct drm_printer; +struct intel_context; struct intel_gt; +struct lock_class_key; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -123,20 +125,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -static inline void -execlists_active_lock_bh(struct intel_engine_execlists *execlists) -{ - local_bh_disable(); /* prevent local softirq and lock recursion */ - tasklet_lock(&execlists->tasklet); -} - -static inline void -execlists_active_unlock_bh(struct intel_engine_execlists *execlists) -{ - tasklet_unlock(&execlists->tasklet); - local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ -} - struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); @@ -186,11 +174,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f -#define CNL_HWS_CSB_WRITE_INDEX 0x2f +#define ICL_HWS_CSB_WRITE_INDEX 0x2f void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); @@ -223,6 +212,9 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); +bool intel_engine_irq_enable(struct intel_engine_cs *engine); +void intel_engine_irq_disable(struct intel_engine_cs *engine); + static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { @@ -248,17 +240,27 @@ __printf(3, 4) void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); +void intel_engine_dump_active_requests(struct list_head *requests, + struct i915_request *hung_rq, + struct drm_printer *m); ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now); struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine); +intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine); u32 intel_engine_context_size(struct intel_gt *gt, u8 class); +struct intel_context * +intel_engine_create_pinned_context(struct intel_engine_cs *engine, + struct i915_address_space *vm, + unsigned int ring_size, + unsigned int hwsp, + struct lock_class_key *key, + const char *name); + +void intel_engine_destroy_pinned_context(struct intel_context *ce); -void intel_engine_init_active(struct intel_engine_cs *engine, - unsigned int subclass); #define ENGINE_PHYSICAL 0 #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 @@ -277,13 +279,60 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } +struct intel_context * +intel_engine_create_virtual(struct intel_engine_cs **siblings, + unsigned int count); + +static inline bool +intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) +{ + /* + * For non-GuC submission we expect the back-end to look at the + * heartbeat status of the actual physical engine that the work + * has been (or is being) scheduled on, so we should only reach + * here with GuC submission enabled. 
+ */ + GEM_BUG_ON(!intel_engine_uses_guc(engine)); + + return intel_guc_virtual_engine_has_heartbeat(engine); +} + static inline bool intel_engine_has_heartbeat(const struct intel_engine_cs *engine) { if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return false; - return READ_ONCE(engine->props.heartbeat_interval_ms); + if (intel_engine_is_virtual(engine)) + return intel_virtual_engine_has_heartbeat(engine); + else + return READ_ONCE(engine->props.heartbeat_interval_ms); +} + +static inline struct intel_engine_cs * +intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) +{ + GEM_BUG_ON(!intel_engine_is_virtual(engine)); + return engine->cops->get_sibling(engine, sibling); +} + +static inline void +intel_engine_set_hung_context(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + engine->hung_ce = ce; +} + +static inline void +intel_engine_clear_hung_context(struct intel_engine_cs *engine) +{ + intel_engine_set_hung_context(engine, NULL); +} + +static inline struct intel_context * +intel_engine_get_hung_context(struct intel_engine_cs *engine) +{ + return engine->hung_ce; } #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7f03df236613..0d9105a31d84 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -35,14 +35,12 @@ #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) #define MAX_MMIO_BASES 3 struct engine_info { - unsigned int hw_id; u8 class; u8 instance; /* mmio bases table *must* be sorted in reverse graphics_ver order */ @@ -54,7 +52,6 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS0] = { - .hw_id = RCS0_HW, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { @@ -62,7 +59,6 @@ static const struct engine_info intel_engines[] = { }, }, [BCS0] = { - .hw_id = BCS0_HW, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { @@ -70,7 +66,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS0] = { - .hw_id = VCS0_HW, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -80,7 +75,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS1] = { - .hw_id = VCS1_HW, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -89,7 +83,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS2] = { - .hw_id = VCS2_HW, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { @@ -97,15 +90,41 @@ static const struct engine_info intel_engines[] = { }, }, [VCS3] = { - .hw_id = VCS3_HW, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } }, }, + [VCS4] = { + .class = VIDEO_DECODE_CLASS, + .instance = 4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } + }, + }, + [VCS5] = { + .class = VIDEO_DECODE_CLASS, + .instance = 5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } + }, + }, + [VCS6] = { + .class = VIDEO_DECODE_CLASS, + .instance = 6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } + }, + }, + [VCS7] = { + .class = VIDEO_DECODE_CLASS, + .instance = 7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } + }, + }, [VECS0] = 
{ - .hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -114,13 +133,26 @@ static const struct engine_info intel_engines[] = { }, }, [VECS1] = { - .hw_id = VECS1_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } }, }, + [VECS2] = { + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } + }, + }, + [VECS3] = { + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } + }, + }, }; /** @@ -153,8 +185,6 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) case 12: case 11: return GEN11_LR_CONTEXT_RENDER_SIZE; - case 10: - return GEN10_LR_CONTEXT_RENDER_SIZE; case 9: return GEN9_LR_CONTEXT_RENDER_SIZE; case 8: @@ -269,6 +299,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1)); + BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1)); if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) return -EINVAL; @@ -294,7 +326,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; - engine->hw_id = info->hw_id; guc_class = engine_class_to_guc_class(info->class); engine->guc_id = MAKE_GUC_ID(guc_class, info->instance); engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); @@ -328,9 +359,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) if (engine->context_size) DRIVER_CAPS(i915)->has_logical_contexts = true; - /* Nothing to do here, execute in order of dependencies */ - engine->schedule = NULL; - ewma__engine_latency_init(&engine->latency); seqcount_init(&engine->stats.lock); @@ -445,6 +473,28 @@ void intel_engines_free(struct intel_gt *gt) } } +static +bool gen11_vdbox_has_sfc(struct drm_i915_private *i915, + unsigned int physical_vdbox, + unsigned int logical_vdbox, u16 vdbox_mask) +{ + /* + * In Gen11, only even numbered logical VDBOXes are hooked + * up to an SFC (Scaler & Format Converter) unit. + * In Gen12, Even numbered physical instance always are connected + * to an SFC. Odd numbered physical instances have SFC only if + * previous even instance is fused off. + */ + if (GRAPHICS_VER(i915) == 12) + return (physical_vdbox % 2 == 0) || + !(BIT(physical_vdbox - 1) & vdbox_mask); + else if (GRAPHICS_VER(i915) == 11) + return logical_vdbox % 2 == 0; + + MISSING_CASE(GRAPHICS_VER(i915)); + return false; +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -471,7 +521,14 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) if (GRAPHICS_VER(i915) < 11) return info->engine_mask; - media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + /* + * On newer platforms the fusing register is called 'enable' and has + * enable semantics, while on older platforms it is called 'disable' + * and bits have disable semantices. 
+ */ + media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + media_fuse = ~media_fuse; vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> @@ -489,13 +546,9 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) continue; } - /* - * In Gen11, only even numbered logical VDBOXes are - * hooked up to an SFC (Scaler & Format Converter) unit. - * In TGL each VDBOX has access to an SFC. - */ - if (GRAPHICS_VER(i915) >= 12 || logical_vdbox++ % 2 == 0) + if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask)) gt->info.vdbox_sfc_access |= BIT(i); + logical_vdbox++; } drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt)); @@ -585,9 +638,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) memset(execlists->pending, 0, sizeof(execlists->pending)); execlists->active = memset(execlists->inflight, 0, sizeof(execlists->inflight)); - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; } static void cleanup_status_page(struct intel_engine_cs *engine) @@ -714,11 +764,17 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL); + if (!engine->sched_engine) { + err = -ENOMEM; + goto err_sched_engine; + } + engine->sched_engine->private_data = engine; + err = intel_engine_init_cmd_parser(engine); if (err) goto err_cmd_parser; - intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); @@ -737,7 +793,9 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; err_cmd_parser: - intel_breadcrumbs_free(engine->breadcrumbs); + i915_sched_engine_put(engine->sched_engine); +err_sched_engine: + intel_breadcrumbs_put(engine->breadcrumbs); err_status: cleanup_status_page(engine); return err; @@ -775,11 +833,11 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->rq.ring = &frame->ring; mutex_lock(&ce->timeline->mutex); - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); mutex_unlock(&ce->timeline->mutex); GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ @@ -788,33 +846,13 @@ static int measure_breadcrumb_dw(struct intel_context *ce) return dw; } -void -intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) -{ - INIT_LIST_HEAD(&engine->active.requests); - INIT_LIST_HEAD(&engine->active.hold); - - spin_lock_init(&engine->active.lock); - lockdep_set_subclass(&engine->active.lock, subclass); - - /* - * Due to an interesting quirk in lockdep's internal debug tracking, - * after setting a subclass we must ensure the lock is used. Otherwise, - * nr_unused_locks is incremented once too often. 
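
/*
 * A standalone sketch of the SFC-fusing rule encoded by the
 * gen11_vdbox_has_sfc() helper above: on gen11 every even *logical*
 * VDBOX has a Scaler & Format Converter, while on gen12 every even
 * *physical* VDBOX has one and an odd instance only inherits it when
 * the preceding even instance is fused off.  The function and mask
 * values here are illustrative, not the driver's.
 */
#include <stdbool.h>
#include <stdio.h>

static bool vdbox_has_sfc(int graphics_ver, unsigned int physical,
			  unsigned int logical, unsigned int vdbox_mask)
{
	if (graphics_ver == 12)
		return (physical % 2 == 0) ||
		       !(vdbox_mask & (1u << (physical - 1)));
	else if (graphics_ver == 11)
		return logical % 2 == 0;
	return false;
}

int main(void)
{
	/* e.g. gen12 with VCS0 fused off (mask 0b0110): VCS1 picks up the SFC */
	unsigned int vdbox_mask = 0x6, logical = 0;

	for (unsigned int i = 0; i < 4; i++) {
		if (!(vdbox_mask & (1u << i)))
			continue;
		printf("vcs%u: sfc=%d\n", i,
		       vdbox_has_sfc(12, i, logical++, vdbox_mask));
	}
	return 0;
}
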
- */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - local_irq_disable(); - lock_map_acquire(&engine->active.lock.dep_map); - lock_map_release(&engine->active.lock.dep_map); - local_irq_enable(); -#endif -} - -static struct intel_context * -create_pinned_context(struct intel_engine_cs *engine, - unsigned int hwsp, - struct lock_class_key *key, - const char *name) +struct intel_context * +intel_engine_create_pinned_context(struct intel_engine_cs *engine, + struct i915_address_space *vm, + unsigned int ring_size, + unsigned int hwsp, + struct lock_class_key *key, + const char *name) { struct intel_context *ce; int err; @@ -825,6 +863,11 @@ create_pinned_context(struct intel_engine_cs *engine, __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); ce->timeline = page_pack_bits(NULL, hwsp); + ce->ring = NULL; + ce->ring_size = ring_size; + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(vm); err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { @@ -843,7 +886,7 @@ create_pinned_context(struct intel_engine_cs *engine, return ce; } -static void destroy_pinned_context(struct intel_context *ce) +void intel_engine_destroy_pinned_context(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; struct i915_vma *hwsp = engine->status_page.vma; @@ -863,8 +906,9 @@ create_kernel_context(struct intel_engine_cs *engine) { static struct lock_class_key kernel; - return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR, - &kernel, "kernel_context"); + return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K, + I915_GEM_HWS_SEQNO_ADDR, + &kernel, "kernel_context"); } /** @@ -907,7 +951,7 @@ static int engine_init_common(struct intel_engine_cs *engine) return 0; err_context: - destroy_pinned_context(ce); + intel_engine_destroy_pinned_context(ce); return ret; } @@ -957,10 +1001,10 @@ int intel_engines_init(struct intel_gt *gt) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - GEM_BUG_ON(!list_empty(&engine->active.requests)); - tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ + GEM_BUG_ON(!list_empty(&engine->sched_engine->requests)); - intel_breadcrumbs_free(engine->breadcrumbs); + i915_sched_engine_put(engine->sched_engine); + intel_breadcrumbs_put(engine->breadcrumbs); intel_engine_fini_retire(engine); intel_engine_cleanup_cmd_parser(engine); @@ -969,7 +1013,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) fput(engine->default_state); if (engine->kernel_context) - destroy_pinned_context(engine->kernel_context); + intel_engine_destroy_pinned_context(engine->kernel_context); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); cleanup_status_page(engine); @@ -1105,45 +1149,8 @@ static u32 read_subslice_reg(const struct intel_engine_cs *engine, int slice, int subslice, i915_reg_t reg) { - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 mcr_mask, mcr_ss, mcr, old_mcr, val; - enum forcewake_domains fw_domains; - - if (GRAPHICS_VER(i915) >= 11) { - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - } else { - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; - mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - } - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, - FW_REG_READ); - fw_domains |= intel_uncore_forcewake_for_reg(uncore, - GEN8_MCR_SELECTOR, - FW_REG_READ | FW_REG_WRITE); - - spin_lock_irq(&uncore->lock); - intel_uncore_forcewake_get__locked(uncore, 
fw_domains); - - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - val = intel_uncore_read_fw(uncore, reg); - - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; - - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); - - return val; + return intel_uncore_read_with_mcr_steering(engine->uncore, reg, + slice, subslice); } /* NB: please notice the memset */ @@ -1243,7 +1250,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->sched_engine->tasklet; if (!t->callback) return; @@ -1283,7 +1290,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + if (!i915_sched_engine_is_empty(engine->sched_engine)) return false; /* Ring stopped? */ @@ -1314,6 +1321,30 @@ bool intel_engines_are_idle(struct intel_gt *gt) return true; } +bool intel_engine_irq_enable(struct intel_engine_cs *engine) +{ + if (!engine->irq_enable) + return false; + + /* Caller disables interrupts */ + spin_lock(&engine->gt->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->gt->irq_lock); + + return true; +} + +void intel_engine_irq_disable(struct intel_engine_cs *engine) +{ + if (!engine->irq_disable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->gt->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->gt->irq_lock); +} + void intel_engines_reset_default_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; @@ -1349,7 +1380,7 @@ static struct intel_timeline *get_timeline(struct i915_request *rq) struct intel_timeline *tl; /* - * Even though we are holding the engine->active.lock here, there + * Even though we are holding the engine->sched_engine->lock here, there * is no control over the submission queue per-se and we are * inspecting the active state at a random point in time, with an * unknown queue. Play safe and make sure the timeline remains valid. @@ -1504,8 +1535,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? 
%s\n", yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + &engine->sched_engine->tasklet.state)), + enableddisabled(!atomic_read(&engine->sched_engine->tasklet.count)), repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); @@ -1529,7 +1560,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - execlists_active_lock_bh(execlists); + i915_sched_engine_active_lock_bh(engine->sched_engine); rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[160]; @@ -1560,7 +1591,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); - execlists_active_unlock_bh(execlists); + i915_sched_engine_active_unlock_bh(engine->sched_engine); } else if (GRAPHICS_VER(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1650,6 +1681,98 @@ static void print_properties(struct intel_engine_cs *engine, read_ul(&engine->defaults, p->offset)); } +static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg) +{ + struct intel_timeline *tl = get_timeline(rq); + + i915_request_show(m, rq, msg, 0); + + drm_printf(m, "\t\tring->start: 0x%08x\n", + i915_ggtt_offset(rq->ring->vma)); + drm_printf(m, "\t\tring->head: 0x%08x\n", + rq->ring->head); + drm_printf(m, "\t\tring->tail: 0x%08x\n", + rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } + + print_request_ring(m, rq); + + if (rq->context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); + } +} + +void intel_engine_dump_active_requests(struct list_head *requests, + struct i915_request *hung_rq, + struct drm_printer *m) +{ + struct i915_request *rq; + const char *msg; + enum i915_request_state state; + + list_for_each_entry(rq, requests, sched.link) { + if (rq == hung_rq) + continue; + + state = i915_test_request_state(rq); + if (state < I915_REQUEST_QUEUED) + continue; + + if (state == I915_REQUEST_ACTIVE) + msg = "\t\tactive on engine"; + else + msg = "\t\tactive in queue"; + + engine_dump_request(rq, m, msg); + } +} + +static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m) +{ + struct i915_request *hung_rq = NULL; + struct intel_context *ce; + bool guc; + + /* + * No need for an engine->irq_seqno_barrier() before the seqno reads. + * The GPU is still running so requests are still executing and any + * hardware reads will be out of date by the time they are reported. + * But the intention here is just to report an instantaneous snapshot + * so that's fine. 
+ */ + lockdep_assert_held(&engine->sched_engine->lock); + + drm_printf(m, "\tRequests:\n"); + + guc = intel_uc_uses_guc_submission(&engine->gt->uc); + if (guc) { + ce = intel_engine_get_hung_context(engine); + if (ce) + hung_rq = intel_context_find_active_request(ce); + } else { + hung_rq = intel_engine_execlist_find_hung_request(engine); + } + + if (hung_rq) + engine_dump_request(hung_rq, m, "\t\thung"); + + if (guc) + intel_guc_dump_active_requests(engine, hung_rq, m); + else + intel_engine_dump_active_requests(&engine->sched_engine->requests, + hung_rq, m); +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) @@ -1694,41 +1817,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_count(error)); print_properties(engine, m); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->sched_engine->lock, flags); + engine_dump_active_requests(engine, m); - spin_lock_irqsave(&engine->active.lock, flags); - rq = intel_engine_find_active_request(engine); - if (rq) { - struct intel_timeline *tl = get_timeline(rq); - - i915_request_show(m, rq, "\t\tactive ", 0); - - drm_printf(m, "\t\tring->start: 0x%08x\n", - i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", - rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", - rq->ring->tail); - drm_printf(m, "\t\tring->emit: 0x%08x\n", - rq->ring->emit); - drm_printf(m, "\t\tring->space: 0x%08x\n", - rq->ring->space); - - if (tl) { - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - tl->hwsp_offset); - intel_timeline_put(tl); - } - - print_request_ring(m, rq); - - if (rq->context->lrc_reg_state) { - drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); - } - } - drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold)); - spin_unlock_irqrestore(&engine->active.lock, flags); + drm_printf(m, "\tOn hold?: %lu\n", + list_count(&engine->sched_engine->hold)); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); @@ -1785,19 +1879,33 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) return total; } -static bool match_ring(struct i915_request *rq) +struct intel_context * +intel_engine_create_virtual(struct intel_engine_cs **siblings, + unsigned int count) { - u32 ring = ENGINE_READ(rq->engine, RING_START); + if (count == 0) + return ERR_PTR(-EINVAL); + + if (count == 1) + return intel_context_create(siblings[0]); - return ring == i915_ggtt_offset(rq->ring->vma); + GEM_BUG_ON(!siblings[0]->cops->create_virtual); + return siblings[0]->cops->create_virtual(siblings, count); } struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine) +intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; /* + * This search does not work in GuC submission mode. However, the GuC + * will report the hanging context directly to the driver itself. So + * the driver should never get here when in GuC mode. + */ + GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc)); + + /* * We are called by the error capture, reset and to dump engine * state at random points in time. In particular, note that neither is * crucially ordered with an interrupt. 
After a hang, the GPU is dead @@ -1808,7 +1916,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); rcu_read_lock(); request = execlists_active(&engine->execlists); @@ -1826,15 +1934,9 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) if (active) return active; - list_for_each_entry(request, &engine->active.requests, sched.link) { - if (__i915_request_is_complete(request)) - continue; - - if (!__i915_request_has_started(request)) - continue; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) + list_for_each_entry(request, &engine->sched_engine->requests, + sched.link) { + if (i915_test_request_state(request) != I915_REQUEST_ACTIVE) continue; active = request; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index b99ac41695f3..74775ae961b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -70,12 +70,38 @@ static void show_heartbeat(const struct i915_request *rq, { struct drm_printer p = drm_debug_printer("heartbeat"); - intel_engine_dump(engine, &p, - "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", - engine->name, - rq->fence.context, - rq->fence.seqno, - rq->sched.attr.priority); + if (!rq) { + intel_engine_dump(engine, &p, + "%s heartbeat not ticking\n", + engine->name); + } else { + intel_engine_dump(engine, &p, + "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", + engine->name, + rq->fence.context, + rq->fence.seqno, + rq->sched.attr.priority); + } +} + +static void +reset_engine(struct intel_engine_cs *engine, struct i915_request *rq) +{ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + if (intel_engine_uses_guc(engine)) + /* + * GuC itself is toast or GuC's hang detection + * is disabled. Either way, need to find the + * hang culprit manually. + */ + intel_guc_find_hung_context(engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); } static void heartbeat(struct work_struct *wrk) @@ -102,6 +128,11 @@ static void heartbeat(struct work_struct *wrk) if (intel_gt_is_wedged(engine->gt)) goto out; + if (i915_sched_engine_disabled(engine->sched_engine)) { + reset_engine(engine, engine->heartbeat.systole); + goto out; + } + if (engine->heartbeat.systole) { long delay = READ_ONCE(engine->props.heartbeat_interval_ms); @@ -121,7 +152,7 @@ static void heartbeat(struct work_struct *wrk) * but all other contexts, including the kernel * context are stuck waiting for the signal. 
*/ - } else if (engine->schedule && + } else if (engine->sched_engine->schedule && rq->sched.attr.priority < I915_PRIORITY_BARRIER) { /* * Gradually raise the priority of the heartbeat to @@ -136,16 +167,10 @@ static void heartbeat(struct work_struct *wrk) attr.priority = I915_PRIORITY_BARRIER; local_bh_disable(); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); local_bh_enable(); } else { - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - show_heartbeat(rq, engine); - - intel_gt_handle_error(engine->gt, engine->mask, - I915_ERROR_CAPTURE, - "stopped heartbeat on %s", - engine->name); + reset_engine(engine, rq); } rq->emitted_jiffies = jiffies; @@ -194,6 +219,25 @@ void intel_engine_park_heartbeat(struct intel_engine_cs *engine) i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } +void intel_gt_unpark_heartbeats(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + if (intel_engine_pm_is_awake(engine)) + intel_engine_unpark_heartbeat(engine); +} + +void intel_gt_park_heartbeats(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + intel_engine_park_heartbeat(engine); +} + void intel_engine_init_heartbeat(struct intel_engine_cs *engine) { INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h index a488ea3e84a3..5da6d809a87a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -7,6 +7,7 @@ #define INTEL_ENGINE_HEARTBEAT_H struct intel_engine_cs; +struct intel_gt; void intel_engine_init_heartbeat(struct intel_engine_cs *engine); @@ -16,6 +17,9 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine, void intel_engine_park_heartbeat(struct intel_engine_cs *engine); void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); +void intel_gt_park_heartbeats(struct intel_gt *gt); +void intel_gt_unpark_heartbeats(struct intel_gt *gt); + int intel_engine_pulse(struct intel_engine_cs *engine); int intel_engine_flush_barriers(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 47f4397095e5..1f07ac4e0672 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -275,13 +275,11 @@ static int __engine_park(struct intel_wakeref *wf) intel_breadcrumbs_park(engine->breadcrumbs); /* Must be reset upon idling, or we may miss the busy wakeup. 
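
/*
 * A loose standalone model of the heartbeat escalation reworked in the
 * intel_engine_heartbeat.c hunks above: each interval in which the
 * previous heartbeat request has not completed bumps its priority, and
 * once it has already run at barrier priority without completing the
 * engine is declared hung and reset_engine() fires.  The priority
 * levels and function names are stand-ins, not the driver's values.
 */
#include <stdio.h>

enum { PRIO_NORMAL = 0, PRIO_HEARTBEAT = 1, PRIO_BARRIER = 2 };

static void reset_engine(const char *name)
{
	printf("stopped heartbeat on %s\n", name);
}

static int heartbeat_tick(const char *engine, int *prio, int completed)
{
	if (completed) {
		*prio = PRIO_NORMAL;	/* emit a fresh heartbeat next tick */
		return 0;
	}
	if (*prio < PRIO_BARRIER) {
		(*prio)++;		/* reschedule the request at a higher priority */
		return 0;
	}
	reset_engine(engine);		/* barrier priority and still stuck */
	return -1;
}

int main(void)
{
	int prio = PRIO_NORMAL;

	for (int tick = 0; tick < 4; tick++)
		if (heartbeat_tick("rcs0", &prio, 0))
			break;
	return 0;
}
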
*/ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); - engine->execlists.no_priolist = false; - /* While gt calls i915_vma_parked(), we have to break the lock cycle */ intel_gt_pm_put_async(engine->gt); return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e113f93b3274..ed91bcff20eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -21,32 +21,20 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "intel_breadcrumbs_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" -/* Legacy HW Engine ID */ - -#define RCS0_HW 0 -#define VCS0_HW 1 -#define BCS0_HW 2 -#define VECS0_HW 3 -#define VCS1_HW 4 -#define VCS2_HW 6 -#define VCS3_HW 7 -#define VECS1_HW 12 - -/* Gen11+ HW Engine class + instance */ +/* HW Engine class + instance */ #define RENDER_CLASS 0 #define VIDEO_DECODE_CLASS 1 #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 -#define MAX_ENGINE_INSTANCE 3 +#define MAX_ENGINE_INSTANCE 7 #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -59,11 +47,13 @@ struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; +struct i915_sched_engine; struct intel_gt; struct intel_ring; struct intel_uncore; +struct intel_breadcrumbs; -typedef u8 intel_engine_mask_t; +typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) struct intel_hw_status_page { @@ -100,8 +90,8 @@ struct i915_ctx_workarounds { struct i915_vma *vma; }; -#define I915_MAX_VCS 4 -#define I915_MAX_VECS 2 +#define I915_MAX_VCS 8 +#define I915_MAX_VECS 4 /* * Engine IDs definitions. @@ -114,9 +104,15 @@ enum intel_engine_id { VCS1, VCS2, VCS3, + VCS4, + VCS5, + VCS6, + VCS7, #define _VCS(n) (VCS0 + (n)) VECS0, VECS1, + VECS2, + VECS3, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES #define INVALID_ENGINE ((enum intel_engine_id)-1) @@ -138,11 +134,6 @@ struct st_preempt_hang { */ struct intel_engine_execlists { /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - - /** * @timer: kick the current context if its timeslice expires */ struct timer_list timer; @@ -153,11 +144,6 @@ struct intel_engine_execlists { struct timer_list preempt; /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - - /** * @ccid: identifier for contexts submitted to this engine */ u32 ccid; @@ -192,11 +178,6 @@ struct intel_engine_execlists { u32 reset_ccid; /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - - /** * @submit_reg: gen-specific execlist submission register * set to the ExecList Submission Port (elsp) register pre-Gen11 and to * the ExecList Submission Queue Contents register array for Gen11+ @@ -238,23 +219,10 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. 
- * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. + * @virtual: Queue of requets on a virtual engine, sorted by priority. + * Each RB entry is a struct i915_priolist containing a list of requests + * of the same priority. */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; struct rb_root_cached virtual; /** @@ -295,7 +263,6 @@ struct intel_engine_cs { enum intel_engine_id id; enum intel_engine_id legacy_idx; - unsigned int hw_id; unsigned int guc_id; intel_engine_mask_t mask; @@ -326,15 +293,13 @@ struct intel_engine_cs { struct intel_sseu sseu; - struct { - spinlock_t lock; - struct list_head requests; - struct list_head hold; /* ready requests, but on hold */ - } active; + struct i915_sched_engine *sched_engine; /* keep a request in reserve for a [pm] barrier under oom */ struct i915_request *request_pool; + struct intel_context *hung_ce; + struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ @@ -419,6 +384,8 @@ struct intel_engine_cs { void (*park)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine); + void (*bump_serial)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); const struct intel_context_ops *cops; @@ -447,22 +414,13 @@ struct intel_engine_cs { */ void (*submit_request)(struct i915_request *rq); - /* - * Called on signaling of a SUBMIT_FENCE, passing along the signaling - * request down to the bonded pairs. - */ - void (*bond_execute)(struct i915_request *rq, - struct dma_fence *signal); + void (*release)(struct intel_engine_cs *engine); /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. Note the request itself may - * not be ready to run! + * Add / remove request from engine active tracking */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - - void (*release)(struct intel_engine_cs *engine); + void (*add_active_request)(struct i915_request *rq); + void (*remove_active_request)(struct i915_request *rq); struct intel_engine_execlists execlists; @@ -485,6 +443,7 @@ struct intel_engine_cs { #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) unsigned int flags; /* diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 3cca7ea2d6ea..8f8bea08e734 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -11,6 +11,7 @@ #include "intel_engine.h" #include "intel_engine_user.h" #include "intel_gt.h" +#include "uc/intel_guc_submission.h" struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) @@ -108,13 +109,16 @@ static void set_scheduler_caps(struct drm_i915_private *i915) for_each_uabi_engine(engine, i915) { /* all engines must agree! 
*/ int i; - if (engine->schedule) + if (engine->sched_engine->schedule) enabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); else disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; + for (i = 0; i < ARRAY_SIZE(map); i++) { if (engine->flags & BIT(map[i].engine)) enabled |= BIT(map[i].sched); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index fc77592d88a9..de5f9c86b9a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -114,6 +114,7 @@ #include "gen8_engine_cs.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_stats.h" #include "intel_execlists_submission.h" @@ -153,6 +154,12 @@ #define GEN12_CSB_CTX_VALID(csb_dw) \ (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) +#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE BIT(1) /* upper csb dword */ +#define XEHP_CSB_SW_CTX_ID_MASK GENMASK(31, 10) +#define XEHP_IDLE_CTX_ID 0xFFFF +#define XEHP_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ @@ -182,18 +189,6 @@ struct virtual_engine { int prio; } nodes[I915_NUM_ENGINES]; - /* - * Keep track of bonded pairs -- restrictions upon on our selection - * of physical engines any particular request may be submitted to. - * If we receive a submit-fence from a master engine, we will only - * use one of sibling_mask physical engines. - */ - struct ve_bond { - const struct intel_engine_cs *master; - intel_engine_mask_t sibling_mask; - } *bonds; - unsigned int num_bonds; - /* And finally, which physical engines this virtual engine maps onto. */ unsigned int num_siblings; struct intel_engine_cs *siblings[]; @@ -205,6 +200,9 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } +static struct intel_context * +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); + static struct i915_request * __active_request(const struct intel_timeline * const tl, struct i915_request *rq, @@ -273,11 +271,11 @@ static int effective_prio(const struct i915_request *rq) return prio; } -static int queue_prio(const struct intel_engine_execlists *execlists) +static int queue_prio(const struct i915_sched_engine *sched_engine) { struct rb_node *rb; - rb = rb_first_cached(&execlists->queue); + rb = rb_first_cached(&sched_engine->queue); if (!rb) return INT_MIN; @@ -318,14 +316,14 @@ static bool need_preempt(const struct intel_engine_cs *engine, * to preserve FIFO ordering of dependencies. */ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); - if (engine->execlists.queue_priority_hint <= last_prio) + if (engine->sched_engine->queue_priority_hint <= last_prio) return false; /* * Check against the first request in ELSP[1], it will, thanks to the * power of PI, be the highest priority of that context. 
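For reference, the set_scheduler_caps() change earlier in this hunk only advertises a scheduler capability to userspace when every uabi engine supports it, and GuC submission additionally exposes the static priority map. A small sketch of that aggregation, assuming the function ultimately publishes enabled & ~disabled (that part of set_scheduler_caps() is outside this hunk) and using a placeholder bit value for STATIC_PRIORITY_MAP:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define I915_SCHEDULER_CAP_ENABLED		(1u << 0)
#define I915_SCHEDULER_CAP_PRIORITY		(1u << 1)
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP	(1u << 5)	/* placeholder bit */

struct fake_engine {
	bool has_schedule;	/* stands in for sched_engine->schedule != NULL */
};

static uint32_t set_scheduler_caps(const struct fake_engine *engines, int count,
				   bool guc_submission)
{
	uint32_t enabled = 0, disabled = 0;

	for (int i = 0; i < count; i++) {
		if (engines[i].has_schedule)
			enabled |= I915_SCHEDULER_CAP_ENABLED |
				   I915_SCHEDULER_CAP_PRIORITY;
		else
			disabled |= I915_SCHEDULER_CAP_ENABLED |
				    I915_SCHEDULER_CAP_PRIORITY;

		if (guc_submission)
			enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
	}

	/* "all engines must agree": drop anything some engine disabled */
	return enabled & ~disabled;
}

int main(void)
{
	struct fake_engine all_sched[] = { { true }, { true } };
	struct fake_engine mixed[]     = { { true }, { false } };

	printf("all scheduling:   %#x\n", (unsigned int)set_scheduler_caps(all_sched, 2, false));
	printf("mixed scheduling: %#x\n", (unsigned int)set_scheduler_caps(mixed, 2, false));
	printf("with GuC:         %#x\n", (unsigned int)set_scheduler_caps(all_sched, 2, true));
	return 0;
}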
*/ - if (!list_is_last(&rq->sched.link, &engine->active.requests) && + if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) && rq_prio(list_next_entry(rq, sched.link)) > last_prio) return true; @@ -340,7 +338,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * context, it's priority would not exceed ELSP[0] aka last_prio. */ return max(virtual_prio(&engine->execlists), - queue_prio(&engine->execlists)) > last_prio; + queue_prio(engine->sched_engine)) > last_prio; } __maybe_unused static bool @@ -367,10 +365,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct list_head *pl; int prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->active.requests, + &engine->sched_engine->requests, sched.link) { if (__i915_request_is_complete(rq)) { list_del_init(&rq->sched.link); @@ -382,9 +380,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); + pl = i915_sched_lookup_priolist(engine->sched_engine, + prio); } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -489,6 +488,16 @@ __execlists_schedule_in(struct i915_request *rq) /* Use a fixed tag for OA and friends */ GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; + } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + /* We don't need a strict matching tag, just different values */ + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); + } else { /* We don't need a strict matching tag, just different values */ unsigned int tag = __ffs(engine->context_tag); @@ -534,13 +543,13 @@ resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) { struct intel_engine_cs *engine = rq->engine; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); WRITE_ONCE(rq->engine, &ve->base); ve->base.submit_request(rq); - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); } static void kick_siblings(struct i915_request *rq, struct intel_context *ce) @@ -569,7 +578,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) resubmit_virtual_request(rq, ve); if (READ_ONCE(ve->request)) - tasklet_hi_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.sched_engine->tasklet); } static void __execlists_schedule_out(struct i915_request * const rq, @@ -579,7 +588,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, unsigned int ccid; /* - * NB process_csb() is not under the engine->active.lock and hence + * NB process_csb() is not under the engine->sched_engine->lock and hence * schedule_out can race with schedule_in meaning that we should * refrain from doing non-trivial work here. 
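need_preempt() above now reads the priority hint from the shared sched_engine instead of the execlists state, but the decision itself is unchanged: preempt only if something waiting (the queue hint, a virtual engine, or the second ELSP context) outranks the request on the hardware. A simplified standalone model of that decision, with effective_prio() reduced to a plain integer and I915_PRIORITY_NORMAL assumed to be 0:

#include <stdbool.h>
#include <stdio.h>

#define I915_PRIORITY_NORMAL	0	/* assumed; only the ordering matters */

static int max_int(int a, int b) { return a > b ? a : b; }

/* Simplified model of need_preempt(). */
static bool need_preempt(int hint, int active_prio, int next_elsp_prio,
			 int virtual_prio, int queue_prio)
{
	int last_prio = max_int(active_prio, I915_PRIORITY_NORMAL - 1);

	if (hint <= last_prio)
		return false;		/* nothing queued outranks the HW */

	if (next_elsp_prio > last_prio)
		return true;		/* ELSP[1] already outranks ELSP[0] */

	return max_int(virtual_prio, queue_prio) > last_prio;
}

int main(void)
{
	printf("%d\n", need_preempt(2, 0, -1, -1, 2));	/* 1: queue outranks */
	printf("%d\n", need_preempt(0, 1, -1, -1, 0));	/* 0: hint too low  */
	return 0;
}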
*/ @@ -599,8 +608,14 @@ static void __execlists_schedule_out(struct i915_request * const rq, intel_engine_add_retire(engine, ce->timeline); ccid = ce->lrc.ccid; - ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; - ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; + ccid &= XEHP_MAX_CONTEXT_HW_ID; + } else { + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + } + if (ccid < BITS_PER_LONG) { GEM_BUG_ON(ccid == 0); GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); @@ -738,9 +753,9 @@ trace_ports(const struct intel_engine_execlists *execlists, } static bool -reset_in_progress(const struct intel_engine_execlists *execlists) +reset_in_progress(const struct intel_engine_cs *engine) { - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); + return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet)); } static __maybe_unused noinline bool @@ -756,7 +771,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); /* We may be messing around with the lists during reset, lalala */ - if (reset_in_progress(execlists)) + if (reset_in_progress(engine)) return true; if (!execlists->pending[0]) { @@ -1096,7 +1111,8 @@ static void defer_active(struct intel_engine_cs *engine) if (!rq) return; - defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); + defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine, + rq_prio(rq))); } static bool @@ -1133,13 +1149,14 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, return false; /* If ELSP[1] is occupied, always check to see if worth slicing */ - if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) { + if (!list_is_last_rcu(&rq->sched.link, + &engine->sched_engine->requests)) { ENGINE_TRACE(engine, "timeslice required for second inflight context\n"); return true; } /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) { + if (!i915_sched_engine_is_empty(engine->sched_engine)) { ENGINE_TRACE(engine, "timeslice required for queue\n"); return true; } @@ -1187,7 +1204,7 @@ static void start_timeslice(struct intel_engine_cs *engine) * its timeslice, so recheck. */ if (!timer_pending(&el->timer)) - tasklet_hi_schedule(&el->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); return; } @@ -1236,6 +1253,7 @@ static bool completed(const struct i915_request *rq) static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last, * const *active; @@ -1265,7 +1283,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. 
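The Xe_HP branches in the schedule-in/schedule-out hunks above pack the per-engine context tag into a wider software context ID field than Gen12 uses. A toy round trip of that tag, with illustrative values for XEHP_SW_CTX_ID_SHIFT and XEHP_MAX_CONTEXT_HW_ID (the real definitions live in the lrc/engine headers, not in this hunk) and the kernel bitops replaced by plain C:

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only; see the lrc headers for the real ones. */
#define XEHP_SW_CTX_ID_SHIFT	39
#define XEHP_MAX_CONTEXT_HW_ID	0xFFFF

/* GENMASK(BITS_PER_LONG - 2, 0), as in intel_execlists_submission_setup(). */
static unsigned long context_tag = ~0ul >> 1;

static uint32_t schedule_in(void)
{
	int tag = __builtin_ffsl((long)context_tag);	/* 1-based, like ffs() */

	context_tag &= ~(1ul << (tag - 1));		/* clear_bit(tag - 1, ...) */
	return (uint32_t)tag << (XEHP_SW_CTX_ID_SHIFT - 32);
}

static void schedule_out(uint32_t ccid)
{
	ccid >>= XEHP_SW_CTX_ID_SHIFT - 32;
	ccid &= XEHP_MAX_CONTEXT_HW_ID;
	context_tag |= 1ul << (ccid - 1);		/* set_bit(ccid - 1, ...) */
}

int main(void)
{
	uint32_t ccid = schedule_in();

	printf("ccid=%#x tags=%#lx\n", (unsigned int)ccid, context_tag);
	schedule_out(ccid);
	printf("after out: tags=%#lx\n", context_tag);
	return 0;
}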
*/ - spin_lock(&engine->active.lock); + spin_lock(&sched_engine->lock); /* * If the queue is higher priority than the last @@ -1287,7 +1305,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + sched_engine->queue_priority_hint); record_preemption(execlists); /* @@ -1313,7 +1331,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(timer_expired(&execlists->timer)), last->fence.context, last->fence.seqno, rq_prio(last), - execlists->queue_priority_hint, + sched_engine->queue_priority_hint, yesno(timeslice_yield(execlists, last))); /* @@ -1365,7 +1383,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - spin_unlock(&engine->active.lock); + spin_unlock(&sched_engine->lock); return; } } @@ -1375,7 +1393,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) while ((ve = first_virtual_engine(engine))) { struct i915_request *rq; - spin_lock(&ve->base.active.lock); + spin_lock(&ve->base.sched_engine->lock); rq = ve->request; if (unlikely(!virtual_matches(ve, rq, engine))) @@ -1384,14 +1402,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != &ve->context); - if (unlikely(rq_prio(rq) < queue_prio(execlists))) { - spin_unlock(&ve->base.active.lock); + if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { + spin_unlock(&ve->base.sched_engine->lock); break; } if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); - spin_unlock(&engine->active.lock); + spin_unlock(&ve->base.sched_engine->lock); + spin_unlock(&engine->sched_engine->lock); return; /* leave this for another sibling */ } @@ -1405,7 +1423,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(engine != ve->siblings[0])); WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); rb = &ve->nodes[engine->id].rb; rb_erase_cached(rb, &execlists->virtual); @@ -1437,7 +1455,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) i915_request_put(rq); unlock: - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); /* * Hmm, we have a bunch of virtual engine requests, @@ -1450,7 +1468,7 @@ unlock: break; } - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -1529,7 +1547,7 @@ unlock: } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: @@ -1551,8 +1569,9 @@ done: * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). 
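execlists_dequeue() above drains the sched_engine's priority queue from the highest priolist down, filling the ELSP ports and freeing each exhausted priolist. A deliberately tiny model of that loop, ignoring context merging, timeslicing and virtual engines, with a descending-sorted array standing in for the rb_root_cached tree:

#include <stdio.h>

/* Toy priolist: a priority bucket holding some number of requests. */
struct toy_priolist {
	int priority;
	int nr_requests;
};

static void dequeue(struct toy_priolist *queue, int nr_priolists,
		    int *ports, int nr_ports)
{
	int used = 0;

	for (int i = 0; i < nr_priolists && used < nr_ports; i++) {
		struct toy_priolist *p = &queue[i];

		while (p->nr_requests && used < nr_ports) {
			ports[used++] = p->priority;	/* "submit" one request */
			p->nr_requests--;
		}
		/* kernel: rb_erase_cached(&p->node, ...); i915_priolist_free(p); */
	}

	for (int i = 0; i < used; i++)
		printf("port[%d] <- request at prio %d\n", i, ports[i]);
}

int main(void)
{
	struct toy_priolist queue[] = { { 2, 1 }, { 0, 3 }, { -10, 2 } };
	int ports[2];

	dequeue(queue, 3, ports, 2);
	return 0;
}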
*/ - execlists->queue_priority_hint = queue_prio(execlists); - spin_unlock(&engine->active.lock); + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); + spin_unlock(&sched_engine->lock); /* * We can skip poking the HW if we ended up with exactly the same set @@ -1655,13 +1674,24 @@ static void invalidate_csb_entries(const u64 *first, const u64 *last) * bits 44-46: reserved * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from + * + * Xe_HP csb shuffles things around compared to TGL: + * + * bits 0-3: context switch detail (same possible values as TGL) + * bits 4-9: engine instance + * bits 10-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bit 32: semaphore wait mode (poll or signal), Only valid when + * switch detail is set to "wait on semaphore" + * bit 33: switched to new queue + * bits 34-41: wait detail (for switch detail 1 to 4) + * bits 42-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from */ -static bool gen12_csb_parse(const u64 csb) +static inline bool +__gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue, + u8 switch_detail) { - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); - bool new_queue = - lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; - /* * The context switch detail is not guaranteed to be 5 when a preemption * occurs, so we can't just check for that. The check below works for @@ -1670,7 +1700,7 @@ static bool gen12_csb_parse(const u64 csb) * would require some extra handling, but we don't support that. */ if (!ctx_away_valid || new_queue) { - GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb))); + GEM_BUG_ON(!ctx_to_valid); return true; } @@ -1679,10 +1709,26 @@ static bool gen12_csb_parse(const u64 csb) * context switch on an unsuccessful wait instruction since we always * use polling mode. */ - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); + GEM_BUG_ON(switch_detail); return false; } +static bool xehp_csb_parse(const u64 csb) +{ + return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ + XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ + upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, + GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))); +} + +static bool gen12_csb_parse(const u64 csb) +{ + return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ + GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ + lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, + GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); +} + static bool gen8_csb_parse(const u64 csb) { return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); @@ -1767,8 +1813,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * access. Either we are inside the tasklet, or the tasklet is disabled * and we assume that is only inside the reset paths and so serialised. */ - GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && - !reset_in_progress(execlists)); + GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) && + !reset_in_progress(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
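The Xe_HP CSB layout documented above moves the "switched to" context ID into bits 10-25 of the lower dword and the "switched to new queue" flag into bit 33, which is exactly what the XEHP_CSB_* macros added earlier in this file encode. A standalone sketch of the parse using those masks; BIT/GENMASK/FIELD_GET are userspace stand-ins for the kernel helpers, and the GEN12_CTX_SWITCH_DETAIL definition (low bits of the dword) is assumed since it is not part of this hunk:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel bitfield helpers. */
#define BIT(n)			(1u << (n))
#define GENMASK(h, l)		(((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_GET(mask, val)	(((val) & (mask)) >> __builtin_ctz(mask))

#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	BIT(1)	/* upper csb dword */
#define XEHP_CSB_SW_CTX_ID_MASK			GENMASK(31, 10)
#define XEHP_IDLE_CTX_ID			0xFFFF
#define XEHP_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)

/* Assumed: switch detail sits in the low bits of the dword. */
#define GEN12_CTX_SWITCH_DETAIL(csb_dw)		((csb_dw) & 0xF)

/* Mirror of __gen12_csb_parse(): true means "promote the pending context". */
static bool csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue,
		      unsigned int switch_detail)
{
	(void)switch_detail;	/* kernel: GEM_BUG_ON(switch_detail) on the else path */

	if (!ctx_away_valid || new_queue)
		return ctx_to_valid;	/* kernel asserts ctx_to_valid and returns true */

	return false;
}

static bool xehp_csb_parse(uint64_t csb)
{
	uint32_t lo = (uint32_t)csb, hi = (uint32_t)(csb >> 32);

	return csb_parse(XEHP_CSB_CTX_VALID(lo),
			 XEHP_CSB_CTX_VALID(hi),
			 hi & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE,
			 GEN12_CTX_SWITCH_DETAIL(lo));
}

int main(void)
{
	/* lower dword: "switched to" context id 5 (valid);
	 * upper dword: "switched away" idle/invalid, plus the new-queue flag. */
	uint32_t lo = 5u << 10;
	uint32_t hi = (XEHP_IDLE_CTX_ID << 10) | XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
	uint64_t csb = ((uint64_t)hi << 32) | lo;

	printf("promote=%d\n", xehp_csb_parse(csb));
	return 0;
}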
@@ -1847,7 +1893,9 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", head, upper_32_bits(csb), lower_32_bits(csb)); - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + promote = xehp_csb_parse(csb); + else if (GRAPHICS_VER(engine->i915) >= 12) promote = gen12_csb_parse(csb); else promote = gen8_csb_parse(csb); @@ -1979,7 +2027,8 @@ static void __execlists_hold(struct i915_request *rq) __i915_request_unsubmit(rq); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - list_move_tail(&rq->sched.link, &rq->engine->active.hold); + list_move_tail(&rq->sched.link, + &rq->engine->sched_engine->hold); i915_request_set_hold(rq); RQ_TRACE(rq, "on hold\n"); @@ -2016,7 +2065,7 @@ static bool execlists_hold(struct intel_engine_cs *engine, if (i915_request_on_hold(rq)) return false; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); if (__i915_request_is_complete(rq)) { /* too late! */ rq = NULL; @@ -2032,10 +2081,10 @@ static bool execlists_hold(struct intel_engine_cs *engine, GEM_BUG_ON(i915_request_on_hold(rq)); GEM_BUG_ON(rq->engine != engine); __execlists_hold(rq); - GEM_BUG_ON(list_empty(&engine->active.hold)); + GEM_BUG_ON(list_empty(&engine->sched_engine->hold)); unlock: - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); return rq; } @@ -2079,7 +2128,7 @@ static void __execlists_unhold(struct i915_request *rq) i915_request_clear_hold(rq); list_move_tail(&rq->sched.link, - i915_sched_lookup_priolist(rq->engine, + i915_sched_lookup_priolist(rq->engine->sched_engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -2115,7 +2164,7 @@ static void __execlists_unhold(struct i915_request *rq) static void execlists_unhold(struct intel_engine_cs *engine, struct i915_request *rq) { - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); /* * Move this request back to the priority queue, and all of its @@ -2123,12 +2172,12 @@ static void execlists_unhold(struct intel_engine_cs *engine, */ __execlists_unhold(rq); - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); - tasklet_hi_schedule(&engine->execlists.tasklet); + if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { + engine->sched_engine->queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); } struct execlists_capture { @@ -2258,13 +2307,13 @@ static void execlists_capture(struct intel_engine_cs *engine) if (!cap) return; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); cap->rq = active_context(engine, active_ccid(engine)); if (cap->rq) { cap->rq = active_request(cap->rq->context->timeline, cap->rq); cap->rq = i915_request_get_rcu(cap->rq); } - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); if (!cap->rq) goto err_free; @@ -2316,13 +2365,13 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "reset for %s\n", msg); /* Mark this tasklet as disabled to avoid waiting for it to complete */ - tasklet_disable_nosync(&engine->execlists.tasklet); + tasklet_disable_nosync(&engine->sched_engine->tasklet); ring_set_paused(engine, 1); /* Freeze the current request in place */ 
execlists_capture(engine); intel_engine_reset(engine, msg); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); clear_and_wake_up_bit(bit, lock); } @@ -2345,8 +2394,9 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) */ static void execlists_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; struct i915_request *post[2 * EXECLIST_MAX_PORTS]; struct i915_request **inactive; @@ -2421,13 +2471,16 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) intel_engine_signal_breadcrumbs(engine); if (tasklet) - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } static void __execlists_kick(struct intel_engine_execlists *execlists) { + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } #define execlists_kick(t, member) \ @@ -2448,19 +2501,20 @@ static void queue_request(struct intel_engine_cs *engine, { GEM_BUG_ON(!list_empty(&rq->sched.link)); list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, rq_prio(rq))); + i915_sched_lookup_priolist(engine->sched_engine, + rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static bool submit_queue(struct intel_engine_cs *engine, const struct i915_request *rq) { - struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_sched_engine *sched_engine = engine->sched_engine; - if (rq_prio(rq) <= execlists->queue_priority_hint) + if (rq_prio(rq) <= sched_engine->queue_priority_hint) return false; - execlists->queue_priority_hint = rq_prio(rq); + sched_engine->queue_priority_hint = rq_prio(rq); return true; } @@ -2468,7 +2522,7 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, const struct i915_request *rq) { GEM_BUG_ON(i915_request_on_hold(rq)); - return !list_empty(&engine->active.hold) && hold_request(rq); + return !list_empty(&engine->sched_engine->hold) && hold_request(rq); } static void execlists_submit_request(struct i915_request *request) @@ -2477,23 +2531,24 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
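With the tasklet moved into the shared i915_sched_engine, the submission tasklets above can no longer container_of() their way straight back to the engine; they recover the sched_engine from the tasklet and then the backend through sched_engine->private_data, which setup points at the engine. A compact illustration of that back-pointer pattern in plain C, with a kernel-style typeof/offsetof stand-in for from_tasklet() and toy structs in place of the real ones:

#include <stddef.h>
#include <stdio.h>

struct tasklet { void (*callback)(struct tasklet *t); };

struct i915_sched_engine {
	struct tasklet tasklet;
	void *private_data;	/* backend (engine) owning this sched_engine */
};

struct intel_engine_cs {
	const char *name;
	struct i915_sched_engine *sched_engine;
};

/* Stand-in for the kernel's from_tasklet() (container_of on the tasklet). */
#define from_tasklet(var, t, member) \
	((typeof(var))((char *)(t) - offsetof(typeof(*(var)), member)))

static void submission_tasklet(struct tasklet *t)
{
	struct i915_sched_engine *se = NULL;

	se = from_tasklet(se, t, tasklet);
	struct intel_engine_cs *engine = se->private_data;

	printf("tasklet ran for %s\n", engine->name);
}

int main(void)
{
	struct i915_sched_engine se = { .tasklet = { submission_tasklet } };
	struct intel_engine_cs engine = { .name = "rcs0", .sched_engine = &se };

	se.private_data = &engine;	/* back-pointer set at setup time */
	se.tasklet.callback(&se.tasklet);
	return 0;
}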
*/ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); if (unlikely(ancestor_on_hold(engine, request))) { RQ_TRACE(request, "ancestor on hold\n"); - list_add_tail(&request->sched.link, &engine->active.hold); + list_add_tail(&request->sched.link, + &engine->sched_engine->hold); i915_request_set_hold(request); } else { queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); GEM_BUG_ON(list_empty(&request->sched.link)); if (submit_queue(engine, request)) __execlists_kick(&engine->execlists); } - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static int @@ -2533,11 +2588,26 @@ static int execlists_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void execlists_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request cancellation by %s", + current->comm); +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, .alloc = execlists_context_alloc, + .cancel_request = execlists_context_cancel_request, + .pre_pin = execlists_context_pre_pin, .pin = execlists_context_pin, .unpin = lrc_unpin, @@ -2548,6 +2618,8 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + + .create_virtual = execlists_create_virtual, }; static int emit_pdps(struct i915_request *rq) @@ -2800,10 +2872,8 @@ static int execlists_resume(struct intel_engine_cs *engine) static void execlists_reset_prepare(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - ENGINE_TRACE(engine, "depth<-%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2814,8 +2884,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - __tasklet_disable_sync_once(&execlists->tasklet); - GEM_BUG_ON(!reset_in_progress(execlists)); + __tasklet_disable_sync_once(&engine->sched_engine->tasklet); + GEM_BUG_ON(!reset_in_progress(engine)); /* * We stop engines, otherwise we might get failed reset and a @@ -2957,24 +3027,26 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) /* Push back any incomplete requests for replay after the reset. */ rcu_read_lock(); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __unwind_incomplete_requests(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); } static void nop_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; /* The driver is wedged; don't process any more events. 
*/ - WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -2998,15 +3070,15 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) execlists_reset_csb(engine, true); rcu_read_lock(); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) + list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -3016,12 +3088,12 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } /* On-hold requests will be flushed to timeline upon their release */ - list_for_each_entry(rq, &engine->active.hold, sched.link) + list_for_each_entry(rq, &sched_engine->hold, sched.link) i915_request_put(i915_request_mark_eio(rq)); /* Cancel all attached virtual engines */ @@ -3032,7 +3104,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); - spin_lock(&ve->base.active.lock); + spin_lock(&ve->base.sched_engine->lock); rq = fetch_and_zero(&ve->request); if (rq) { if (i915_request_mark_eio(rq)) { @@ -3042,20 +3114,20 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } i915_request_put(rq); - ve->base.execlists.queue_priority_hint = INT_MIN; + ve->base.sched_engine->queue_priority_hint = INT_MIN; } - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; - GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.callback = nop_submission_tasklet; + GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet)); + engine->sched_engine->tasklet.callback = nop_submission_tasklet; - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); } @@ -3073,14 +3145,14 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * reset as the next level of recovery, and as a final resort we * will declare the device wedged. */ - GEM_BUG_ON(!reset_in_progress(execlists)); + GEM_BUG_ON(!reset_in_progress(engine)); /* And kick in case we missed a new request submission. 
*/ - if (__tasklet_enable(&execlists->tasklet)) + if (__tasklet_enable(&engine->sched_engine->tasklet)) __execlists_kick(execlists); ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); } static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) @@ -3101,6 +3173,42 @@ static void execlists_park(struct intel_engine_cs *engine) cancel_timer(&engine->execlists.preempt); } +static void add_to_engine(struct i915_request *rq) +{ + lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->sched_engine->lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); + locked = engine; + } + list_del_init(&rq->sched.link); + + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&locked->sched_engine->lock); + + i915_request_notify_execute_cb_imm(rq); +} + static bool can_preempt(struct intel_engine_cs *engine) { if (GRAPHICS_VER(engine->i915) > 8) @@ -3110,11 +3218,62 @@ static bool can_preempt(struct intel_engine_cs *engine) return engine->class != RENDER_CLASS; } +static void kick_execlists(const struct i915_request *rq, int prio) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_sched_engine *sched_engine = engine->sched_engine; + const struct i915_request *inflight; + + /* + * We only need to kick the tasklet once for the high priority + * new context we add into the queue. + */ + if (prio <= sched_engine->queue_priority_hint) + return; + + rcu_read_lock(); + + /* Nothing currently active? We're overdue for a submission! */ + inflight = execlists_active(&engine->execlists); + if (!inflight) + goto unlock; + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves. + */ + if (inflight->context == rq->context) + goto unlock; + + ENGINE_TRACE(engine, + "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", + prio, + rq->fence.context, rq->fence.seqno, + inflight->fence.context, inflight->fence.seqno, + inflight->sched.attr.priority); + + sched_engine->queue_priority_hint = prio; + + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. 
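The remove_from_engine() hunk above keeps the old "lock, then re-check" ploy, just against the sched_engine lock: rq->engine is only stable under that lock, so after taking it we must confirm the request was not migrated to another (virtual) engine and hop locks if it was. A self-contained pthread sketch of that pattern, with __atomic_load_n standing in for READ_ONCE; kick_execlists() continues right after this sketch:

#include <pthread.h>
#include <stdio.h>

struct engine {
	const char *name;
	pthread_mutex_t lock;
};

struct request {
	struct engine *engine;	/* only stable while holding engine->lock */
};

/* Lock the engine the request currently points at, retrying if it moved. */
static struct engine *lock_request_engine(struct request *rq)
{
	struct engine *locked, *engine;

	locked = __atomic_load_n(&rq->engine, __ATOMIC_RELAXED);
	pthread_mutex_lock(&locked->lock);
	while ((engine = __atomic_load_n(&rq->engine, __ATOMIC_RELAXED)) != locked) {
		pthread_mutex_unlock(&locked->lock);
		pthread_mutex_lock(&engine->lock);
		locked = engine;
	}
	return locked;	/* caller removes the request, then unlocks */
}

int main(void)
{
	struct engine e0 = { "vcs0", PTHREAD_MUTEX_INITIALIZER };
	struct request rq = { &e0 };
	struct engine *locked = lock_request_engine(&rq);

	printf("removing request under %s lock\n", locked->name);
	pthread_mutex_unlock(&locked->lock);
	return 0;
}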
+ */ + if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) + tasklet_hi_schedule(&sched_engine->tasklet); + +unlock: + rcu_read_unlock(); +} + static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->schedule = i915_schedule; - engine->execlists.tasklet.callback = execlists_submission_tasklet; + engine->sched_engine->schedule = i915_schedule; + engine->sched_engine->kick_backend = kick_execlists; + engine->sched_engine->tasklet.callback = execlists_submission_tasklet; } static void execlists_shutdown(struct intel_engine_cs *engine) @@ -3122,7 +3281,7 @@ static void execlists_shutdown(struct intel_engine_cs *engine) /* Synchronise with residual timers and any softirq they raise */ del_timer_sync(&engine->execlists.timer); del_timer_sync(&engine->execlists.preempt); - tasklet_kill(&engine->execlists.tasklet); + tasklet_kill(&engine->sched_engine->tasklet); } static void execlists_release(struct intel_engine_cs *engine) @@ -3144,6 +3303,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; + engine->add_active_request = add_to_engine; + engine->remove_active_request = remove_from_engine; engine->reset.prepare = execlists_reset_prepare; engine->reset.rewind = execlists_reset_rewind; @@ -3238,7 +3399,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) struct intel_uncore *uncore = engine->uncore; u32 base = engine->mmio_base; - tasklet_setup(&engine->execlists.tasklet, execlists_submission_tasklet); + tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet); timer_setup(&engine->execlists.timer, execlists_timeslice, 0); timer_setup(&engine->execlists.preempt, execlists_preempt, 0); @@ -3255,6 +3416,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); execlists->ctrl_reg = uncore->regs + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); + + engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore, + RING_EXECLIST_CONTROL(engine->mmio_base), + FW_REG_WRITE); } else { execlists->submit_reg = uncore->regs + i915_mmio_reg_offset(RING_ELSP(base)); @@ -3272,7 +3437,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) execlists->csb_size = GEN11_CSB_ENTRIES; engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); - if (GRAPHICS_VER(engine->i915) >= 11) { + if (GRAPHICS_VER(engine->i915) >= 11 && + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) { execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); } @@ -3286,7 +3452,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests; + return &ve->base.sched_engine->default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3301,7 +3467,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) if (unlikely(ve->request)) { struct i915_request *old; - spin_lock_irq(&ve->base.active.lock); + spin_lock_irq(&ve->base.sched_engine->lock); old = fetch_and_zero(&ve->request); if (old) { @@ -3310,7 +3476,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) i915_request_put(old); } - spin_unlock_irq(&ve->base.active.lock); + 
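kick_execlists() above is the new sched_engine->kick_backend hook: it caches the highest queued priority in queue_priority_hint so the tasklet is scheduled at most once per priority level, and only when the newcomer is at least normal priority and can actually preempt the inflight context. A simplified model of that gating, with I915_PRIORITY_NORMAL assumed to be 0 and the inflight context reduced to a priority value:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define I915_PRIORITY_NORMAL	0	/* assumed; only the ordering matters */

struct sched_engine_model {
	int queue_priority_hint;	/* highest priority known to be queued */
	bool tasklet_kicked;
};

static int max_int(int a, int b) { return a > b ? a : b; }

static void kick_backend(struct sched_engine_model *se, int prio,
			 bool has_inflight, int inflight_prio)
{
	/* One kick per priority level: the hint says it is already pending. */
	if (prio <= se->queue_priority_hint)
		return;

	/* Nothing on the HW to compare against: leave it to the submit path. */
	if (!has_inflight)
		return;

	se->queue_priority_hint = prio;

	/* Only preempt when the newcomer is at least normal priority and
	 * matches or outranks what is currently executing. */
	if (prio >= max_int(I915_PRIORITY_NORMAL, inflight_prio))
		se->tasklet_kicked = true;
}

int main(void)
{
	struct sched_engine_model se = { .queue_priority_hint = INT_MIN };

	kick_backend(&se, 2, true, 0);
	printf("hint=%d kicked=%d\n", se.queue_priority_hint, (int)se.tasklet_kicked);
	return 0;
}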
spin_unlock_irq(&ve->base.sched_engine->lock); } /* @@ -3320,7 +3486,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) * rbtrees as in the case it is running in parallel, it may reinsert * the rb_node into a sibling. */ - tasklet_kill(&ve->base.execlists.tasklet); + tasklet_kill(&ve->base.sched_engine->tasklet); /* Decouple ourselves from the siblings, no more access allowed. */ for (n = 0; n < ve->num_siblings; n++) { @@ -3330,24 +3496,26 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->active.lock); + spin_lock_irq(&sibling->sched_engine->lock); - /* Detachment is lazily performed in the execlists tasklet */ + /* Detachment is lazily performed in the sched_engine->tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irq(&sibling->sched_engine->lock); } - GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet)); GEM_BUG_ON(!list_empty(virtual_queue(ve))); lrc_fini(&ve->context); intel_context_fini(&ve->context); - intel_breadcrumbs_free(ve->base.breadcrumbs); + if (ve->base.breadcrumbs) + intel_breadcrumbs_put(ve->base.breadcrumbs); + if (ve->base.sched_engine) + i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); - kfree(ve->bonds); kfree(ve); } @@ -3440,11 +3608,24 @@ static void virtual_context_exit(struct intel_context *ce) intel_engine_pm_put(ve->siblings[n]); } +static struct intel_engine_cs * +virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + static const struct intel_context_ops virtual_context_ops = { .flags = COPS_HAS_INFLIGHT, .alloc = virtual_context_alloc, + .cancel_request = execlists_context_cancel_request, + .pre_pin = virtual_context_pre_pin, .pin = virtual_context_pin, .unpin = lrc_unpin, @@ -3454,6 +3635,8 @@ static const struct intel_context_ops virtual_context_ops = { .exit = virtual_context_exit, .destroy = virtual_context_destroy, + + .get_sibling = virtual_get_sibling, }; static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) @@ -3475,16 +3658,18 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + mask, ve->base.sched_engine->queue_priority_hint); return mask; } static void virtual_submission_tasklet(struct tasklet_struct *t) { + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); struct virtual_engine * const ve = - from_tasklet(ve, t, base.execlists.tasklet); - const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); + (struct virtual_engine *)sched_engine->private_data; + const int prio = READ_ONCE(sched_engine->queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -3503,7 +3688,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) if (!READ_ONCE(ve->request)) break; /* already handled by a sibling's tasklet */ - spin_lock_irq(&sibling->active.lock); + spin_lock_irq(&sibling->sched_engine->lock); if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { @@ -3552,11 +3737,11 @@ static void 
virtual_submission_tasklet(struct tasklet_struct *t) submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) - tasklet_hi_schedule(&sibling->execlists.tasklet); + if (first && prio > sibling->sched_engine->queue_priority_hint) + tasklet_hi_schedule(&sibling->sched_engine->tasklet); unlock_engine: - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irq(&sibling->sched_engine->lock); if (intel_context_inflight(&ve->context)) break; @@ -3574,7 +3759,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - spin_lock_irqsave(&ve->base.active.lock, flags); + spin_lock_irqsave(&ve->base.sched_engine->lock, flags); /* By the time we resubmit a request, it may be completed */ if (__i915_request_is_complete(rq)) { @@ -3588,68 +3773,25 @@ static void virtual_submit_request(struct i915_request *rq) i915_request_put(ve->request); } - ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->base.sched_engine->queue_priority_hint = rq_prio(rq); ve->request = i915_request_get(rq); GEM_BUG_ON(!list_empty(virtual_queue(ve))); list_move_tail(&rq->sched.link, virtual_queue(ve)); - tasklet_hi_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.sched_engine->tasklet); unlock: - spin_unlock_irqrestore(&ve->base.active.lock, flags); + spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -static struct ve_bond * -virtual_find_bond(struct virtual_engine *ve, - const struct intel_engine_cs *master) -{ - int i; - - for (i = 0; i < ve->num_bonds; i++) { - if (ve->bonds[i].master == master) - return &ve->bonds[i]; - } - - return NULL; -} - -static void -virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) -{ - struct virtual_engine *ve = to_virtual_engine(rq->engine); - intel_engine_mask_t allowed, exec; - struct ve_bond *bond; - - allowed = ~to_request(signal)->engine->mask; - - bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) - allowed &= bond->sibling_mask; - - /* Restrict the bonded request to run on only the available engines */ - exec = READ_ONCE(rq->execution_mask); - while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) - ; - - /* Prevent the master from being re-run on the bonded engines */ - to_request(signal)->execution_mask &= ~allowed; -} - -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count) +static struct intel_context * +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) { struct virtual_engine *ve; unsigned int n; int err; - if (count == 0) - return ERR_PTR(-EINVAL); - - if (count == 1) - return intel_context_create(siblings[0]); - ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); if (!ve) return ERR_PTR(-ENOMEM); @@ -3681,19 +3823,24 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); - intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); + ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!ve->base.sched_engine) { + err = -ENOMEM; + goto err_put; + } + ve->base.sched_engine->private_data = &ve->base; + ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; - ve->base.schedule = i915_schedule; + ve->base.sched_engine->schedule = i915_schedule; + ve->base.sched_engine->kick_backend = kick_execlists; 
ve->base.submit_request = virtual_submit_request; - ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); - ve->base.execlists.queue_priority_hint = INT_MIN; - tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet); + tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet); intel_context_init(&ve->context, &ve->base); @@ -3721,7 +3868,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, * layering if we handle cloning of the requests and * submitting a copy into each backend. */ - if (sibling->execlists.tasklet.callback != + if (sibling->sched_engine->tasklet.callback != execlists_submission_tasklet) { err = -ENODEV; goto err_put; @@ -3756,6 +3903,8 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, "v%dx%d", ve->base.class, count); ve->base.context_size = sibling->context_size; + ve->base.add_active_request = sibling->add_active_request; + ve->base.remove_active_request = sibling->remove_active_request; ve->base.emit_bb_start = sibling->emit_bb_start; ve->base.emit_flush = sibling->emit_flush; ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; @@ -3776,70 +3925,6 @@ err_put: return ERR_PTR(err); } -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src) -{ - struct virtual_engine *se = to_virtual_engine(src); - struct intel_context *dst; - - dst = intel_execlists_create_virtual(se->siblings, - se->num_siblings); - if (IS_ERR(dst)) - return dst; - - if (se->num_bonds) { - struct virtual_engine *de = to_virtual_engine(dst->engine); - - de->bonds = kmemdup(se->bonds, - sizeof(*se->bonds) * se->num_bonds, - GFP_KERNEL); - if (!de->bonds) { - intel_context_put(dst); - return ERR_PTR(-ENOMEM); - } - - de->num_bonds = se->num_bonds; - } - - return dst; -} - -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling) -{ - struct virtual_engine *ve = to_virtual_engine(engine); - struct ve_bond *bond; - int n; - - /* Sanity check the sibling is part of the virtual engine */ - for (n = 0; n < ve->num_siblings; n++) - if (sibling == ve->siblings[n]) - break; - if (n == ve->num_siblings) - return -EINVAL; - - bond = virtual_find_bond(ve, master); - if (bond) { - bond->sibling_mask |= sibling->mask; - return 0; - } - - bond = krealloc(ve->bonds, - sizeof(*bond) * (ve->num_bonds + 1), - GFP_KERNEL); - if (!bond) - return -ENOMEM; - - bond[ve->num_bonds].master = master; - bond[ve->num_bonds].sibling_mask = sibling->mask; - - ve->bonds = bond; - ve->num_bonds++; - - return 0; -} - void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, @@ -3849,16 +3934,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_sched_engine *sched_engine = engine->sched_engine; struct i915_request *rq, *last; unsigned long flags; unsigned int count; struct rb_node *rb; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); last = NULL; count = 0; - list_for_each_entry(rq, &engine->active.requests, sched.link) { + list_for_each_entry(rq, &sched_engine->requests, sched.link) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else @@ -3873,13 +3959,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - if 
(execlists->queue_priority_hint != INT_MIN) + if (sched_engine->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - READ_ONCE(execlists->queue_priority_hint)); + READ_ONCE(sched_engine->queue_priority_hint)); last = NULL; count = 0; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); priolist_for_each_request(rq, p) { @@ -3921,7 +4007,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index 4ca9b475e252..a1aa92c983a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -32,15 +32,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, int indent), unsigned int max); -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count); - -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src); - -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling); +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20e46b843324..de3ac58fceec 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -826,13 +826,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* - * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range * will be dropped. For WC mappings in general we have 64 byte burst * writes when the WC buffer is flushed, so we can't use it, but have to * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. 
*/ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 10) + if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) ggtt->gsm = ioremap(phys_addr, size); else ggtt->gsm = ioremap_wc(phys_addr, size); @@ -1494,7 +1494,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, if (ret) goto err_sg_alloc; - iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset, true); + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); GEM_BUG_ON(!iter); sg = st->sgl; diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2694dbb9967e..1c3af0fc0456 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -123,8 +123,10 @@ #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) #define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) #define MI_SEMAPHORE_TOKEN_SHIFT 5 +#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) +#define MI_STORE_QWORD_IMM_GEN8 (MI_INSTR(0x20, 3) | REG_BIT(21)) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2161bf01ef8b..62d40c986642 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -13,6 +13,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_migrate.h" #include "intel_mocs.h" #include "intel_rc6.h" #include "intel_renderstate.h" @@ -40,8 +41,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_timelines(gt); intel_gt_pm_init_early(gt); - intel_rps_init_early(>->rps); intel_uc_init_early(>->uc); + intel_rps_init_early(>->rps); } int intel_gt_probe_lmem(struct intel_gt *gt) @@ -83,13 +84,73 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) gt->ggtt = ggtt; } +static const struct intel_mmio_range icl_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + { 0x00DD00, 0x00DDFF }, + { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ + {}, +}; + +static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D800, 0x00D8FF }, + {}, +}; + +static const struct intel_mmio_range dg2_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + +static u16 slicemask(struct intel_gt *gt, int count) +{ + u64 dss_mask = intel_sseu_get_subslices(>->info.sseu, 0); + + return intel_slicemask_from_dssmask(dss_mask, count); +} + int intel_gt_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; + intel_gt_init_clock_frequency(gt); intel_uc_init_mmio(>->uc); intel_sseu_info_init(gt); + /* + * An mslice is unavailable only if both the meml3 for the slice is + * disabled *and* all of the DSS in the slice (quadrant) are disabled. 
+ */ + if (HAS_MSLICES(i915)) + gt->info.mslice_mask = + slicemask(gt, GEN_DSS_PER_MSLICE) | + (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN12_MEML3_EN_MASK); + + if (IS_DG2(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = dg2_lncf_steering_table; + } else if (IS_XEHPSDV(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; + } else if (GRAPHICS_VER(i915) >= 11 && + GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { + gt->steering_table[L3BANK] = icl_l3bank_steering_table; + gt->info.l3bank_mask = + ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN10_L3BANK_MASK; + } else if (HAS_MSLICES(i915)) { + MISSING_CASE(INTEL_INFO(i915)->platform); + } + return intel_engines_init_mmio(gt); } @@ -192,7 +253,7 @@ static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) intel_uncore_rmw(uncore, reg, 0, 0); } -static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) +static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) { GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); GEN6_RING_FAULT_REG_POSTING_READ(engine); @@ -238,7 +299,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, enum intel_engine_id id; for_each_engine_masked(engine, gt, engine_mask, id) - gen8_clear_engine_error_register(engine); + gen6_clear_engine_error_register(engine); } } @@ -572,6 +633,25 @@ static void __intel_gt_disable(struct intel_gt *gt) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); } +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + long remaining_timeout; + + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, + &remaining_timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout ? timeout : intel_uc_wait_for_idle(>->uc, + remaining_timeout); +} + int intel_gt_init(struct intel_gt *gt) { int err; @@ -622,10 +702,14 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; + intel_uc_init_late(>->uc); + err = i915_inject_probe_error(gt->i915, -EIO); if (err) goto err_gt; + intel_migrate_init(>->migrate, gt); + goto out_fw; err_gt: __intel_gt_disable(gt); @@ -649,6 +733,7 @@ void intel_gt_driver_remove(struct intel_gt *gt) { __intel_gt_disable(gt); + intel_migrate_fini(>->migrate); intel_uc_driver_remove(>->uc); intel_engines_release(gt); @@ -697,6 +782,112 @@ void intel_gt_driver_late_release(struct intel_gt *gt) intel_engines_free(gt); } +/** + * intel_gt_reg_needs_read_steering - determine whether a register read + * requires explicit steering + * @gt: GT structure + * @reg: the register to check steering requirements for + * @type: type of multicast steering to check + * + * Determines whether @reg needs explicit steering of a specific type for + * reads. + * + * Returns false if @reg does not belong to a register range of the given + * steering type, or if the default (subslice-based) steering IDs are suitable + * for @type steering too. 
+ */ +static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, + i915_reg_t reg, + enum intel_steering_type type) +{ + const u32 offset = i915_mmio_reg_offset(reg); + const struct intel_mmio_range *entry; + + if (likely(!intel_gt_needs_read_steering(gt, type))) + return false; + + for (entry = gt->steering_table[type]; entry->end; entry++) { + if (offset >= entry->start && offset <= entry->end) + return true; + } + + return false; +} + +/** + * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering + * @gt: GT structure + * @type: multicast register type + * @sliceid: Slice ID returned + * @subsliceid: Subslice ID returned + * + * Determines sliceid and subsliceid values that will steer reads + * of a specific multicast register class to a valid value. + */ +static void intel_gt_get_valid_steering(struct intel_gt *gt, + enum intel_steering_type type, + u8 *sliceid, u8 *subsliceid) +{ + switch (type) { + case L3BANK: + GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */ + + *sliceid = 0; /* unused */ + *subsliceid = __ffs(gt->info.l3bank_mask); + break; + case MSLICE: + GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ + + *sliceid = __ffs(gt->info.mslice_mask); + *subsliceid = 0; /* unused */ + break; + case LNCF: + GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ + + /* + * An LNCF is always present if its mslice is present, so we + * can safely just steer to LNCF 0 in all cases. + */ + *sliceid = __ffs(gt->info.mslice_mask) << 1; + *subsliceid = 0; /* unused */ + break; + default: + MISSING_CASE(type); + *sliceid = 0; + *subsliceid = 0; + } +} + +/** + * intel_gt_read_register_fw - reads a GT register with support for multicast + * @gt: GT structure + * @reg: register to read + * + * This function will read a GT register. If the register is a multicast + * register, the read will be steered to a valid instance (i.e., one that + * isn't fused off or powered down by power gating). + * + * Returns the value from a valid instance of @reg. 
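Editor's aside on the helper documented just above: a minimal, hypothetical caller might look like the sketch below. intel_gt_read_register_fw() is the function added by this patch; the wrapper name and the idea of passing an arbitrary multicast register are illustrative only, not part of the patch.

	/*
	 * Sketch only: @reg stands for any register that falls inside one of
	 * the steering_table[] ranges set up in intel_gt_init_mmio() above.
	 */
	static u32 read_one_valid_mcr_instance(struct intel_gt *gt, i915_reg_t reg)
	{
		/*
		 * If @reg lies in an L3BANK/MSLICE/LNCF range, the read is
		 * steered to an instance that is not fused off; otherwise it
		 * falls through to a plain forcewake-held MMIO read.
		 */
		return intel_gt_read_register_fw(gt, reg);
	}
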
+ */ +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering_fw(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read_fw(gt->uncore, reg); +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 7ec395cace69..74e771871a9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + void intel_gt_check_and_clear_faults(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); @@ -75,6 +77,14 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) return unlikely(test_bit(I915_WEDGED, >->reset.flags)); } +static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, + enum intel_steering_type type) +{ + return gt->steering_table[type]; +} + +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 9f0e729d2d15..3513d6f90747 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -24,8 +24,8 @@ static u32 read_reference_ts_freq(struct intel_uncore *uncore) return base_freq + frac_freq; } -static u32 gen10_get_crystal_clock_freq(struct intel_uncore *uncore, - u32 rpm_config_reg) +static u32 gen9_get_crystal_clock_freq(struct intel_uncore *uncore, + u32 rpm_config_reg) { u32 f19_2_mhz = 19200000; u32 f24_mhz = 24000000; @@ -128,10 +128,10 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) } else { u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); - if (GRAPHICS_VER(uncore->i915) <= 10) - freq = gen10_get_crystal_clock_freq(uncore, c0); - else + if (GRAPHICS_VER(uncore->i915) >= 11) freq = gen11_get_crystal_clock_freq(uncore, c0); + else + freq = gen9_get_crystal_clock_freq(uncore, c0); /* * Now figure out how the command stream's timestamp diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index c13462274fe8..b2de83be4d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -184,7 +184,13 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, 
GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); @@ -218,8 +224,13 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); - + if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index aef3084e8b16..dea8e2479897 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -6,7 +6,6 @@ #include <linux/suspend.h> #include "i915_drv.h" -#include "i915_globals.h" #include "i915_params.h" #include "intel_context.h" #include "intel_engine_pm.h" @@ -67,8 +66,6 @@ static int __gt_unpark(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); - i915_globals_unpark(); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -116,8 +113,6 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); - i915_globals_park(); - return 0; } @@ -174,8 +169,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - intel_uc_sanitize(>->uc); - for_each_engine(engine, gt, id) if (engine->reset.prepare) engine->reset.prepare(engine); @@ -191,6 +184,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) __intel_engine_reset(engine, false); } + intel_uc_reset(>->uc, false); + for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); @@ -243,6 +238,8 @@ int intel_gt_resume(struct intel_gt *gt) goto err_wedged; } + intel_uc_reset_finish(>->uc); + intel_rps_enable(>->rps); intel_llc_enable(>->llc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 647eca9d867a..edb881d75630 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -130,7 +130,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) GEM_BUG_ON(engine->retire); } -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *remaining_timeout) { struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl, *tn; @@ -195,22 +196,10 @@ out_active: spin_lock(&timelines->lock); if (flush_submission(gt, timeout)) /* Wait, there's more! */ active_count++; - return active_count ? timeout : 0; -} - -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) -{ - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(gt)) - return 0; - - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { - cond_resched(); - if (signal_pending(current)) - return -EINTR; - } + if (remaining_timeout) + *remaining_timeout = timeout; - return timeout; + return active_count ? 
timeout : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index fcc30a6e4fe9..51dbe0e3294e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -6,14 +6,17 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +#include <stddef.h> + struct intel_engine_cs; struct intel_gt; struct intel_timeline; -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *remaining_timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) { - intel_gt_retire_requests_timeout(gt, 0); + intel_gt_retire_requests_timeout(gt, 0, NULL); } void intel_engine_init_retire(struct intel_engine_cs *engine); @@ -21,8 +24,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl); void intel_engine_fini_retire(struct intel_engine_cs *engine); -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); - void intel_gt_init_requests(struct intel_gt *gt); void intel_gt_park_requests(struct intel_gt *gt); void intel_gt_unpark_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index fecfacf551d5..a81e21bf1bd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -24,6 +24,7 @@ #include "intel_reset_types.h" #include "intel_rc6_types.h" #include "intel_rps_types.h" +#include "intel_migrate_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -31,6 +32,33 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; +struct intel_mmio_range { + u32 start; + u32 end; +}; + +/* + * The hardware has multiple kinds of multicast register ranges that need + * special register steering (and future platforms are expected to add + * additional types). + * + * During driver startup, we initialize the steering control register to + * direct reads to a slice/subslice that are valid for the 'subslice' class + * of multicast registers. If another type of steering does not have any + * overlap in valid steering targets with 'subslice' style registers, we will + * need to explicitly re-steer reads of registers of the other type. + * + * Only the replication types that may need additional non-default steering + * are listed here. 
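Editor's aside on the range tables this comment describes: a hypothetical table for some future steering type could look like the sketch below. The offsets are invented and not taken from this patch; only the sentinel-terminated layout mirrors the real tables added in intel_gt.c above, whose lookup loop stops at the empty entry (entry->end == 0).

	/* Hypothetical ranges, for illustration only. */
	static const struct intel_mmio_range example_steering_table[] = {
		{ 0x001000, 0x0010FF },
		{ 0x002400, 0x0024FF },
		{},	/* sentinel: terminates the steering-range walk */
	};
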
+ */ +enum intel_steering_type { + L3BANK, + MSLICE, + LNCF, + + NUM_STEERING_TYPES +}; + enum intel_submission_method { INTEL_SUBMISSION_RING, INTEL_SUBMISSION_ELSP, @@ -145,8 +173,15 @@ struct intel_gt { struct i915_vma *scratch; + struct intel_migrate migrate; + + const struct intel_mmio_range *steering_table[NUM_STEERING_TYPES]; + struct intel_gt_info { intel_engine_mask_t engine_mask; + + u32 l3bank_mask; + u8 num_engines; /* Media engine access to SFC per instance */ @@ -154,6 +189,8 @@ struct intel_gt { /* Slice/subslice/EU info */ struct sseu_dev_info sseu; + + unsigned long mslice_mask; } info; }; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 084ea65d59c0..e137dd32b5b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -16,7 +16,19 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) { struct drm_i915_gem_object *obj; - obj = i915_gem_object_create_lmem(vm->i915, sz, 0); + /* + * To avoid severe over-allocation when dealing with min_page_size + * restrictions, we override that behaviour here by allowing an object + * size and page layout which can be smaller. In practice this should be + * totally fine, since GTT paging structures are not typically inserted + * into the GTT. + * + * Note that we also hit this path for the scratch page, and for this + * case it might need to be 64K, but that should work fine here since we + * used the passed in size for the page size, which should ensure it + * also has the same alignment. + */ + obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0); /* * Ensure all paging structures for this vm share the same dma-resv * object underneath, with the idea that one object_lock() will lock @@ -414,7 +426,7 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } -static void cnl_setup_private_ppat(struct intel_uncore *uncore) +static void icl_setup_private_ppat(struct intel_uncore *uncore) { intel_uncore_write(uncore, GEN10_PAT_INDEX(0), @@ -514,8 +526,8 @@ void setup_private_pat(struct intel_uncore *uncore) if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); - else if (GRAPHICS_VER(i915) >= 10) - cnl_setup_private_ppat(uncore); + else if (GRAPHICS_VER(i915) >= 11) + icl_setup_private_ppat(uncore); else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) chv_setup_private_ppat(uncore); else diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index edea95b97c36..bc7153018ebd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -140,7 +140,6 @@ typedef u64 gen8_pte_t; enum i915_cache_level; -struct drm_i915_file_private; struct drm_i915_gem_object; struct i915_fence_reg; struct i915_vma; @@ -220,16 +219,6 @@ struct i915_address_space { struct intel_gt *gt; struct drm_i915_private *i915; struct device *dma; - /* - * Every address space belongs to a struct file - except for the global - * GTT that is owned by the driver (and so @file is set to NULL). In - * principle, no information should leak from one context to another - * (or between files/processes etc) unless explicitly shared by the - * owner. Tracking the owner is important in order to free up per-file - * objects along with the file, to aide resource tracking, and to - * assign blame. - */ - struct drm_i915_file_private *file; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ u64 reserved; /* size addr space reserved */ @@ -296,6 +285,13 @@ struct i915_address_space { u32 flags); void (*cleanup)(struct i915_address_space *vm); + void (*foreach)(struct i915_address_space *vm, + u64 start, u64 length, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data); + struct i915_vma_ops vma_ops; I915_SELFTEST_DECLARE(struct fault_attr fault_attr); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a27bac0a4bfb..bb4af4977920 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -70,7 +70,7 @@ static void set_offsets(u32 *regs, if (close) { /* Close the batch; used mainly by live_lrc_layout() */ *regs = MI_BATCH_BUFFER_END; - if (GRAPHICS_VER(engine->i915) >= 10) + if (GRAPHICS_VER(engine->i915) >= 11) *regs |= BIT(0); } } @@ -484,6 +484,47 @@ static const u8 gen12_rcs_offsets[] = { END }; +static const u8 xehp_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -502,7 +543,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return xehp_rcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 11) return gen11_rcs_offsets; @@ -522,7 +565,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x70; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0x60; else if (GRAPHICS_VER(engine->i915) >= 9) return 0x54; @@ -534,7 +579,9 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x84; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0x74; else if (GRAPHICS_VER(engine->i915) >= 9) return 0x68; @@ -578,10 +625,16 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - if (engine->class != RENDER_CLASS) - return -1; - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + /* + * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL + * simply to match the RCS context image layout. 
+ */ + return 0xc6; + else if (engine->class != RENDER_CLASS) + return -1; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0xb6; else if (GRAPHICS_VER(engine->i915) >= 11) return 0xaa; @@ -600,8 +653,6 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 11: return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 10: - return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 9: return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 8: @@ -845,7 +896,7 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ERR(vma)) return PTR_ERR(vma); - ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); + ring = intel_engine_create_ring(engine, ce->ring_size); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err_vma; @@ -1101,6 +1152,14 @@ setup_indirect_ctx_bb(const struct intel_context *ce, * bits 55-60: SW counter * bits 61-63: engine class * + * On Xe_HP, the upper dword of the descriptor has a new format: + * + * bits 32-37: virtual function number + * bit 38: mbz, reserved for use by hardware + * bits 39-54: SW context ID + * bits 55-57: reserved + * bits 58-63: SW counter + * * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ @@ -1387,40 +1446,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 * -gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - int i; - - /* - * WaPipeControlBefore3DStateSamplePattern: cnl - * - * Ensure the engine is idle prior to programming a - * 3DSTATE_SAMPLE_PATTERN during a context restore. - */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL, - 0); - /* - * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for - * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in - * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is - * confusing. Since gen8_emit_pipe_control() already advances the - * batch by 6 dwords, we advance the other 10 here, completing a - * cacheline. It's not clear if the workaround requires this padding - * before other commands, or if it's just the regular padding we would - * already have for the workaround bb, so leave it here for now. 
- */ - for (i = 0; i < 10; i++) - *batch++ = MI_NOOP; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - #define CTX_WA_BB_SIZE (PAGE_SIZE) static int lrc_create_wa_ctx(struct intel_engine_cs *engine) @@ -1473,10 +1498,6 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) case 12: case 11: return; - case 10: - wa_bb_fn[0] = gen10_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; case 9: wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 41e5350a7a05..f785d0ed238f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -87,9 +87,10 @@ #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) #define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ #define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ /* in Gen12 ID 0x7FF is reserved to indicate idle */ #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) +/* in Xe_HP ID 0xFFFF is reserved to indicate "invalid context" */ +#define XEHP_MAX_CONTEXT_HW_ID 0xFFFF #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c new file mode 100644 index 000000000000..1dac21aa7e5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_context.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gtt.h" +#include "intel_migrate.h" +#include "intel_ring.h" + +struct insert_pte_data { + u64 offset; + bool is_lmem; +}; + +#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ + +static bool engine_supports_migration(struct intel_engine_cs *engine) +{ + if (!engine) + return false; + + /* + * We need the ability to prevent aribtration (MI_ARB_ON_OFF), + * the ability to write PTE using inline data (MI_STORE_DATA) + * and of course the ability to do the block transfer (blits). + */ + GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS); + + return true; +} + +static void insert_pte(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, + d->is_lmem ? PTE_LM : 0); + d->offset += PAGE_SIZE; +} + +static struct i915_address_space *migrate_vm(struct intel_gt *gt) +{ + struct i915_vm_pt_stash stash = {}; + struct i915_ppgtt *vm; + int err; + int i; + + /* + * We construct a very special VM for use by all migration contexts, + * it is kept pinned so that it can be used at any time. As we need + * to pre-allocate the page directories for the migration VM, this + * limits us to only using a small number of prepared vma. + * + * To be able to pipeline and reschedule migration operations while + * avoiding unnecessary contention on the vm itself, the PTE updates + * are inline with the blits. All the blits use the same fixed + * addresses, with the backing store redirection being updated on the + * fly. Only 2 implicit vma are used for all migration operations. 
+ * + * We lay the ppGTT out as: + * + * [0, CHUNK_SZ) -> first object + * [CHUNK_SZ, 2 * CHUNK_SZ) -> second object + * [2 * CHUNK_SZ, 2 * CHUNK_SZ + 2 * CHUNK_SZ >> 9] -> PTE + * + * By exposing the dma addresses of the page directories themselves + * within the ppGTT, we are then able to rewrite the PTE prior to use. + * But the PTE update and subsequent migration operation must be atomic, + * i.e. within the same non-preemptible window so that we do not switch + * to another migration context that overwrites the PTE. + * + * TODO: Add support for huge LMEM PTEs + */ + + vm = i915_ppgtt_create(gt); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + if (!vm->vm.allocate_va_range || !vm->vm.foreach) { + err = -ENODEV; + goto err_vm; + } + + /* + * Each engine instance is assigned its own chunk in the VM, so + * that we can run multiple instances concurrently + */ + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + struct intel_engine_cs *engine; + u64 base = (u64)i << 32; + struct insert_pte_data d = {}; + struct i915_gem_ww_ctx ww; + u64 sz; + + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (!engine_supports_migration(engine)) + continue; + + /* + * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need + * 4x2 page directories for source/destination. + */ + sz = 2 * CHUNK_SZ; + d.offset = base + sz; + + /* + * We need another page directory setup so that we can write + * the 8x512 PTE in each chunk. + */ + sz += (sz >> 12) * sizeof(u64); + + err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz); + if (err) + goto err_vm; + + for_i915_gem_ww(&ww, err, true) { + err = i915_vm_lock_objects(&vm->vm, &ww); + if (err) + continue; + err = i915_vm_map_pt_stash(&vm->vm, &stash); + if (err) + continue; + + vm->vm.allocate_va_range(&vm->vm, &stash, base, sz); + } + i915_vm_free_pt_stash(&vm->vm, &stash); + if (err) + goto err_vm; + + /* Now allow the GPU to rewrite the PTE via its own ppGTT */ + d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); + vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); + } + + return &vm->vm; + +err_vm: + i915_vm_put(&vm->vm); + return ERR_PTR(err); +} + +static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + int i; + + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (engine_supports_migration(engine)) + return engine; + } + + return NULL; +} + +static struct intel_context *pinned_context(struct intel_gt *gt) +{ + static struct lock_class_key key; + struct intel_engine_cs *engine; + struct i915_address_space *vm; + struct intel_context *ce; + + engine = first_copy_engine(gt); + if (!engine) + return ERR_PTR(-ENODEV); + + vm = migrate_vm(gt); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + ce = intel_engine_create_pinned_context(engine, vm, SZ_512K, + I915_GEM_HWS_MIGRATE, + &key, "migrate"); + i915_vm_put(vm); + return ce; +} + +int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt) +{ + struct intel_context *ce; + + memset(m, 0, sizeof(*m)); + + ce = pinned_context(gt); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + m->context = ce; + return 0; +} + +static int random_index(unsigned int max) +{ + return upper_32_bits(mul_u32_u32(get_random_u32(), max)); +} + +static struct intel_context *__migrate_engines(struct intel_gt *gt) +{ + struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE]; + struct intel_engine_cs *engine; + unsigned int count, i; + + count = 0; + for (i = 0; i < 
ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (engine_supports_migration(engine)) + engines[count++] = engine; + } + + return intel_context_create(engines[random_index(count)]); +} + +struct intel_context *intel_migrate_create_context(struct intel_migrate *m) +{ + struct intel_context *ce; + + /* + * We randomly distribute contexts across the engines upon constrction, + * as they all share the same pinned vm, and so in order to allow + * multiple blits to run in parallel, we must construct each blit + * to use a different range of the vm for its GTT. This has to be + * known at construction, so we can not use the late greedy load + * balancing of the virtual-engine. + */ + ce = __migrate_engines(m->context->engine->gt); + if (IS_ERR(ce)) + return ce; + + ce->ring = NULL; + ce->ring_size = SZ_256K; + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(m->context->vm); + + return ce; +} + +static inline struct sgt_dma sg_sgt(struct scatterlist *sg) +{ + dma_addr_t addr = sg_dma_address(sg); + + return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; +} + +static int emit_no_arbitration(struct i915_request *rq) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Explicitly disable preemption for this request. */ + *cs++ = MI_ARB_ON_OFF; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int emit_pte(struct i915_request *rq, + struct sgt_dma *it, + enum i915_cache_level cache_level, + bool is_lmem, + u64 offset, + int length) +{ + const u64 encode = rq->context->vm->pte_encode(0, cache_level, + is_lmem ? PTE_LM : 0); + struct intel_ring *ring = rq->ring; + int total = 0; + u32 *hdr, *cs; + int pkt; + + GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + + /* Compute the page directory offset for the target address range */ + offset += (u64)rq->engine->instance << 32; + offset >>= 12; + offset *= sizeof(u64); + offset += 2 * CHUNK_SZ; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Pack as many PTE updates as possible into a single MI command */ + pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + hdr = cs; + *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + do { + if (cs - hdr >= pkt) { + *hdr += cs - hdr - 2; + *cs++ = MI_NOOP; + + ring->emit = (void *)cs - ring->vaddr; + intel_ring_advance(rq, cs); + intel_ring_update_space(ring); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + hdr = cs; + *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + + *cs++ = lower_32_bits(encode | it->dma); + *cs++ = upper_32_bits(encode | it->dma); + + offset += 8; + total += I915_GTT_PAGE_SIZE; + + it->dma += I915_GTT_PAGE_SIZE; + if (it->dma >= it->max) { + it->sg = __sg_next(it->sg); + if (!it->sg || sg_dma_len(it->sg) == 0) + break; + + it->dma = sg_dma_address(it->sg); + it->max = it->dma + sg_dma_len(it->sg); + } + } while (total < length); + + *hdr += cs - hdr - 2; + *cs++ = MI_NOOP; + + ring->emit = (void *)cs - ring->vaddr; + intel_ring_advance(rq, cs); + intel_ring_update_space(ring); + + return total; +} + +static bool wa_1209644611_applies(int ver, u32 size) +{ + 
u32 height = size >> PAGE_SHIFT; + + if (ver != 11) + return false; + + return height % 4 == 3 && height <= 8; +} + +static int emit_copy(struct i915_request *rq, int size) +{ + const int ver = GRAPHICS_VER(rq->engine->i915); + u32 instance = rq->engine->instance; + u32 *cs; + + cs = intel_ring_begin(rq, ver >= 8 ? 10 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (ver >= 9 && !wa_1209644611_applies(ver, size)) { + *cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + *cs++ = BLT_DEPTH_32 | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = instance; + *cs++ = 0; + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + *cs++ = instance; + } else if (ver >= 8) { + *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = instance; + *cs++ = 0; + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + *cs++ = instance; + } else { + GEM_BUG_ON(instance); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + } + + intel_ring_advance(rq, cs); + return 0; +} + +int +intel_context_migrate_copy(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out) +{ + struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst); + struct i915_request *rq; + int err; + + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (await) { + err = i915_request_await_dma_fence(rq, await); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + await = NULL; + } + + /* The PTE updates + copy must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0, + CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, + CHUNK_SZ, len); + if (err < 0) + goto out_rq; + if (err < len) { + err = -EINVAL; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_copy(rq, len); + + /* Arbitration is re-enabled between requests. */ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it_src.sg || !sg_dma_len(it_src.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +static int emit_clear(struct i915_request *rq, int size, u32 value) +{ + const int ver = GRAPHICS_VER(rq->engine->i915); + u32 instance = rq->engine->instance; + u32 *cs; + + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + cs = intel_ring_begin(rq, ver >= 8 ? 
8 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (ver >= 8) { + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = 0; /* offset */ + *cs++ = instance; + *cs++ = value; + *cs++ = MI_NOOP; + } else { + GEM_BUG_ON(instance); + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = 0; + *cs++ = value; + } + + intel_ring_advance(rq, cs); + return 0; +} + +int +intel_context_migrate_clear(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out) +{ + struct sgt_dma it = sg_sgt(sg); + struct i915_request *rq; + int err; + + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (await) { + err = i915_request_await_dma_fence(rq, await); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + await = NULL; + } + + /* The PTE updates + clear must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_clear(rq, len, value); + + /* Arbitration is re-enabled between requests. */ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it.sg || !sg_dma_len(it.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +int intel_migrate_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_migrate_copy(ce, await, + src, src_cache_level, src_is_lmem, + dst, dst_cache_level, dst_is_lmem, + out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + +int +intel_migrate_clear(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_migrate_clear(ce, await, sg, cache_level, + is_lmem, value, out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + +void intel_migrate_fini(struct intel_migrate *m) +{ + struct 
intel_context *ce; + + ce = fetch_and_zero(&m->context); + if (!ce) + return; + + intel_engine_destroy_pinned_context(ce); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_migrate.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h new file mode 100644 index 000000000000..4e18e755a00b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_MIGRATE__ +#define __INTEL_MIGRATE__ + +#include <linux/types.h> + +#include "intel_migrate_types.h" + +struct dma_fence; +struct i915_request; +struct i915_gem_ww_ctx; +struct intel_gt; +struct scatterlist; +enum i915_cache_level; + +int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt); + +struct intel_context *intel_migrate_create_context(struct intel_migrate *m); + +int intel_migrate_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out); + +int intel_context_migrate_copy(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out); + +int +intel_migrate_clear(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out); +int +intel_context_migrate_clear(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out); + +void intel_migrate_fini(struct intel_migrate *m); + +#endif /* __INTEL_MIGRATE__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate_types.h b/drivers/gpu/drm/i915/gt/intel_migrate_types.h new file mode 100644 index 000000000000..d98230597f42 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_MIGRATE_TYPES__ +#define __INTEL_MIGRATE_TYPES__ + +struct intel_context; + +struct intel_migrate { + struct intel_context *context; +}; + +#endif /* __INTEL_MIGRATE_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 17848807f111..582c4423b95d 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -352,7 +352,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { + } else if (IS_GEN9_BC(i915)) { table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skl_mocs_table; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 259d7eb4e165..799d382eea79 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -62,20 +62,25 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) u32 pg_enable; int i; - /* 2b: Program RC6 thresholds.*/ - 
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + /* + * With GuCRC, these parameters are set by GuC + */ + if (!intel_uc_uses_guc_rc(>->uc)) { + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); - set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC_SLEEP, 0); - set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + } /* * 2c: Program Coarse Power Gating Policies. @@ -98,11 +103,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); - /* 3a: Enable RC6 */ - rc6->ctl_enable = - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1); + /* 3a: Enable RC6 + * + * With GuCRC, we do not enable bit 31 of RC_CTL, + * thus allowing GuC to control RC6 entry/exit fully instead. + * We will not set the HW ENABLE and EI bits + */ + if (!intel_guc_rc_enable(>->uc.guc)) + rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; + else + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); pg_enable = GEN9_RENDER_PG_ENABLE | @@ -126,7 +139,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) enum intel_engine_id id; /* 2b: Program RC6 thresholds.*/ - if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 10) { + if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) { set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { @@ -513,6 +526,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + + /* Take control of RC6 back from GuC */ + intel_guc_rc_disable(>->uc.guc); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index f7366b054f8e..a74b72f50cc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -9,7 +9,8 @@ #include "intel_region_ttm.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" -#include "intel_region_lmem.h" +#include "gem/i915_gem_ttm.h" +#include "gt/intel_gt.h" static int init_fake_lmem_bar(struct intel_memory_region *mem) { @@ -107,7 +108,7 @@ out_no_io: static const struct intel_memory_region_ops intel_region_lmem_ops = { .init = region_lmem_init, .release = region_lmem_release, - .init_object = __i915_gem_lmem_object_init, + .init_object = __i915_gem_ttm_object_init, }; struct intel_memory_region * @@ -157,7 +158,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if 
(!IS_DG1_REVID(uncore->i915, DG1_REVID_A0, DG1_REVID_B0)) + if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0)) return false; *start = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 48f009203917..4da4c5234ef0 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -8,6 +8,7 @@ #include <linux/types.h> #include "i915_gem.h" +#include "i915_gem_ww.h" struct i915_request; struct intel_context; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 72251638d4ea..91200c43951f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -22,7 +22,6 @@ #include "intel_reset.h" #include "uc/intel_guc.h" -#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -39,21 +38,6 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) intel_uncore_rmw_fw(uncore, reg, clr, 0); } -static void skip_context(struct i915_request *rq) -{ - struct intel_context *hung_ctx = rq->context; - - list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) { - if (!i915_request_is_active(rq)) - return; - - if (rq->context == hung_ctx) { - i915_request_set_error_once(rq, -EIO); - __i915_request_skip(rq); - } - } -} - static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { struct drm_i915_file_private *file_priv = ctx->file_priv; @@ -88,10 +72,8 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; - if (intel_context_is_closed(rq->context)) { - intel_context_set_banned(rq->context); + if (intel_context_is_closed(rq->context)) return true; - } rcu_read_lock(); ctx = rcu_dereference(rq->context->gem_context); @@ -123,11 +105,9 @@ static bool mark_guilty(struct i915_request *rq) banned = !i915_gem_context_is_recoverable(ctx); if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) banned = true; - if (banned) { + if (banned) drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - intel_context_set_banned(rq->context); - } client_mark_guilty(ctx, banned); @@ -149,6 +129,8 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { + bool banned = false; + RQ_TRACE(rq, "guilty? 
%s\n", yesno(guilty)); GEM_BUG_ON(__i915_request_is_complete(rq)); @@ -156,13 +138,15 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) if (guilty) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); - if (mark_guilty(rq)) - skip_context(rq); + banned = mark_guilty(rq); } else { i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); } rcu_read_unlock(); + + if (banned) + intel_context_ban(rq->context, rq); } static bool i915_in_reset(struct pci_dev *pdev) @@ -515,8 +499,14 @@ static int gen11_reset_engines(struct intel_gt *gt, [VCS1] = GEN11_GRDOM_MEDIA2, [VCS2] = GEN11_GRDOM_MEDIA3, [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, [VECS0] = GEN11_GRDOM_VECS, [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, }; struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -826,6 +816,8 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) __intel_engine_reset(engine, stalled_mask & engine->mask); local_bh_enable(); + intel_uc_reset(>->uc, true); + intel_ggtt_restore_fences(gt->ggtt); return err; @@ -850,6 +842,8 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) if (awake & engine->mask) intel_engine_pm_put(engine); } + + intel_uc_reset_finish(>->uc); } static void nop_submit_request(struct i915_request *request) @@ -903,6 +897,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) for_each_engine(engine, gt, id) if (engine->reset.cancel) engine->reset.cancel(engine); + intel_uc_cancel_requests(>->uc); local_bh_enable(); reset_finish(gt, awake); @@ -1191,6 +1186,9 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); + if (intel_engine_uses_guc(engine)) + return -ENODEV; + if (!intel_engine_pm_get_if_awake(engine)) return 0; @@ -1201,13 +1199,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (intel_engine_uses_guc(engine)) - ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); - else - ret = intel_gt_reset_engine(engine); + ret = intel_gt_reset_engine(engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret); + ENGINE_TRACE(engine, "Failed to reset %s, err: %d\n", engine->name, ret); goto out; } @@ -1341,7 +1336,8 @@ void intel_gt_handle_error(struct intel_gt *gt, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + if (!intel_uc_uses_guc_submission(>->uc) && + intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index dbf5f14a136f..1b32dadfb8c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -49,6 +49,7 @@ static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) * intel_ring_begin()). 
*/ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); + GEM_BUG_ON(!IS_ALIGNED(rq->ring->emit, 8)); /* RING_TAIL qword align */ } static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 37d74d4ed59b..2958e2fae380 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -16,6 +16,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_engine_heartbeat.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -342,9 +343,9 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) u32 head; rq = NULL; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); rcu_read_lock(); - list_for_each_entry(pos, &engine->active.requests, sched.link) { + list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) { if (!__i915_request_is_complete(pos)) { rq = pos; break; @@ -399,7 +400,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) } engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -411,16 +412,16 @@ static void reset_cancel(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->active.requests, sched.link) + list_for_each_entry(request, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(request)); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -586,9 +587,44 @@ static void ring_context_reset(struct intel_context *ce) clear_bit(CONTEXT_VALID_BIT, &ce->flags); } +static void ring_context_ban(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine; + + if (!rq || !i915_request_is_active(rq)) + return; + + engine = rq->engine; + lockdep_assert_held(&engine->sched_engine->lock); + list_for_each_entry_continue(rq, &engine->sched_engine->requests, + sched.link) + if (rq->context == ce) { + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); + } +} + +static void ring_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request cancellation by %s", + current->comm); +} + static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc, + .cancel_request = ring_context_cancel_request, + + .ban = ring_context_ban, + .pre_pin = ring_context_pre_pin, .pin = ring_context_pin, .unpin = ring_context_unpin, @@ -1047,6 +1083,25 @@ static void setup_irq(struct intel_engine_cs *engine) } } +static void add_to_engine(struct i915_request *rq) +{ + 
lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void remove_from_engine(struct i915_request *rq) +{ + spin_lock_irq(&rq->engine->sched_engine->lock); + list_del_init(&rq->sched.link); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&rq->engine->sched_engine->lock); + + i915_request_notify_execute_cb_imm(rq); +} + static void setup_common(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1064,6 +1119,9 @@ static void setup_common(struct intel_engine_cs *engine) engine->reset.cancel = reset_cancel; engine->reset.finish = reset_finish; + engine->add_active_request = add_to_engine; + engine->remove_active_request = remove_from_engine; + engine->cops = &ring_context_ops; engine->request_alloc = ring_request_alloc; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 06e9a8ed4e03..d812b27835f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,20 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return >->uc.guc.slpc; +} + +static bool rps_uses_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return intel_uc_uses_guc_slpc(>->uc); +} + static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) { return mask & ~rps->pm_intrmsk_mbz; @@ -167,6 +181,8 @@ static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); + GEM_BUG_ON(rps_uses_slpc(rps)); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", rps->pm_events, rps_pm_mask(rps, rps->last_freq)); @@ -771,6 +787,8 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) struct drm_i915_private *i915 = rps_to_i915(rps); u32 swreq; + GEM_BUG_ON(rps_uses_slpc(rps)); + if (GRAPHICS_VER(i915) >= 9) swreq = GEN9_FREQUENCY(val); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) @@ -861,6 +879,9 @@ void intel_rps_park(struct intel_rps *rps) { int adj; + if (!intel_rps_is_enabled(rps)) + return; + GEM_BUG_ON(atomic_read(&rps->num_waiters)); if (!intel_rps_clear_active(rps)) @@ -999,7 +1020,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(i915) || IS_BROADWELL(i915) || - IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { u32 ddcc_status = 0; if (sandybridge_pcode_read(i915, @@ -1012,7 +1033,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->max_freq); } - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -1356,6 +1377,9 @@ void intel_rps_enable(struct intel_rps *rps) if (!HAS_RPS(i915)) return; + if (rps_uses_slpc(rps)) + return; + intel_gt_check_clock_frequency(rps_to_gt(rps)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); @@ -1829,6 +1853,9 @@ void intel_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + if (rps_uses_slpc(rps)) + return; + if (IS_CHERRYVIEW(i915)) chv_rps_init(rps); else if (IS_VALLEYVIEW(i915)) @@ -1877,10 +1904,17 @@ void intel_rps_init(struct intel_rps *rps) if 
(GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + + /* GuC needs ARAT expired interrupt unmasked */ + if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc)) + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; } void intel_rps_sanitize(struct intel_rps *rps) { + if (rps_uses_slpc(rps)) + return; + if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } @@ -1936,6 +1970,176 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +u32 intel_rps_read_punit_req(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + return intel_uncore_read(uncore, GEN6_RPNSWREQ); +} + +static u32 intel_rps_get_req(u32 pureq) +{ + u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; + + return req; +} + +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) +{ + u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps)); + + return intel_gpu_freq(rps, freq); +} + +u32 intel_rps_get_requested_frequency(struct intel_rps *rps) +{ + if (rps_uses_slpc(rps)) + return intel_rps_read_punit_req_frequency(rps); + else + return intel_gpu_freq(rps, rps->cur_freq); +} + +u32 intel_rps_get_max_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->max_freq_softlimit; + else + return intel_gpu_freq(rps, rps->max_freq_softlimit); +} + +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp0_freq; + else + return intel_gpu_freq(rps, rps->rp0_freq); +} + +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp1_freq; + else + return intel_gpu_freq(rps, rps->rp1_freq); +} + +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq; + else + return intel_gpu_freq(rps, rps->min_freq); +} + +static int set_max_freq(struct intel_rps *rps, u32 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int ret = 0; + + mutex_lock(&rps->lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + if (val > rps->rp0_freq) + drm_dbg(&i915->drm, "User requested overclocking to %d\n", + intel_gpu_freq(rps, val)); + + rps->max_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* + * We still need *_set_rps to process the new max_delay and + * update the interrupt limits and PMINTRMSK even though + * frequency request may be unchanged. 
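+ * + * For example (illustration only, made-up values): with + * min_freq_softlimit == 8 and a new max_freq_softlimit == 10 (both in + * opcode units), a cur_freq of 12 is clamped down to 10 before being + * handed to intel_rps_set(), while a cur_freq of 9 is passed through + * unchanged.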
+ */ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_max_freq(slpc, val); + else + return set_max_freq(rps, val); +} + +u32 intel_rps_get_min_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq_softlimit; + else + return intel_gpu_freq(rps, rps->min_freq_softlimit); +} + +static int set_min_freq(struct intel_rps *rps, u32 val) +{ + int ret = 0; + + mutex_lock(&rps->lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + rps->min_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* + * We still need *_set_rps to process the new min_delay and + * update the interrupt limits and PMINTRMSK even though + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_min_freq(slpc, val); + else + return set_min_freq(rps, val); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; @@ -2129,4 +2333,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_rps.c" +#include "selftest_slpc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 1d2cfc98b510..4213bcce1667 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -31,6 +31,16 @@ int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_get_requested_frequency(struct intel_rps *rps); +u32 intel_rps_get_min_frequency(struct intel_rps *rps); +int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val); +u32 intel_rps_get_max_frequency(struct intel_rps *rps); +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); +u32 intel_rps_read_punit_req(struct intel_rps *rps); +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 367fd44b81c8..bbd272943c3f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -139,17 +139,36 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource. + * + * In addition, the concept of slice has been removed in Xe_HP. + * To be compatible with prior generations, assume a single slice + * across the entire device. 
Then calculate out the DSS for each + * workload type within that software slice. */ - intel_sseu_set_info(sseu, 1, 6, 16); + if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) + intel_sseu_set_info(sseu, 1, 32, 16); + else + intel_sseu_set_info(sseu, 1, 6, 16); - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + /* + * As mentioned above, Xe_HP does not have the concept of a slice. + * Enable one for software backwards compatibility. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + s_en = 0x1; + else + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); /* one bit per pair of EUs */ - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + else + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); @@ -188,83 +207,6 @@ static void gen11_sseu_info_init(struct intel_gt *gt) sseu->has_eu_pg = 1; } -static void gen10_sseu_info_init(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - struct sseu_dev_info *sseu = >->info.sseu; - const u32 fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - const int eu_mask = 0xff; - u32 subslice_mask, eu_en; - int s, ss; - - intel_sseu_set_info(sseu, 6, 4, 8); - - sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> - GEN10_F2_S_ENA_SHIFT; - - /* Slice0 */ - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE0); - for (ss = 0; ss < sseu->max_subslices; ss++) - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); - /* Slice1 */ - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE1); - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); - /* Slice2 */ - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); - /* Slice3 */ - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE2); - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); - /* Slice4 */ - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); - /* Slice5 */ - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN10_EU_DISABLE3); - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); - - subslice_mask = (1 << 4) - 1; - subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); - - for (s = 0; s < sseu->max_slices; s++) { - u32 subslice_mask_with_eus = subslice_mask; - - for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu_get_eus(sseu, s, ss) == 0) - subslice_mask_with_eus &= ~BIT(ss); - } - - /* - * Slice0 can have up to 3 subslices, but there are only 2 in - * slice1/2. - */ - intel_sseu_set_subslices(sseu, s, s == 0 ? - subslice_mask_with_eus : - subslice_mask_with_eus & 0x3); - } - - sseu->eu_total = compute_eu_total(sseu); - - /* - * CNL is expected to always have a uniform distribution - * of EU across subslices with the exception that any one - * EU in any one subslice may be fused off for die - * recovery. - */ - sseu->eu_per_subslice = - intel_sseu_subslice_total(sseu) ? 
- DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) : - 0; - - /* No restrictions on Power Gating */ - sseu->has_slice_pg = 1; - sseu->has_subslice_pg = 1; - sseu->has_eu_pg = 1; -} - static void cherryview_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; @@ -592,8 +534,6 @@ void intel_sseu_info_init(struct intel_gt *gt) bdw_sseu_info_init(gt); else if (GRAPHICS_VER(i915) == 9) gen9_sseu_info_init(gt); - else if (GRAPHICS_VER(i915) == 10) - gen10_sseu_info_init(gt); else if (GRAPHICS_VER(i915) == 11) gen11_sseu_info_init(gt); else if (GRAPHICS_VER(i915) >= 12) @@ -759,3 +699,21 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu, } } } + +u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice) +{ + u16 slice_mask = 0; + int i; + + WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask)); + + for (i = 0; dss_mask; i++) { + if (dss_mask & GENMASK(dss_per_slice - 1, 0)) + slice_mask |= BIT(i); + + dss_mask >>= dss_per_slice; + } + + return slice_mask; +} + diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4cd1a8a7298a..22fef98887c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -15,13 +15,17 @@ struct drm_i915_private; struct intel_gt; struct drm_printer; -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ +#define GEN_MAX_SLICES (3) /* SKL upper bound */ +#define GEN_MAX_SUBSLICES (32) /* XEHPSDV upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) #define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) +#define GEN_DSS_PER_GSLICE 4 +#define GEN_DSS_PER_CSLICE 8 +#define GEN_DSS_PER_MSLICE 8 + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; @@ -104,4 +108,6 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p); void intel_sseu_print_topology(const struct sseu_dev_info *sseu, struct drm_printer *p); +u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice); + #endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 714fe8495775..1ba8b7da9d37 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -50,10 +50,10 @@ static void cherryview_sseu_device_status(struct intel_gt *gt, #undef SS_MAX } -static void gen10_sseu_device_status(struct intel_gt *gt, +static void gen11_sseu_device_status(struct intel_gt *gt, struct sseu_dev_info *sseu) { -#define SS_MAX 6 +#define SS_MAX 8 struct intel_uncore *uncore = gt->uncore; const struct intel_gt_info *info = >->info; u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; @@ -267,8 +267,8 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) bdw_sseu_device_status(gt, &sseu); else if (GRAPHICS_VER(i915) == 9) gen9_sseu_device_status(gt, &sseu); - else if (GRAPHICS_VER(i915) >= 10) - gen10_sseu_device_status(gt, &sseu); + else if (GRAPHICS_VER(i915) >= 11) + gen11_sseu_device_status(gt, &sseu); } i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b62d1e31a645..aae609d7d85d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -150,13 +150,14 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, - u32 clear, u32 set, u32 read_mask) + u32 clear, u32 set, u32 read_mask, bool masked_reg) { struct i915_wa wa = { .reg = reg, .clr = clear, .set = set, .read = read_mask, + .masked_reg = masked_reg, }; _wa_add(wal, &wa); @@ -165,7 +166,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { - wa_add(wal, reg, clear, set, clear); + wa_add(wal, reg, clear, set, clear, false); } static void @@ -200,20 +201,20 @@ wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); + wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); } static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); + wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { - wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask); + wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -514,53 +515,15 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } -static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) -{ - /* WaForceContextSaveRestoreNonCoherent:cnl */ - wa_masked_en(wal, CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - wa_masked_en(wal, COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix:cnl */ - wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - wa_masked_field_set(wal, GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaDisableEarlyEOT:cnl */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); -} - static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; - - /* WaDisableBankHangMode:icl */ + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL); - /* Wa_1604370585:icl (pre-prod) - * Formerly known as WaPushConstantDereferenceHoldDisable - */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - PUSH_CONSTANT_DEREF_DISABLE); - /* WaForceEnableNonCoherent:icl * This is not the same workaround as in early Gen9 platforms, where * lacking this could cause system hangs, but coherency performance @@ -570,23 +533,11 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); - /* Wa_2006611047:icl (pre-prod) - * Formerly known as WaDisableImprovedTdlClkGating - */ 
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN11_TDL_CLOCK_GATING_FIX_DISABLE); - - /* Wa_2006665173:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); - /* WaEnableFloatBlendOptimization:icl */ - wa_write_clr_set(wal, - GEN10_CACHE_MODE_SS, - 0, /* write-only, so skip validation */ - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), + 0 /* write-only, so skip validation */, + true); /* WaDisableGPGPUMidThreadPreemption:icl */ wa_masked_field_set(wal, GEN8_CS_CHICKEN1, @@ -631,7 +582,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128, - 0); + 0, false); } static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -640,15 +591,16 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, gen12_ctx_gt_tuning_init(engine, wal); /* - * Wa_1409142259:tgl - * Wa_1409347922:tgl - * Wa_1409252684:tgl - * Wa_1409217633:tgl - * Wa_1409207793:tgl - * Wa_1409178076:tgl - * Wa_1408979724:tgl - * Wa_14010443199:rkl - * Wa_14010698770:rkl + * Wa_1409142259:tgl,dg1,adl-p + * Wa_1409347922:tgl,dg1,adl-p + * Wa_1409252684:tgl,dg1,adl-p + * Wa_1409217633:tgl,dg1,adl-p + * Wa_1409207793:tgl,dg1,adl-p + * Wa_1409178076:tgl,dg1,adl-p + * Wa_1408979724:tgl,dg1,adl-p + * Wa_14010443199:tgl,rkl,dg1,adl-p + * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p + * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -668,7 +620,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, - 0); + 0, false); + + /* + * Wa_14012131227:dg1 + * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p + */ + wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -703,8 +662,6 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, gen12_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 11) icl_ctx_workarounds_init(engine, wal); - else if (IS_CANNONLAKE(i915)) - cnl_ctx_workarounds_init(engine, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_ctx_workarounds_init(engine, wal); else if (IS_GEMINILAKE(i915)) @@ -839,7 +796,7 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_add(wal, HSW_ROW_CHICKEN3, 0, _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), - 0 /* XXX does this reg exist? */); + 0 /* XXX does this reg exist? 
*/, true); /* WaVSRefCountFullforceMissDisable:hsw */ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); @@ -882,30 +839,19 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) + if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } static void -bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaInPlaceDecompressionHang:bxt */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { gen9_gt_workarounds_init(i915, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(i915, 0, STEP_B0)) + if (IS_KBL_GT_STEP(i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -943,98 +889,144 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } +static void __set_mcr_steering(struct i915_wa_list *wal, + i915_reg_t steering_reg, + unsigned int slice, unsigned int subslice) +{ + u32 mcr, mcr_mask; + + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + + wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); +} + +static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, + unsigned int slice, unsigned int subslice) +{ + drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); + + __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); +} + static void -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = &i915->gt.info.sseu; unsigned int slice, subslice; - u32 l3_en, mcr, mcr_mask; - GEM_BUG_ON(GRAPHICS_VER(i915) < 10); + GEM_BUG_ON(GRAPHICS_VER(i915) < 11); + GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); + slice = 0; /* - * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl - * L3Banks could be fused off in single slice scenario. If that is - * the case, we might need to program MCR select to a valid L3Bank - * by default, to make sure we correctly read certain registers - * later on (in the range 0xB100 - 0xB3FF). + * Although a platform may have subslices, we need to always steer + * reads to the lowest instance that isn't fused off. When Render + * Power Gating is enabled, grabbing forcewake will only power up a + * single subslice (the "minconfig") if there isn't a real workload + * that needs to be run; this means that if we steer register reads to + * one of the higher subslices, we run the risk of reading back 0's or + * random garbage. + */ + subslice = __ffs(intel_sseu_get_subslices(sseu, slice)); + + /* + * If the subslice we picked above also steers us to a valid L3 bank, + * then we can just rely on the default steering and won't need to + * worry about explicitly re-steering L3BANK reads later. 
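+ * + * For example (hypothetical fuse values): with a subslice mask of + * 0b1100 the lowest enabled instance is subslice 2, so reads are + * steered to slice 0 / subslice 2; if l3bank_mask also has bit 2 set, + * that same steering already covers the L3BANK range (0xB100 - 0xB3FF) + * and the explicit L3BANK steering table is dropped below.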
+ */ + if (i915->gt.info.l3bank_mask & BIT(subslice)) + i915->gt.steering_table[L3BANK] = NULL; + + __add_mcr_wa(i915, wal, slice, subslice); +} + +static void +xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = gt->i915; + const struct sseu_dev_info *sseu = >->info.sseu; + unsigned long slice, subslice = 0, slice_mask = 0; + u64 dss_mask = 0; + u32 lncf_mask = 0; + int i; + + /* + * On Xe_HP the steering increases in complexity. There are now several + * more units that require steering and we're not guaranteed to be able + * to find a common setting for all of them. These are: + * - GSLICE (fusable) + * - DSS (sub-unit within gslice; fusable) + * - L3 Bank (fusable) + * - MSLICE (fusable) + * - LNCF (sub-unit within mslice; always present if mslice is present) * - * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl - * Before any MMIO read into slice/subslice specific registers, MCR - * packet control register needs to be programmed to point to any - * enabled s/ss pair. Otherwise, incorrect values will be returned. - * This means each subsequent MMIO read will be forwarded to an - * specific s/ss combination, but this is OK since these registers - * are consistent across s/ss in almost all cases. In the rare - * occasions, such as INSTDONE, where this value is dependent - * on s/ss combo, the read should be done with read_subslice_reg. + * We'll do our default/implicit steering based on GSLICE (in the + * sliceid field) and DSS (in the subsliceid field). If we can + * find overlap between the valid MSLICE and/or LNCF values with + * a suitable GSLICE, then we can just re-use the default value and + * skip and explicit steering at runtime. * - * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both - * to which subslice, or to which L3 bank, the respective mmio reads - * will go, we have to find a common index which works for both - * accesses. + * We only need to look for overlap between GSLICE/MSLICE/LNCF to find + * a valid sliceid value. DSS steering is the only type of steering + * that utilizes the 'subsliceid' bits. * - * Case where we cannot find a common index fortunately should not - * happen in production hardware, so we only emit a warning instead of - * implementing something more complex that requires checking the range - * of every MMIO read. + * Also note that, even though the steering domain is called "GSlice" + * and it is encoded in the register using the gslice format, the spec + * says that the combined (geometry | compute) fuse should be used to + * select the steering. */ - if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { - u32 l3_fuse = - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & - GEN10_L3BANK_MASK; + /* Find the potential gslice candidates */ + dss_mask = intel_sseu_get_subslices(sseu, 0); + slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE); - drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); - l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); - } else { - l3_en = ~0; - } + /* + * Find the potential LNCF candidates. Either LNCF within a valid + * mslice is fine. 
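+ * + * Rough worked example (made-up fuse values): a dss_mask of 0x00f0 with + * GEN_DSS_PER_GSLICE == 4 gives a gslice candidate mask of 0x2 from + * intel_slicemask_from_dssmask(), while an mslice_mask of 0x3 expands + * to an LNCF candidate mask of 0xf (two LNCFs per mslice); gslice 1 + * then lies in both the LNCF candidate mask and the mslice_mask, so it + * can serve as the common sliceid.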
+ */ + for_each_set_bit(i, >->info.mslice_mask, GEN12_MAX_MSLICES) + lncf_mask |= (0x3 << (i * 2)); - slice = fls(sseu->slice_mask) - 1; - subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); - if (!subslice) { - drm_warn(&i915->drm, - "No common index found between subslice mask %x and L3 bank mask %x!\n", - intel_sseu_get_subslices(sseu, slice), l3_en); - subslice = fls(l3_en); - drm_WARN_ON(&i915->drm, !subslice); - } - subslice--; - - if (GRAPHICS_VER(i915) >= 11) { - mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - } else { - mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + /* + * Are there any sliceid values that work for both GSLICE and LNCF + * steering? + */ + if (slice_mask & lncf_mask) { + slice_mask &= lncf_mask; + gt->steering_table[LNCF] = NULL; } - drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); + /* How about sliceid values that also work for MSLICE steering? */ + if (slice_mask & gt->info.mslice_mask) { + slice_mask &= gt->info.mslice_mask; + gt->steering_table[MSLICE] = NULL; + } - wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); -} + slice = __ffs(slice_mask); + subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE)); + WARN_ON(subslice > GEN_DSS_PER_GSLICE); + WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); -static void -cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - wa_init_mcr(i915, wal); + __add_mcr_wa(i915, wal, slice, subslice); - /* WaInPlaceDecompressionHang:cnl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + /* + * SQIDI ranges are special because they use different steering + * registers than everything else we work with. On XeHP SDV and + * DG2-G10, any value in the steering registers will work fine since + * all instances are present, but DG2-G11 only has SQIDI instances at + * ID's 2 and 3, so we need to steer to one of those. For simplicity + * we'll just steer to a hardcoded "2" since that value will work + * everywhere. 
+ */ + __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2); + __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2); } static void icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); - - /* WaInPlaceDecompressionHang:icl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + icl_wa_init_mcr(i915, wal); /* WaModifyGamTlbPartitioning:icl */ wa_write_clr_set(wal, @@ -1057,18 +1049,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_GAMW_ECO_DEV_RW_IA, GAMW_ECO_DEV_CTX_RELOAD_DISABLE); - /* Wa_1405779004:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, - MSCUNIT_CLKGATE_DIS); - - /* Wa_1406838659:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_write_or(wal, - INF_UNIT_LEVEL_CLKGATE, - CGPSF_CLKGATE_DIS); - /* Wa_1406463099:icl * Formerly known as WaGamTlbPendError */ @@ -1078,10 +1058,16 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) + IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + + /* + * This is not a documented workaround, but rather an optimization + * to reduce sampler power. + */ + wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } /* @@ -1111,10 +1097,13 @@ static void gen12_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); + icl_wa_init_mcr(i915, wal); - /* Wa_14011060649:tgl,rkl,dg1,adls */ + /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */ wa_14011060649(i915, wal); + + /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ + wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void @@ -1123,19 +1112,19 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL); } @@ -1146,7 +1135,7 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1607087056:dg1 */ - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0)) + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1165,9 +1154,17 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void +xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + xehp_init_mcr(&i915->gt, wal); +} + +static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_DG1(i915)) + if (IS_XEHPSDV(i915)) + xehpsdv_gt_workarounds_init(i915, wal); + else if (IS_DG1(i915)) dg1_gt_workarounds_init(i915, wal); else if 
(IS_TIGERLAKE(i915)) tgl_gt_workarounds_init(i915, wal); @@ -1175,8 +1172,6 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); else if (GRAPHICS_VER(i915) == 11) icl_gt_workarounds_init(i915, wal); - else if (IS_CANNONLAKE(i915)) - cnl_gt_workarounds_init(i915, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_gt_workarounds_init(i915, wal); else if (IS_GEMINILAKE(i915)) @@ -1184,7 +1179,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) else if (IS_KABYLAKE(i915)) kbl_gt_workarounds_init(i915, wal); else if (IS_BROXTON(i915)) - bxt_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) @@ -1247,8 +1242,9 @@ wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) } static void -wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) +wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) { + struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw; unsigned long flags; struct i915_wa *wa; @@ -1263,13 +1259,16 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - if (wa->clr) - intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set); - else - intel_uncore_write_fw(uncore, wa->reg, wa->set); + u32 val, old = 0; + + /* open-coded rmw due to steering */ + old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0; + val = (old & ~wa->clr) | wa->set; + if (val != old || !wa->clr) + intel_uncore_write_fw(uncore, wa->reg, val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, - intel_uncore_read_fw(uncore, wa->reg), + wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg), wal->name, "application"); } @@ -1279,28 +1278,39 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(gt->uncore, >->i915->gt_wa_list); + wa_list_apply(gt, >->i915->gt_wa_list); } -static bool wa_list_verify(struct intel_uncore *uncore, +static bool wa_list_verify(struct intel_gt *gt, const struct i915_wa_list *wal, const char *from) { + struct intel_uncore *uncore = gt->uncore; struct i915_wa *wa; + enum forcewake_domains fw; + unsigned long flags; unsigned int i; bool ok = true; + fw = wal_get_fw_for_rmw(uncore, wal); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ok &= wa_verify(wa, - intel_uncore_read(uncore, wa->reg), + intel_gt_read_register_fw(gt, wa->reg), wal->name, from); + intel_uncore_forcewake_put__locked(uncore, fw); + spin_unlock_irqrestore(&uncore->lock, flags); + return ok; } bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); + return wa_list_verify(gt, >->i915->gt_wa_list, from); } __maybe_unused @@ -1438,17 +1448,6 @@ static void cml_whitelist_build(struct intel_engine_cs *engine) cfl_whitelist_build(engine); } -static void cnl_whitelist_build(struct intel_engine_cs *engine) -{ - struct i915_wa_list *w = &engine->whitelist; - - if (engine->class != RENDER_CLASS) - return; - - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - whitelist_reg(w, GEN8_CS_CHICKEN1); -} - static void icl_whitelist_build(struct 
intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1542,7 +1541,7 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) && + if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), @@ -1562,8 +1561,6 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) tgl_whitelist_build(engine); else if (GRAPHICS_VER(i915) == 11) icl_whitelist_build(engine); - else if (IS_CANNONLAKE(i915)) - cnl_whitelist_build(engine); else if (IS_COMETLAKE(i915)) cml_whitelist_build(engine); else if (IS_COFFEELAKE(i915)) @@ -1612,8 +1609,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || - IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] * Wa_1607063988:tgl[a0],dg1[a0] @@ -1623,7 +1620,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) @@ -1633,44 +1630,46 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); } - if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) || + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1606931601:tgl,rkl,dg1,adl-s */ + /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* * Wa_18011464164:tgl[B0+],dg1[B0+] * Wa_22010931296:tgl[B0+],dg1[B0+] - * Wa_14010919138:rkl,dg1,adl-s + * Wa_14010919138:rkl,dg1,adl-s,adl-p */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); /* - * Wa_1606700617:tgl,dg1 - * Wa_22010271021:tgl,rkl,dg1, adl-s + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p */ wa_masked_en(wal, GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */ + /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* * Wa_1409085225:tgl - * Wa_14010229206:tgl,rkl,dg1[a0],adl-s + * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl @@ -1688,8 +1687,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1406941453:tgl,rkl,dg1 */ + if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || + IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { + /* 
Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN10_SAMPLER_MODE, ENABLE_SMALLPL); @@ -1701,11 +1701,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3, _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); - /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); - /* * Wa_1405543622:icl * Formerly known as WaGAPZPriorityScheme @@ -1735,19 +1730,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_L3SQCREG4, GEN11_LQSC_CLEAN_EVICT_DISABLE); - /* WaForwardProgressSoftReset:icl */ - wa_write_or(wal, - GEN10_SCRATCH_LNCF2, - PMFLUSHDONE_LNICRSDROP | - PMFLUSH_GAPL3UNBLOCK | - PMFLUSHDONE_LNEBLK); - - /* Wa_1406609255:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_write_or(wal, - GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH); - /* Wa_1606682166:icl */ wa_write_or(wal, GEN7_SARCHKMD, @@ -1947,10 +1929,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - wa_add(wal, GEN7_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); + wa_masked_field_set(wal, + GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); } if (IS_GRAPHICS_VER(i915, 6, 7)) @@ -2000,10 +1982,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - wa_add(wal, - GEN6_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); + wa_masked_field_set(wal, + GEN6_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); /* WaDisable_RenderCache_OperationalFlush:snb */ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); @@ -2024,7 +2006,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_add(wal, MI_MODE, 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), /* XXX bit doesn't stick on Broadwater */ - IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); + IS_I965G(i915) ? 
0 : VS_TIMER_DISPATCH, true); if (GRAPHICS_VER(i915) == 4) /* @@ -2039,7 +2021,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_add(wal, ECOSKPD, 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), - 0 /* XXX bit doesn't stick on Broadwater */); + 0 /* XXX bit doesn't stick on Broadwater */, + true); } static void @@ -2048,7 +2031,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_E0)) { + if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); @@ -2081,7 +2064,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) void intel_engine_apply_workarounds(struct intel_engine_cs *engine) { - wa_list_apply(engine->uncore, &engine->wa_list); + wa_list_apply(engine->gt, &engine->wa_list); } struct mcr_range { @@ -2107,12 +2090,31 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; +static const struct mcr_range mcr_ranges_xehp[] = { + { .start = 0x4000, .end = 0x4aff }, + { .start = 0x5200, .end = 0x52ff }, + { .start = 0x5400, .end = 0x7fff }, + { .start = 0x8140, .end = 0x815f }, + { .start = 0x8c80, .end = 0x8dff }, + { .start = 0x94d0, .end = 0x955f }, + { .start = 0x9680, .end = 0x96ff }, + { .start = 0xb000, .end = 0xb3ff }, + { .start = 0xc800, .end = 0xcfff }, + { .start = 0xd800, .end = 0xd8ff }, + { .start = 0xdc00, .end = 0xffff }, + { .start = 0x17000, .end = 0x17fff }, + { .start = 0x24a00, .end = 0x24a7f }, + {}, +}; + static bool mcr_range(struct drm_i915_private *i915, u32 offset) { const struct mcr_range *mcr_ranges; int i; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + mcr_ranges = mcr_ranges_xehp; + else if (GRAPHICS_VER(i915) >= 12) mcr_ranges = mcr_ranges_gen12; else if (GRAPHICS_VER(i915) >= 8) mcr_ranges = mcr_ranges_gen8; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index c214111ea367..1e873681795d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -15,6 +15,7 @@ struct i915_wa { u32 clr; u32 set; u32 read; + bool masked_reg; }; struct i915_wa_list { diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 32589c6625e1..2c1af030310c 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -235,6 +235,34 @@ static void mock_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->hw_lock, flags); } +static void mock_add_to_engine(struct i915_request *rq) +{ + lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void mock_remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. 
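+ * + * For example, a request on a virtual engine may move from siblings[0] + * to siblings[1] between our READ_ONCE(rq->engine) and the spin_lock(); + * the loop below then drops the stale sibling's lock and retries + * against the engine the request currently points at.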
+ */ + + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->sched_engine->lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); + locked = engine; + } + list_del_init(&rq->sched.link); + spin_unlock_irq(&locked->sched_engine->lock); +} + static void mock_reset_prepare(struct intel_engine_cs *engine) { } @@ -253,10 +281,10 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) del_timer_sync(&mock->hw_delay); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) + list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); @@ -269,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&mock->hw_queue); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void mock_reset_finish(struct intel_engine_cs *engine) @@ -283,7 +311,8 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); - intel_breadcrumbs_free(engine->breadcrumbs); + i915_sched_engine_put(engine->sched_engine); + intel_breadcrumbs_put(engine->breadcrumbs); intel_context_unpin(engine->kernel_context); intel_context_put(engine->kernel_context); @@ -320,6 +349,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_flush = mock_emit_flush; engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; + engine->base.add_active_request = mock_add_to_engine; + engine->base.remove_active_request = mock_remove_from_engine; engine->base.reset.prepare = mock_reset_prepare; engine->base.reset.rewind = mock_reset_rewind; @@ -345,14 +376,18 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; - intel_engine_init_active(engine, ENGINE_MOCK); + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); + if (!engine->sched_engine) + return -ENOMEM; + engine->sched_engine->private_data = engine; + intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); engine->breadcrumbs = intel_breadcrumbs_create(NULL); if (!engine->breadcrumbs) - return -ENOMEM; + goto err_schedule; ce = create_kernel_context(engine); if (IS_ERR(ce)) @@ -365,7 +400,9 @@ int mock_engine_init(struct intel_engine_cs *engine) return 0; err_breadcrumbs: - intel_breadcrumbs_free(engine->breadcrumbs); + intel_breadcrumbs_put(engine->breadcrumbs); +err_schedule: + i915_sched_engine_put(engine->sched_engine); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 26685b927169..fa7b99a671dd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -209,7 +209,13 @@ static int __live_active_context(struct intel_engine_cs *engine) * This test makes sure that the context is kept alive until a * subsequent idle-barrier (emitted when the engine wakeref hits 0 * with no more outstanding requests). + * + * In GuC submission mode we don't use idle barriers and we instead + * get a message from the GuC to signal that it is safe to unpin the + * context from memory. 
*/ + if (intel_engine_uses_guc(engine)) + return 0; if (intel_engine_pm_is_awake(engine)) { pr_err("%s is awake before starting %s!\n", @@ -357,7 +363,11 @@ static int __live_remote_context(struct intel_engine_cs *engine) * on the context image remotely (intel_context_prepare_remote_request), * which inserts foreign fences into intel_context.active, does not * clobber the idle-barrier. + * + * In GuC submission mode we don't use idle barriers. */ + if (intel_engine_uses_guc(engine)) + return 0; if (intel_engine_pm_is_awake(engine)) { pr_err("%s is awake before starting %s!\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 4896e4ccad50..317eebf086c3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -405,3 +405,25 @@ void st_engine_heartbeat_enable(struct intel_engine_cs *engine) engine->props.heartbeat_interval_ms = engine->defaults.heartbeat_interval_ms; } + +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = 0; + + /* + * Park the heartbeat but without holding the PM lock as that + * makes the engines appear not-idle. Note that if/when unpark + * is called due to the PM lock being acquired later the + * heartbeat still won't be enabled because of the above = 0. + */ + if (intel_engine_pm_get_if_awake(engine)) { + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } +} + +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h index cd27113d5400..81da2cd8e406 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h @@ -9,6 +9,8 @@ struct intel_engine_cs; void st_engine_heartbeat_disable(struct intel_engine_cs *engine); +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine); void st_engine_heartbeat_enable(struct intel_engine_cs *engine); +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine); #endif /* SELFTEST_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 72cca3f0da21..75569666105d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -173,8 +173,8 @@ static int __live_engine_timestamps(struct intel_engine_cs *engine) d_ctx = trifilter(s_ctx); d_ctx *= engine->gt->clock_frequency; - if (IS_ICELAKE(engine->i915)) - d_ring *= 12500000; /* Fixed 80ns for icl ctx timestamp? */ + if (GRAPHICS_VER(engine->i915) == 11) + d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? 
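(1 tick per 80 ns works out to 12.5 MHz, hence the 12500000 factor)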
*/ else d_ring *= engine->gt->clock_frequency; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 1c8108d30b85..f12ffe797639 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -43,7 +43,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); timeout += jiffies; do { @@ -273,7 +273,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) }; /* Alternatively preempt the spinner with ce[1] */ - engine->schedule(rq[1], &attr); + engine->sched_engine->schedule(rq[1], &attr); } /* And switch back to ce[0] for good measure */ @@ -553,13 +553,13 @@ static int live_pin_rewind(void *arg) static int engine_lock_reset_tasklet(struct intel_engine_cs *engine) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->sched_engine->tasklet); local_bh_disable(); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, &engine->gt->reset.flags)) { local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); intel_gt_set_wedged(engine->gt); return -EBUSY; @@ -574,7 +574,7 @@ static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine) &engine->gt->reset.flags); local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); } static int live_hold_reset(void *arg) @@ -628,7 +628,7 @@ static int live_hold_reset(void *arg) if (err) goto out; - engine->execlists.tasklet.callback(&engine->execlists.tasklet); + engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); i915_request_get(rq); @@ -917,7 +917,7 @@ release_queue(struct intel_engine_cs *engine, i915_request_add(rq); local_bh_disable(); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); local_bh_enable(); /* kick tasklet */ i915_request_put(rq); @@ -1200,7 +1200,7 @@ static int live_timeslice_rewind(void *arg) while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ /* Wait for the timeslice to kick in */ del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); intel_engine_flush_submission(engine); } /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ @@ -1342,7 +1342,7 @@ static int live_timeslice_queue(void *arg) err = PTR_ERR(rq); goto err_heartbeat; } - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); err = wait_for_submit(engine, rq, HZ / 2); if (err) { pr_err("%s: Timed out trying to submit semaphores\n", @@ -1539,12 +1539,12 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. 
*/ - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) return -ENOMEM; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -1741,12 +1741,12 @@ static int live_preempt(void *arg) if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -1833,11 +1833,11 @@ static int live_late_preempt(void *arg) if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; @@ -1884,7 +1884,7 @@ static int live_late_preempt(void *arg) } attr.priority = I915_PRIORITY_MAX; - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); if (!igt_wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); @@ -1927,7 +1927,7 @@ struct preempt_client { static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(gt->i915); + c->ctx = kernel_context(gt->i915, NULL); if (!c->ctx) return -ENOMEM; @@ -2497,7 +2497,7 @@ static int live_suppress_self_preempt(void *arg) i915_request_add(rq_b); GEM_BUG_ON(i915_request_completed(rq_a)); - engine->schedule(rq_a, &attr); + engine->sched_engine->schedule(rq_a, &attr); igt_spinner_end(&a.spin); if (!igt_wait_for_spinner(&b.spin, rq_b)) { @@ -2629,7 +2629,7 @@ static int live_chain_preempt(void *arg) i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -2810,7 +2810,7 @@ static int __live_preempt_ring(struct intel_engine_cs *engine, goto err_ce; } - tmp->ring = __intel_context_ring_size(ring_sz); + tmp->ring_size = ring_sz; err = intel_context_pin(tmp); if (err) { @@ -2988,7 +2988,7 @@ static int live_preempt_gang(void *arg) break; /* Submit each spinner at increasing priority */ - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); } while (prio <= I915_PRIORITY_MAX && !__igt_timeout(end_time, NULL)); pr_debug("%s: Preempt chain of %d requests\n", @@ -3236,7 +3236,7 @@ static int preempt_user(struct intel_engine_cs *engine, i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); if (i915_request_wait(rq, 0, HZ / 2) < 0) err = -ETIME; @@ -3384,12 +3384,12 @@ static int live_preempt_timeout(void *arg) if (igt_spinner_init(&spin_lo, gt)) return -ENOMEM; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -3561,12 +3561,16 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) #define BATCH BIT(0) 
{ struct task_struct *tsk[I915_NUM_ENGINES] = {}; - struct preempt_smoke arg[I915_NUM_ENGINES]; + struct preempt_smoke *arg; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned long count; int err = 0; + arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL); + if (!arg) + return -ENOMEM; + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -3574,7 +3578,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - tsk[id] = kthread_run(smoke_crescendo_thread, &arg, + tsk[id] = kthread_run(smoke_crescendo_thread, arg, "igt/smoke:%d", id); if (IS_ERR(tsk[id])) { err = PTR_ERR(tsk[id]); @@ -3603,6 +3607,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, smoke->gt->info.num_engines, smoke->ncontext); + + kfree(arg); return 0; } @@ -3676,7 +3682,7 @@ static int live_preempt_smoke(void *arg) } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.gt->i915); + smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL); if (!smoke.contexts[n]) goto err_ctx; } @@ -3727,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ve[n] = intel_execlists_create_virtual(siblings, nsibling); + ve[n] = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve[n])) { err = PTR_ERR(ve[n]); nctx = n; @@ -3923,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. */ - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -4054,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt, i915_request_add(rq); } - ce = intel_execlists_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4106,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt, /* XXX We do not handle oversubscription and fairness with normal rq */ for (n = 0; n < nsibling; n++) { - ce = intel_execlists_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4208,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt, if (err) goto out_scratch; - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -4328,234 +4334,6 @@ static int live_virtual_preserved(void *arg) return 0; } -static int bond_virtual_engine(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings, - unsigned int nsibling, - unsigned int flags) -#define BOND_SCHEDULE BIT(0) -{ - struct intel_engine_cs *master; - struct i915_request *rq[16]; - enum intel_engine_id id; - struct igt_spinner spin; - unsigned long n; - int err; - - /* - * A set of bonded requests is intended to be run concurrently - * across a number of engines. We use one request per-engine - * and a magic fence to schedule each of the bonded requests - * at the same time. 
A consequence of our current scheduler is that - * we only move requests to the HW ready queue when the request - * becomes ready, that is when all of its prerequisite fences have - * been signaled. As one of those fences is the master submit fence, - * there is a delay on all secondary fences as the HW may be - * currently busy. Equally, as all the requests are independent, - * they may have other fences that delay individual request - * submission to HW. Ergo, we do not guarantee that all requests are - * immediately submitted to HW at the same time, just that if the - * rules are abided by, they are ready at the same time as the - * first is submitted. Userspace can embed semaphores in its batch - * to ensure parallel execution of its phases as it requires. - * Though naturally it gets requested that perhaps the scheduler should - * take care of parallel execution, even across preemption events on - * different HW. (The proper answer is of course "lalalala".) - * - * With the submit-fence, we have identified three possible phases - * of synchronisation depending on the master fence: queued (not - * ready), executing, and signaled. The first two are quite simple - * and checked below. However, the signaled master fence handling is - * contentious. Currently we do not distinguish between a signaled - * fence and an expired fence, as once signaled it does not convey - * any information about the previous execution. It may even be freed - * and hence checking later it may not exist at all. Ergo we currently - * do not apply the bonding constraint for an already signaled fence, - * as our expectation is that it should not constrain the secondaries - * and is outside of the scope of the bonded request API (i.e. all - * userspace requests are meant to be running in parallel). As - * it imposes no constraint, and is effectively a no-op, we do not - * check below as normal execution flows are checked extensively above. - * - * XXX Is the degenerate handling of signaled submit fences the - * expected behaviour for userspace?
- */ - - GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - err = 0; - rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, gt, id) { - struct i915_sw_fence fence = {}; - struct intel_context *ce; - - if (master->class == class) - continue; - - ce = intel_context_create(master); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - - rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); - intel_context_put(ce); - if (IS_ERR(rq[0])) { - err = PTR_ERR(rq[0]); - goto out; - } - i915_request_get(rq[0]); - - if (flags & BOND_SCHEDULE) { - onstack_fence_init(&fence); - err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, - &fence, - GFP_KERNEL); - } - - i915_request_add(rq[0]); - if (err < 0) - goto out; - - if (!(flags & BOND_SCHEDULE) && - !igt_wait_for_spinner(&spin, rq[0])) { - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - struct intel_context *ve; - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_virtual_engine_attach_bond(ve->engine, - master, - siblings[n]); - if (err) { - intel_context_put(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_context_pin(ve); - intel_context_put(ve); - if (err) { - onstack_fence_fini(&fence); - goto out; - } - - rq[n + 1] = i915_request_create(ve); - intel_context_unpin(ve); - if (IS_ERR(rq[n + 1])) { - err = PTR_ERR(rq[n + 1]); - onstack_fence_fini(&fence); - goto out; - } - i915_request_get(rq[n + 1]); - - err = i915_request_await_execution(rq[n + 1], - &rq[0]->fence, - ve->engine->bond_execute); - i915_request_add(rq[n + 1]); - if (err < 0) { - onstack_fence_fini(&fence); - goto out; - } - } - onstack_fence_fini(&fence); - intel_engine_flush_submission(master); - igt_spinner_end(&spin); - - if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { - pr_err("Master request did not execute (on %s)!\n", - rq[0]->engine->name); - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - if (i915_request_wait(rq[n + 1], 0, - MAX_SCHEDULE_TIMEOUT) < 0) { - err = -EIO; - goto out; - } - - if (rq[n + 1]->engine != siblings[n]) { - pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", - siblings[n]->name, - rq[n + 1]->engine->name, - rq[0]->engine->name); - err = -EINVAL; - goto out; - } - } - - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - rq[0] = ERR_PTR(-ENOMEM); - } - -out: - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - if (igt_flush_test(gt->i915)) - err = -EIO; - - igt_spinner_fini(&spin); - return err; -} - -static int live_virtual_bond(void *arg) -{ - static const struct phase { - const char *name; - unsigned int flags; - } phases[] = { - { "", 0 }, - { "schedule", BOND_SCHEDULE }, - { }, - }; - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - const struct phase *p; - int nsibling; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - for (p = phases; p->name; p++) { - err = bond_virtual_engine(gt, - class, siblings, nsibling, - p->flags); - if (err) { - pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", - __func__, p->name, class, nsibling, err); - return err; - } - } - } - - return 0; -} 
- static int reset_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) @@ -4576,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt, if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_spin; @@ -4606,13 +4384,13 @@ static int reset_virtual_engine(struct intel_gt *gt, if (err) goto out_heartbeat; - engine->execlists.tasklet.callback(&engine->execlists.tasklet); + engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Fake a preemption event; failed of course */ - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); GEM_BUG_ON(rq->engine != engine); /* Reset the engine while keeping our active request on hold */ @@ -4721,7 +4499,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_mask), SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_slice), - SUBTEST(live_virtual_bond), SUBTEST(live_virtual_reset), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 853246fad05f..2c1ed32ca5ac 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -17,6 +17,8 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" #include "selftests/mock_drm.h" @@ -42,7 +44,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; - h->ctx = kernel_context(gt->i915); + h->ctx = kernel_context(gt->i915, NULL); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); @@ -378,6 +380,7 @@ static int igt_reset_nop(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); + pr_err("[%s] Create context failed: %d!\n", engine->name, err); break; } @@ -387,6 +390,8 @@ static int igt_reset_nop(void *arg) rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create request failed: %d!\n", + engine->name, err); break; } @@ -401,24 +406,31 @@ static int igt_reset_nop(void *arg) igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) { + pr_err("[%s] GT is wedged!\n", engine->name); err = -EIO; break; } if (i915_reset_count(global) != reset_count + ++count) { - pr_err("Full GPU reset not recorded!\n"); + pr_err("[%s] Reset not recorded: %d vs %d + %d!\n", + engine->name, i915_reset_count(global), reset_count, count); err = -EINVAL; break; } err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - if (igt_flush_test(gt->i915)) + if (igt_flush_test(gt->i915)) { + pr_err("Post flush failed: %d!\n", err); err = -EIO; + } + return err; } @@ -440,9 +452,19 @@ static int igt_reset_nop_engine(void *arg) IGT_TIMEOUT(end_time); int err; + if (intel_engine_uses_guc(engine)) { + /* Engine level resets are triggered by GuC when a hang + * is detected. They can't be triggered by the KMD any + * more. 
Thus a nop batch cannot be used as a reset test + */ + continue; + } + ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); + } reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); @@ -549,9 +571,15 @@ static int igt_reset_fail_engine(void *arg) IGT_TIMEOUT(end_time); int err; + /* Can't manually break the reset if i915 doesn't perform it */ + if (intel_engine_uses_guc(engine)) + continue; + ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); + } st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); @@ -686,8 +714,12 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; unsigned long count; + bool using_guc = intel_engine_uses_guc(engine); IGT_TIMEOUT(end_time); + if (using_guc && !active) + continue; + if (active && !intel_engine_can_store_dword(engine)) continue; @@ -705,13 +737,24 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) set_bit(I915_RESET_ENGINE + id, >->reset.flags); count = 0; do { - if (active) { - struct i915_request *rq; + struct i915_request *rq = NULL; + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + break; + } + if (active) { rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + pr_err("[%s] Create hang request failed: %d!\n", + engine->name, err); + goto restore; } i915_request_get(rq); @@ -727,34 +770,59 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) i915_request_put(rq); err = -EIO; - break; + goto restore; } + } - i915_request_put(rq); + if (!using_guc) { + err = intel_engine_reset(engine, NULL); + if (err) { + pr_err("intel_engine_reset(%s) failed, err:%d\n", + engine->name, err); + goto skip; + } } - err = intel_engine_reset(engine, NULL); - if (err) { - pr_err("intel_engine_reset(%s) failed, err:%d\n", - engine->name, err); - break; + if (rq) { + /* Ensure the reset happens and kills the engine */ + err = intel_selftest_wait_for_rq(rq); + if (err) + pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", + engine->name, rq->fence.context, + rq->fence.seqno, rq->context->guc_id, err); } +skip: + if (rq) + i915_request_put(rq); + if (i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); err = -EINVAL; - break; + goto restore; } - if (i915_reset_engine_count(global, engine) != - ++reset_engine_count) { - pr_err("%s engine reset not recorded!\n", - engine->name); - err = -EINVAL; - break; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + if (i915_reset_engine_count(global, engine) != + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); + err = -EINVAL; + goto restore; + } } count++; + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err2) + pr_err("[%s] Restore policy failed: %d!\n", engine->name, err); + if (err == 0) + err = err2; + if (err) + break; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); @@ -765,12 +833,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) break; err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } - if (intel_gt_is_wedged(gt)) + if (intel_gt_is_wedged(gt)) { + pr_err("GT is wedged!\n"); err = -EIO; + } if (active) hang_fini(&h); @@ -807,7 +879,7 @@ static int active_request_put(struct i915_request *rq) if (!rq) return 0; - if (i915_request_wait(rq, 0, 5 * HZ) < 0) { + if (i915_request_wait(rq, 0, 10 * HZ) < 0) { GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", rq->engine->name, rq->fence.context, @@ -837,6 +909,7 @@ static int active_engine(void *data) ce[count] = intel_context_create(engine); if (IS_ERR(ce[count])) { err = PTR_ERR(ce[count]); + pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); while (--count) intel_context_put(ce[count]); return err; @@ -852,23 +925,26 @@ static int active_engine(void *data) new = intel_context_create_request(ce[idx]); if (IS_ERR(new)) { err = PTR_ERR(new); + pr_err("[%s] Create request #%d failed: %d!\n", engine->name, idx, err); break; } rq[idx] = i915_request_get(new); i915_request_add(new); - if (engine->schedule && arg->flags & TEST_PRIORITY) { + if (engine->sched_engine->schedule && arg->flags & TEST_PRIORITY) { struct i915_sched_attr attr = { .priority = i915_prandom_u32_max_state(512, &prng), }; - engine->schedule(rq[idx], &attr); + engine->sched_engine->schedule(rq[idx], &attr); } err = active_request_put(old); - if (err) + if (err) { + pr_err("[%s] Request put failed: %d!\n", engine->name, err); break; + } cond_resched(); } @@ -876,6 +952,9 @@ static int active_engine(void *data) for (count = 0; count < ARRAY_SIZE(rq); count++) { int err__ = active_request_put(rq[count]); + if (err) + pr_err("[%s] Request put #%ld failed: %d!\n", engine->name, count, err); + /* Keep the first error */ if (!err) err = err__; @@ -916,10 +995,13 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; + bool using_guc = intel_engine_uses_guc(engine); IGT_TIMEOUT(end_time); - if (flags & TEST_ACTIVE && - !intel_engine_can_store_dword(engine)) + if (flags & TEST_ACTIVE) { + if (!intel_engine_can_store_dword(engine)) + continue; + } else if (using_guc) continue; if (!wait_for_idle(engine)) { @@ -949,6 +1031,7 @@ static int __igt_reset_engines(struct intel_gt *gt, "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); goto unwind; } @@ -958,16 +1041,27 @@ static int __igt_reset_engines(struct intel_gt 
*gt, yield(); /* start all threads before we begin */ - st_engine_heartbeat_disable(engine); + st_engine_heartbeat_disable_no_pm(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + break; + } if (flags & TEST_ACTIVE) { rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + pr_err("[%s] Create hang request failed: %d!\n", + engine->name, err); + goto restore; } i915_request_get(rq); @@ -983,32 +1077,44 @@ static int __igt_reset_engines(struct intel_gt *gt, i915_request_put(rq); err = -EIO; - break; + goto restore; } + } else { + intel_engine_pm_get(engine); } - err = intel_engine_reset(engine, NULL); - if (err) { - pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", - engine->name, test_name, err); - break; + if (!using_guc) { + err = intel_engine_reset(engine, NULL); + if (err) { + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); + goto restore; + } + } + + if (rq) { + /* Ensure the reset happens and kills the engine */ + err = intel_selftest_wait_for_rq(rq); + if (err) + pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", + engine->name, rq->fence.context, + rq->fence.seqno, rq->context->guc_id, err); } count++; if (rq) { if (rq->fence.error != -EIO) { - pr_err("i915_reset_engine(%s:%s):" - " failed to reset request %llx:%lld\n", + pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno); + rq->fence.seqno, rq->context->guc_id); i915_request_put(rq); GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - break; + goto restore; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -1027,12 +1133,15 @@ static int __igt_reset_engines(struct intel_gt *gt, GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - break; + goto restore; } i915_request_put(rq); } + if (!(flags & TEST_ACTIVE)) + intel_engine_pm_put(engine); + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1044,22 +1153,34 @@ static int __igt_reset_engines(struct intel_gt *gt, "%s\n", engine->name); err = -EIO; - break; + goto restore; } + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err2) + pr_err("[%s] Restore policy failed: %d!\n", engine->name, err2); + if (err == 0) + err = err2; + if (err) + break; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - st_engine_heartbeat_enable(engine); + st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); - reported = i915_reset_engine_count(global, engine); - reported -= threads[engine->id].resets; - if (reported != count) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, test_name, count, reported); - if (!err) - err = -EINVAL; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + reported = i915_reset_engine_count(global, engine); + reported -= threads[engine->id].resets; + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); + if (!err) + err = -EINVAL; + } } unwind: @@ -1078,15 +1199,18 @@ unwind: 
} put_task_struct(threads[tmp].task); - if (other->uabi_class != engine->uabi_class && - threads[tmp].resets != - i915_reset_engine_count(global, other)) { - pr_err("Innocent engine %s was reset (count=%ld)\n", - other->name, - i915_reset_engine_count(global, other) - - threads[tmp].resets); - if (!err) - err = -EINVAL; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + if (other->uabi_class != engine->uabi_class && + threads[tmp].resets != + i915_reset_engine_count(global, other)) { + pr_err("Innocent engine %s was reset (count=%ld)\n", + other->name, + i915_reset_engine_count(global, other) - + threads[tmp].resets); + if (!err) + err = -EINVAL; + } } } @@ -1101,8 +1225,10 @@ unwind: break; err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } if (intel_gt_is_wedged(gt)) @@ -1180,12 +1306,15 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); goto unlock; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto fini; } @@ -1310,12 +1439,15 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, /* Check that we can recover an unbind stuck on a hanging request */ err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { err = PTR_ERR(obj); + pr_err("[%s] Create object failed: %d!\n", engine->name, err); goto fini; } @@ -1330,12 +1462,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, arg.vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(arg.vma)) { err = PTR_ERR(arg.vma); + pr_err("[%s] VMA instance failed: %d!\n", engine->name, err); goto out_obj; } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto out_obj; } @@ -1347,6 +1481,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); + pr_err("[%s] VMA pin failed: %d!\n", engine->name, err); goto out_obj; } @@ -1363,8 +1498,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, i915_vma_lock(arg.vma); err = i915_request_await_object(rq, arg.vma->obj, flags & EXEC_OBJECT_WRITE); - if (err == 0) + if (err == 0) { err = i915_vma_move_to_active(arg.vma, rq, flags); + if (err) + pr_err("[%s] Move to active failed: %d!\n", engine->name, err); + } else { + pr_err("[%s] Request await failed: %d!\n", engine->name, err); + } + i915_vma_unlock(arg.vma); if (flags & EXEC_OBJECT_NEEDS_FENCE) @@ -1392,6 +1533,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, tsk = kthread_run(fn, &arg, "igt/evict_vma"); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); tsk = NULL; goto out_reset; } @@ -1508,17 +1650,29 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, gt, id) { + struct intel_selftest_saved_policy saved; struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; + bool using_guc = intel_engine_uses_guc(engine); if (!intel_engine_can_store_dword(engine)) continue; + if (using_guc) { + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK); + if (err) { + pr_err("[%s] Modify 
policy failed: %d!\n", engine->name, err); + goto fini; + } + } + prev = hang_create_request(&h, engine); if (IS_ERR(prev)) { err = PTR_ERR(prev); - goto fini; + pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err); + goto restore; } i915_request_get(prev); @@ -1532,7 +1686,8 @@ static int igt_reset_queue(void *arg) rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto fini; + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); + goto restore; } i915_request_get(rq); @@ -1557,7 +1712,7 @@ static int igt_reset_queue(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); - goto fini; + goto restore; } if (!wait_until_running(&h, prev)) { @@ -1575,7 +1730,7 @@ static int igt_reset_queue(void *arg) intel_gt_set_wedged(gt); err = -EIO; - goto fini; + goto restore; } reset_count = fake_hangcheck(gt, BIT(id)); @@ -1586,7 +1741,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } if (rq->fence.error) { @@ -1595,7 +1750,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } if (i915_reset_count(global) == reset_count) { @@ -1603,7 +1758,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } i915_request_put(prev); @@ -1618,9 +1773,24 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(gt->i915); +restore: + if (using_guc) { + int err2 = intel_selftest_restore_policy(engine, &saved); + + if (err2) + pr_err("%s:%d> [%s] Restore policy failed: %d!\n", + __func__, __LINE__, engine->name, err2); + if (err == 0) + err = err2; + } if (err) + goto fini; + + err = igt_flush_test(gt->i915); + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } fini: @@ -1653,12 +1823,15 @@ static int igt_handle_error(void *arg) return 0; err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto err_fini; } @@ -1702,7 +1875,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p, const char *mode) { - struct tasklet_struct * const t = &engine->execlists.tasklet; + struct tasklet_struct * const t = &engine->sched_engine->tasklet; int err; GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", @@ -1743,12 +1916,15 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, return err; err = hang_init(&h, engine->gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto out; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3119016d9910..b0977a3b699b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -49,7 +49,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); timeout += jiffies; do { @@ -1613,12 
+1613,12 @@ static void garbage_reset(struct intel_engine_cs *engine, local_bh_disable(); if (!test_and_set_bit(bit, lock)) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->sched_engine->tasklet); if (!rq->fence.error) __intel_engine_reset_bh(engine, NULL); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); clear_and_wake_up_bit(bit, lock); } local_bh_enable(); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c new file mode 100644 index 000000000000..12ef2837c89b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -0,0 +1,669 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/sort.h> + +#include "selftests/i915_random.h" + +static const unsigned int sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + CHUNK_SZ - SZ_4K, + CHUNK_SZ, + CHUNK_SZ + SZ_4K, + SZ_64M, +}; + +static struct drm_i915_gem_object * +create_lmem_or_internal(struct drm_i915_private *i915, size_t size) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_lmem(i915, size, 0); + if (!IS_ERR(obj)) + return obj; + + return i915_gem_object_create_internal(i915, size); +} + +static int copy(struct intel_migrate *migrate, + int (*fn)(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out), + u32 sz, struct rnd_state *prng) +{ + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *src, *dst; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + u32 *vaddr; + int err = 0; + int i; + + src = create_lmem_or_internal(i915, sz); + if (IS_ERR(src)) + return 0; + + dst = i915_gem_object_create_internal(i915, sz); + if (IS_ERR(dst)) + goto err_free_src; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(src, &ww); + if (err) + continue; + + err = i915_gem_object_lock(dst, &ww); + if (err) + continue; + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + continue; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = i; + i915_gem_object_flush_map(src); + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto unpin_src; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(dst); + + err = fn(migrate, &ww, src, dst, &rq); + if (!err) + continue; + + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + } + i915_gem_object_unpin_map(dst); +unpin_src: + i915_gem_object_unpin_map(src); + } + if (err) + goto err_out; + + if (rq) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } + i915_request_put(rq); + } + + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != x) { + pr_err("%ps failed, size: %u, offset: %zu\n", + fn, sz, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(dst); + i915_gem_object_unpin_map(src); + +err_out: + i915_gem_object_put(dst); +err_free_src: + i915_gem_object_put(src); + + return err; +} + +static int clear(struct intel_migrate *migrate, + int (*fn)(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, 
+ struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out), + u32 sz, struct rnd_state *prng) +{ + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + u32 *vaddr; + int err = 0; + int i; + + obj = create_lmem_or_internal(i915, sz); + if (IS_ERR(obj)) + return 0; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + continue; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(obj); + + err = fn(migrate, &ww, obj, sz, &rq); + if (!err) + continue; + + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + } + i915_gem_object_unpin_map(obj); + } + if (err) + goto err_out; + + if (rq) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } + i915_request_put(rq); + } + + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != sz) { + pr_err("%ps failed, size: %u, offset: %zu\n", + fn, sz, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(obj); +err_out: + i915_gem_object_put(obj); + + return err; +} + +static int __migrate_copy(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out) +{ + return intel_migrate_copy(migrate, ww, NULL, + src->mm.pages->sgl, src->cache_level, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, dst->cache_level, + i915_gem_object_is_lmem(dst), + out); +} + +static int __global_copy(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out) +{ + return intel_context_migrate_copy(migrate->context, NULL, + src->mm.pages->sgl, src->cache_level, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, dst->cache_level, + i915_gem_object_is_lmem(dst), + out); +} + +static int +migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return copy(migrate, __migrate_copy, sz, prng); +} + +static int +global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return copy(migrate, __global_copy, sz, prng); +} + +static int __migrate_clear(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out) +{ + return intel_migrate_clear(migrate, ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + i915_gem_object_is_lmem(obj), + value, out); +} + +static int __global_clear(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out) +{ + return intel_context_migrate_clear(migrate->context, NULL, + obj->mm.pages->sgl, + obj->cache_level, + i915_gem_object_is_lmem(obj), + value, out); +} + +static int +migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return clear(migrate, __migrate_clear, sz, prng); +} + +static int +global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return clear(migrate, __global_clear, sz, prng); 
+} + +static int live_migrate_copy(void *arg) +{ + struct intel_migrate *migrate = arg; + struct drm_i915_private *i915 = migrate->context->engine->i915; + I915_RND_STATE(prng); + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + int err; + + err = migrate_copy(migrate, sizes[i], &prng); + if (err == 0) + err = global_copy(migrate, sizes[i], &prng); + i915_gem_drain_freed_objects(i915); + if (err) + return err; + } + + return 0; +} + +static int live_migrate_clear(void *arg) +{ + struct intel_migrate *migrate = arg; + struct drm_i915_private *i915 = migrate->context->engine->i915; + I915_RND_STATE(prng); + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + int err; + + err = migrate_clear(migrate, sizes[i], &prng); + if (err == 0) + err = global_clear(migrate, sizes[i], &prng); + + i915_gem_drain_freed_objects(i915); + if (err) + return err; + } + + return 0; +} + +struct threaded_migrate { + struct intel_migrate *migrate; + struct task_struct *tsk; + struct rnd_state prng; +}; + +static int threaded_migrate(struct intel_migrate *migrate, + int (*fn)(void *arg), + unsigned int flags) +{ + const unsigned int n_cpus = num_online_cpus() + 1; + struct threaded_migrate *thread; + I915_RND_STATE(prng); + unsigned int i; + int err = 0; + + thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL); + if (!thread) + return 0; + + for (i = 0; i < n_cpus; ++i) { + struct task_struct *tsk; + + thread[i].migrate = migrate; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk = kthread_run(fn, &thread[i], "igt-%d", i); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + break; + } + + get_task_struct(tsk); + thread[i].tsk = tsk; + } + + msleep(10); /* start all threads before we kthread_stop() */ + + for (i = 0; i < n_cpus; ++i) { + struct task_struct *tsk = thread[i].tsk; + int status; + + if (IS_ERR_OR_NULL(tsk)) + continue; + + status = kthread_stop(tsk); + if (status && !err) + err = status; + + put_task_struct(tsk); + } + + kfree(thread); + return err; +} + +static int __thread_migrate_copy(void *arg) +{ + struct threaded_migrate *tm = arg; + + return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_migrate_copy(void *arg) +{ + return threaded_migrate(arg, __thread_migrate_copy, 0); +} + +static int __thread_global_copy(void *arg) +{ + struct threaded_migrate *tm = arg; + + return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_global_copy(void *arg) +{ + return threaded_migrate(arg, __thread_global_copy, 0); +} + +static int __thread_migrate_clear(void *arg) +{ + struct threaded_migrate *tm = arg; + + return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int __thread_global_clear(void *arg) +{ + struct threaded_migrate *tm = arg; + + return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_migrate_clear(void *arg) +{ + return threaded_migrate(arg, __thread_migrate_clear, 0); +} + +static int thread_global_clear(void *arg) +{ + return threaded_migrate(arg, __thread_global_clear, 0); +} + +int intel_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_migrate_copy), + SUBTEST(live_migrate_clear), + SUBTEST(thread_migrate_copy), + SUBTEST(thread_migrate_clear), + SUBTEST(thread_global_copy), + SUBTEST(thread_global_clear), + }; + struct intel_gt *gt = &i915->gt; + + if (!gt->migrate.context) + return 0; + + return i915_subtests(tests, >->migrate); +} + +static struct drm_i915_gem_object * 
+create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) +{ + struct drm_i915_gem_object *obj = NULL; + int err; + + if (try_lmem) + obj = i915_gem_object_create_lmem(gt->i915, sz, 0); + + if (IS_ERR_OR_NULL(obj)) { + obj = i915_gem_object_create_internal(gt->i915, sz); + if (IS_ERR(obj)) + return obj; + } + + i915_gem_object_trylock(obj); + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return obj; +} + +static int wrap_ktime_compare(const void *A, const void *B) +{ + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_clear_blt(struct intel_context *ce, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + size_t sz) +{ + ktime_t t[5]; + int pass; + int err = 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct i915_request *rq; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = intel_context_migrate_clear(ce, NULL, sg, cache_level, + is_lmem, 0, &rq); + if (rq) { + if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) + err = -EIO; + i915_request_put(rq); + } + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: %zd KiB fill: %lld MiB/s\n", + ce->engine->name, sz >> 10, + div64_u64(mul_u32_u32(4 * sz, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + return 0; +} + +static int perf_clear_blt(void *arg) +{ + struct intel_gt *gt = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *dst; + int err; + + dst = create_init_lmem_internal(gt, sizes[i], true); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + err = __perf_clear_blt(gt->migrate.context, + dst->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(dst), + sizes[i]); + + i915_gem_object_unlock(dst); + i915_gem_object_put(dst); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct intel_context *ce, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + size_t sz) +{ + ktime_t t[5]; + int pass; + int err = 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct i915_request *rq; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = intel_context_migrate_copy(ce, NULL, + src, src_cache_level, + src_is_lmem, + dst, dst_cache_level, + dst_is_lmem, + &rq); + if (rq) { + if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) + err = -EIO; + i915_request_put(rq); + } + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: %zd KiB copy: %lld MiB/s\n", + ce->engine->name, sz >> 10, + div64_u64(mul_u32_u32(4 * sz, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + return 0; +} + +static int perf_copy_blt(void *arg) +{ + struct intel_gt *gt = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = create_init_lmem_internal(gt, sizes[i], true); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = create_init_lmem_internal(gt, sizes[i], false); + if (IS_ERR(dst)) { + err = 
PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(gt->migrate.context, + src->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(dst), + sizes[i]); + + i915_gem_object_unlock(dst); + i915_gem_object_put(dst); +err_src: + i915_gem_object_unlock(src); + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + +int intel_migrate_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_clear_blt), + SUBTEST(perf_copy_blt), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!gt->migrate.context) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index b9bb0e6e97f7..13d25bf2a94a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -10,6 +10,7 @@ #include "gem/selftests/mock_context.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" struct live_mocs { struct drm_i915_mocs_table table; @@ -28,7 +29,7 @@ static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) return ce; /* We build large requests to read the registers from the ring */ - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; return ce; } @@ -318,7 +319,8 @@ static int live_mocs_clean(void *arg) } static int active_engine_reset(struct intel_context *ce, - const char *reason) + const char *reason, + bool using_guc) { struct igt_spinner spin; struct i915_request *rq; @@ -335,9 +337,13 @@ static int active_engine_reset(struct intel_context *ce, } err = request_add_spin(rq, &spin); - if (err == 0) + if (err == 0 && !using_guc) err = intel_engine_reset(ce->engine, reason); + /* Ensure the reset happens and kills the engine */ + if (err == 0) + err = intel_selftest_wait_for_rq(rq); + igt_spinner_end(&spin); igt_spinner_fini(&spin); @@ -345,21 +351,23 @@ static int active_engine_reset(struct intel_context *ce, } static int __live_mocs_reset(struct live_mocs *mocs, - struct intel_context *ce) + struct intel_context *ce, bool using_guc) { struct intel_gt *gt = ce->engine->gt; int err; if (intel_has_reset_engine(gt)) { - err = intel_engine_reset(ce->engine, "mocs"); - if (err) - return err; - - err = check_mocs_engine(mocs, ce); - if (err) - return err; + if (!using_guc) { + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } - err = active_engine_reset(ce, "mocs"); + err = active_engine_reset(ce, "mocs", using_guc); if (err) return err; @@ -395,19 +403,33 @@ static int live_mocs_reset(void *arg) igt_global_reset_lock(gt); for_each_engine(engine, gt, id) { + bool using_guc = intel_engine_uses_guc(engine); + struct intel_selftest_saved_policy saved; struct intel_context *ce; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) + break; ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); - break; + goto restore; } intel_engine_pm_get(engine); - err = __live_mocs_reset(&mocs, ce); - intel_engine_pm_put(engine); + err = __live_mocs_reset(&mocs, ce, using_guc); + + intel_engine_pm_put(engine); intel_context_put(ce); + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err == 0) + err = err2; if (err) 
break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 8784257ec808..7a50c9f4071b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -321,7 +321,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->sched_engine->tasklet; if (t->func) tasklet_disable(t); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c new file mode 100644 index 000000000000..9334bad131a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#define NUM_STEPS 5 +#define H2G_DELAY 50000 +#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) +#define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ + GEN9_FREQ_SCALER) + +static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_min_freq(slpc, freq); + if (ret) + pr_err("Could not set min frequency to [%u]\n", freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_max_freq(slpc, freq); + if (ret) + pr_err("Could not set maximum frequency [%u]\n", + freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int live_slpc_clamp_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + u32 slpc_min_freq, slpc_max_freq; + int err = 0; + + if (!intel_uc_uses_guc_slpc(>->uc)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 step, min_freq, req_freq; + u32 act_freq, max_act_freq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from min to max in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + /* Wait for GuC to detect business and raise + * requested frequency if necessary. 
+ */ + delay_for_h2g(); + + req_freq = intel_rps_read_punit_req_frequency(rps); + + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at least %d\n", req_freq, + min_freq - FREQUENCY_REQ_UNIT); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + break; + } + + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > max_act_freq) + max_act_freq = act_freq; + + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + } + + pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); + + /* Actual frequency should rise above min */ + if (max_act_freq == slpc_min_freq) { + pr_err("Actual freq did not rise above min\n"); + err = -EINVAL; + } + + if (err) + break; + } + + /* Restore min/max frequencies */ + slpc_set_max_freq(slpc, slpc_max_freq); + slpc_set_min_freq(slpc, slpc_min_freq); + + if (igt_flush_test(gt->i915)) + err = -EIO; + + intel_gt_pm_put(gt); + igt_spinner_fini(&spin); + intel_gt_pm_wait_for_idle(gt); + + return err; +} + +static int live_slpc_clamp_max(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; + struct intel_guc_slpc *slpc; + struct intel_rps *rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + u32 slpc_min_freq, slpc_max_freq; + + slpc = >->uc.guc.slpc; + rps = >->rps; + + if (!intel_uc_uses_guc_slpc(>->uc)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 max_freq, req_freq; + u32 act_freq, max_act_freq; + u32 step; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from max to min in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; + max_freq -= step) { + err = slpc_set_max_freq(slpc, max_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + st_engine_heartbeat_enable(engine); + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: SLPC spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + delay_for_h2g(); + + /* Verify that SWREQ indeed was set to specific value */ + req_freq = intel_rps_read_punit_req_frequency(rps); + + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at most %d\n", req_freq, + max_freq + FREQUENCY_REQ_UNIT); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + break; + } + + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > max_act_freq) + max_act_freq = act_freq; + + st_engine_heartbeat_enable(engine); + igt_spinner_end(&spin); + + if (err) + break; + } + + 
pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); + + /* Actual frequency should rise above min */ + if (max_act_freq == slpc_min_freq) { + pr_err("Actual freq did not rise above min\n"); + err = -EINVAL; + } + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + + if (err) + break; + } + + /* Restore min/max freq */ + slpc_set_max_freq(slpc, slpc_max_freq); + slpc_set_min_freq(slpc, slpc_min_freq); + + intel_gt_pm_put(gt); + igt_spinner_fini(&spin); + intel_gt_pm_wait_for_idle(gt); + + return err; +} + +int intel_slpc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_slpc_clamp_max), + SUBTEST(live_slpc_clamp_min), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 64da0c91dec1..d0b6a3afcf44 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -874,7 +874,7 @@ static int create_watcher(struct hwsp_watcher *w, if (IS_ERR(ce)) return PTR_ERR(ce); - ce->ring = __intel_context_ring_size(ringsz); + ce->ring_size = ringsz; w->rq = intel_context_create_request(ce); intel_context_put(ce); if (IS_ERR(w->rq)) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index c30754daf4b1..e623ac45f4aa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -12,6 +12,7 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" #include "selftests/mock_drm.h" #include "gem/selftests/igt_gem_utils.h" @@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine) return intel_engine_reset(engine, "live_workarounds"); } +static int do_guc_reset(struct intel_engine_cs *engine) +{ + /* Currently a no-op as the reset is handled by GuC */ + return 0; +} + static int switch_to_scratch_context(struct intel_engine_cs *engine, - struct igt_spinner *spin) + struct igt_spinner *spin, + struct i915_request **rq) { struct intel_context *ce; - struct i915_request *rq; int err = 0; ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + *rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - if (IS_ERR(rq)) { + if (IS_ERR(*rq)) { spin = NULL; - err = PTR_ERR(rq); + err = PTR_ERR(*rq); goto err; } - err = request_add_spin(rq, spin); + err = request_add_spin(*rq, spin); err: if (err && spin) igt_spinner_end(spin); @@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, { struct intel_context *ce, *tmp; struct igt_spinner spin; + struct i915_request *rq; intel_wakeref_t wakeref; int err; @@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, goto out_spin; } - err = switch_to_scratch_context(engine, &spin); + err = switch_to_scratch_context(engine, &spin, &rq); if (err) goto out_spin; + /* Ensure the spinner hasn't aborted */ + if (i915_request_completed(rq)) { + pr_err("%s spinner failed to start\n", name); + err = -ETIMEDOUT; + goto out_spin; + } + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); + /* Ensure the reset happens and kills the engine */ + if (err == 0) + err = 
intel_selftest_wait_for_rq(rq); + igt_spinner_end(&spin); if (err) { @@ -787,9 +806,28 @@ static int live_reset_whitelist(void *arg) continue; if (intel_has_reset_engine(gt)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); + if (intel_engine_uses_guc(engine)) { + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) + goto out; + + err = check_whitelist_across_reset(engine, + do_guc_reset, + "guc"); + + err2 = intel_selftest_restore_policy(engine, &saved); + if (err == 0) + err = err2; + } else { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + } + if (err) goto out; } @@ -1147,7 +1185,7 @@ verify_wa_lists(struct intel_gt *gt, struct wa_lists *lists, enum intel_engine_id id; bool ok = true; - ok &= wa_list_verify(gt->uncore, &lists->gt_wa_list, str); + ok &= wa_list_verify(gt, &lists->gt_wa_list, str); for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -1175,31 +1213,36 @@ live_gpu_reset_workarounds(void *arg) { struct intel_gt *gt = arg; intel_wakeref_t wakeref; - struct wa_lists lists; + struct wa_lists *lists; bool ok; if (!intel_has_gpu_reset(gt)) return 0; + lists = kzalloc(sizeof(*lists), GFP_KERNEL); + if (!lists) + return -ENOMEM; + pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(gt, &lists); + reference_lists_init(gt, lists); - ok = verify_wa_lists(gt, &lists, "before reset"); + ok = verify_wa_lists(gt, lists, "before reset"); if (!ok) goto out; intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); - ok = verify_wa_lists(gt, &lists, "after reset"); + ok = verify_wa_lists(gt, lists, "after reset"); out: - reference_lists_fini(gt, &lists); + reference_lists_fini(gt, lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); + kfree(lists); return ok ? 
0 : -ESRCH; } @@ -1214,43 +1257,57 @@ live_engine_reset_workarounds(void *arg) struct igt_spinner spin; struct i915_request *rq; intel_wakeref_t wakeref; - struct wa_lists lists; + struct wa_lists *lists; int ret = 0; if (!intel_has_reset_engine(gt)) return 0; + lists = kzalloc(sizeof(*lists), GFP_KERNEL); + if (!lists) + return -ENOMEM; + igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(gt, &lists); + reference_lists_init(gt, lists); for_each_engine(engine, gt, id) { + struct intel_selftest_saved_policy saved; + bool using_guc = intel_engine_uses_guc(engine); bool ok; + int ret2; pr_info("Verifying after %s reset...\n", engine->name); + ret = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (ret) + break; + ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - break; + goto restore; } - ok = verify_wa_lists(gt, &lists, "before reset"); - if (!ok) { - ret = -ESRCH; - goto err; - } + if (!using_guc) { + ok = verify_wa_lists(gt, lists, "before reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } - ret = intel_engine_reset(engine, "live_workarounds:idle"); - if (ret) { - pr_err("%s: Reset failed while idle\n", engine->name); - goto err; - } + ret = intel_engine_reset(engine, "live_workarounds:idle"); + if (ret) { + pr_err("%s: Reset failed while idle\n", engine->name); + goto err; + } - ok = verify_wa_lists(gt, &lists, "after idle reset"); - if (!ok) { - ret = -ESRCH; - goto err; + ok = verify_wa_lists(gt, lists, "after idle reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } } ret = igt_spinner_init(&spin, engine->gt); @@ -1271,32 +1328,49 @@ live_engine_reset_workarounds(void *arg) goto err; } - ret = intel_engine_reset(engine, "live_workarounds:active"); - if (ret) { - pr_err("%s: Reset failed on an active spinner\n", - engine->name); - igt_spinner_fini(&spin); - goto err; + /* Ensure the spinner hasn't aborted */ + if (i915_request_completed(rq)) { + ret = -ETIMEDOUT; + goto skip; } + if (!using_guc) { + ret = intel_engine_reset(engine, "live_workarounds:active"); + if (ret) { + pr_err("%s: Reset failed on an active spinner\n", + engine->name); + igt_spinner_fini(&spin); + goto err; + } + } + + /* Ensure the reset happens and kills the engine */ + if (ret == 0) + ret = intel_selftest_wait_for_rq(rq); + +skip: igt_spinner_end(&spin); igt_spinner_fini(&spin); - ok = verify_wa_lists(gt, &lists, "after busy reset"); - if (!ok) { + ok = verify_wa_lists(gt, lists, "after busy reset"); + if (!ok) ret = -ESRCH; - goto err; - } err: intel_context_put(ce); + +restore: + ret2 = intel_selftest_restore_policy(engine, &saved); + if (ret == 0) + ret = ret2; if (ret) break; } - reference_lists_fini(gt, &lists); + reference_lists_fini(gt, lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); + kfree(lists); igt_flush_test(gt->i915); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 90efef8a73e4..8ff582222aff 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -6,6 +6,113 @@ #ifndef _ABI_GUC_ACTIONS_ABI_H #define _ABI_GUC_ACTIONS_ABI_H +/** + * DOC: HOST2GUC_REGISTER_CTB + * + * This message is used as part of the `CTB based communication`_ setup. + * + * This message must be sent as `MMIO HXG Message`_. 
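+ *
+ * As a worked example (illustrative values only): registering a 16K
+ * `CT Buffer`_ uses SIZE = 16K/4K - 1 = 3, since the SIZE field in the
+ * request layout below is expressed in 4K units minus one.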
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:12 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **TYPE** - type for the `CT Buffer`_ | + * | | | | + * | | | - _`GUC_CTB_TYPE_HOST2GUC` = 0 | + * | | | - _`GUC_CTB_TYPE_GUC2HOST` = 1 | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_ | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505 + +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) +#define GUC_CTB_TYPE_HOST2GUC 0u +#define GUC_CTB_TYPE_GUC2HOST 1u +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE (0xff << 0) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR GUC_HXG_REQUEST_MSG_n_DATAn + +#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: HOST2GUC_DEREGISTER_CTB + * + * This message is used as part of the `CTB based communication`_ teardown. + * + * This message must be sent as `MMIO HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:12 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **TYPE** - type of the `CT Buffer`_ | + * | | | | + * | | | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB 0x4506 + +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2 (0xff << 0) + +#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/* legacy definitions */ + enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, @@ -17,13 +124,33 @@ enum intel_guc_action { INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, - INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000, + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, + INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, + INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, + INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, + INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, + INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE 
= 0x4600, + INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, INTEL_GUC_ACTION_LIMIT }; +enum intel_guc_rc_options { + INTEL_GUCRC_HOST_CONTROL, + INTEL_GUCRC_FIRMWARE_CONTROL, +}; + enum intel_guc_preempt_options { INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h new file mode 100644 index 000000000000..7a8d4bfc5f6a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include <linux/types.h> +#include "i915_reg.h" + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * +----+------+--------------------------------------------------------------+ + * | CL | Bytes| Description | + * +====+======+==============================================================+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +------+--------------------------------------------------------------+ + * | | 4-7 | GLOBAL STATE | + * | +------+--------------------------------------------------------------+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +------+--------------------------------------------------------------+ + * | | 12:63| PADDING | + * +----+------+--------------------------------------------------------------+ + * | | 0:63 | PADDING(PLATFORM INFO) | + * +----+------+--------------------------------------------------------------+ + * | 3 | 0-3 | TASK STATE DATA | + * + +------+--------------------------------------------------------------+ + * | | 4:63 | PADDING | + * +----+------+--------------------------------------------------------------+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * +----+------+--------------------------------------------------------------+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +----+------+--------------------------------------------------------------+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. 
+ * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size aligned (Total size needed for + * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \ + SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \ + (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \ + + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \ + + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \ + + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \ + + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE)) + +enum slpc_task_enable { + SLPC_PARAM_TASK_DEFAULT = 0, + SLPC_PARAM_TASK_ENABLED, + SLPC_PARAM_TASK_DISABLED, + SLPC_PARAM_TASK_UNKNOWN +}; + +enum slpc_global_state { + SLPC_GLOBAL_STATE_NOT_RUNNING = 0, + SLPC_GLOBAL_STATE_INITIALIZING = 1, + SLPC_GLOBAL_STATE_RESETTING = 2, + SLPC_GLOBAL_STATE_RUNNING = 3, + SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4, + SLPC_GLOBAL_STATE_ERROR = 5 +}; + +enum slpc_param_id { + SLPC_PARAM_TASK_ENABLE_GTPERF = 0, + SLPC_PARAM_TASK_DISABLE_GTPERF = 1, + SLPC_PARAM_TASK_ENABLE_BALANCER = 2, + SLPC_PARAM_TASK_DISABLE_BALANCER = 3, + SLPC_PARAM_TASK_ENABLE_DCC = 4, + SLPC_PARAM_TASK_DISABLE_DCC = 5, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7, + SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8, + SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9, + SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10, + SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11, + SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12, + SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13, + SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14, + SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15, + SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16, + SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17, + SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18, + SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19, + SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20, + SLPC_PARAM_PWRGATE_RC_MODE = 21, + SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22, + SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23, + SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24, + SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25, + SLPC_PARAM_STRATEGIES = 26, + SLPC_PARAM_POWER_PROFILE = 27, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28, + SLPC_MAX_PARAM = 32, +}; + +enum slpc_event_id { + SLPC_EVENT_RESET = 0, + SLPC_EVENT_SHUTDOWN = 1, + SLPC_EVENT_PLATFORM_INFO_CHANGE = 2, + SLPC_EVENT_DISPLAY_MODE_CHANGE = 3, + SLPC_EVENT_FLIP_COMPLETE = 4, + SLPC_EVENT_QUERY_TASK_STATE = 5, + SLPC_EVENT_PARAMETER_SET = 6, + SLPC_EVENT_PARAMETER_UNSET = 7, +}; + +struct slpc_task_state_data { + union { + u32 task_status_padding; + struct { + u32 status; +#define SLPC_GTPERF_TASK_ENABLED REG_BIT(0) +#define SLPC_DCC_TASK_ENABLED REG_BIT(11) +#define SLPC_IN_DCC REG_BIT(12) +#define 
SLPC_BALANCER_ENABLED REG_BIT(15) +#define SLPC_IBC_TASK_ENABLED REG_BIT(16) +#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17) +#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18) + }; + }; + union { + u32 freq_padding; + struct { +#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0) +#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8) +#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16) +#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24) + u32 freq; + }; + }; +} __packed; + +struct slpc_shared_data_header { + /* Total size in bytes of this shared buffer. */ + u32 size; + u32 global_state; + u32 display_data_addr; +} __packed; + +struct slpc_override_params { + u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE]; + u32 values[SLPC_MAX_OVERRIDE_PARAMETERS]; +} __packed; + +struct slpc_shared_data { + struct slpc_shared_data_header header; + u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER - + sizeof(struct slpc_shared_data_header)]; + + u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; + + struct slpc_task_state_data task_state_data; + u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE - + sizeof(struct slpc_task_state_data)]; + + struct slpc_override_params override_params; + u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES - + sizeof(struct slpc_override_params)]; + + u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; + + /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */ + u8 reserved_mode_definition[4096]; +} __packed; + +/** + * DOC: SLPC H2G MESSAGE FORMAT + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:8 | **EVENT_ID** | + * + +-------+--------------------------------------------------------------+ + * | | 7:0 | **EVENT_ARGC** - number of data arguments | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **EVENT_DATA1** | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | ... 
| + * +---+-------+--------------------------------------------------------------+ + * |2+n| 31:0 | **EVENT_DATAn** | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 + +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index d38935f47ecf..99e1fad5ca20 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -7,6 +7,111 @@ #define _ABI_GUC_COMMUNICATION_CTB_ABI_H #include <linux/types.h> +#include <linux/build_bug.h> + +#include "guc_messages_abi.h" + +/** + * DOC: CT Buffer + * + * Circular buffer used to send `CTB Message`_ + */ + +/** + * DOC: CTB Descriptor + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was | + * | | | read from the `CT Buffer`_. | + * | | | It can only be updated by the receiver. | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was | + * | | | written to the `CT Buffer`_. | + * | | | It can only be updated by the sender. 
| + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **STATUS** - status of the CTB | + * | | | | + * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) | + * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | + * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | + * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * +---+-------+--------------------------------------------------------------+ + * |...| | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 15| 31:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ + +struct guc_ct_buffer_desc { + u32 head; + u32 tail; + u32 status; +#define GUC_CTB_STATUS_NO_ERROR 0 +#define GUC_CTB_STATUS_OVERFLOW (1 << 0) +#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) +#define GUC_CTB_STATUS_MISMATCH (1 << 2) + u32 reserved[13]; +} __packed; +static_assert(sizeof(struct guc_ct_buffer_desc) == 64); + +/** + * DOC: CTB Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **FENCE** - message identifier | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | **FORMAT** - format of the CTB message | + * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **RESERVED** | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | optional (depends on FORMAT) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HDR_LEN 1u +#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN +#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_FORMAT_HXG 0u +#define GUC_CTB_MSG_0_RESERVED (0xf << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) + +/** + * DOC: CTB HXG Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | FENCE | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | +--------------------------------------------------------+ | + * +---+-------+ | | | + * |...| | | Embedded `HXG Message`_ | | + * +---+-------+ | | | + * | n | 31:0 | +--------------------------------------------------------+ | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN) +#define GUC_CTB_HXG_MSG_MAX_LEN 
GUC_CTB_MSG_MAX_LEN /** * DOC: CTB based communication @@ -61,28 +166,6 @@ */ /* - * Describes single command transport buffer. - * Used by both guc-master and clients. - */ -struct guc_ct_buffer_desc { - u32 addr; /* gfx address */ - u64 host_private; /* host private data */ - u32 size; /* size in bytes */ - u32 head; /* offset updated by GuC*/ - u32 tail; /* offset updated by owner */ - u32 is_in_error; /* error indicator */ - u32 reserved1; - u32 reserved2; - u32 owner; /* id of the channel owner */ - u32 owner_sub_id; /* owner-defined field for extra tracking */ - u32 reserved[5]; -} __packed; - -/* Type of command transport buffer */ -#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u -#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u - -/* * Definition of the command transport message header (DW0) * * bit[4..0] message len (in dwords) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h index be066a62e9e0..bbf1ddb77434 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h @@ -7,46 +7,43 @@ #define _ABI_GUC_COMMUNICATION_MMIO_ABI_H /** - * DOC: MMIO based communication + * DOC: GuC MMIO based communication * - * The MMIO based communication between Host and GuC uses software scratch - * registers, where first register holds data treated as message header, - * and other registers are used to hold message payload. + * The MMIO based communication between Host and GuC relies on special + * hardware registers which format could be defined by the software + * (so called scratch registers). * - * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8, - * but no H2G command takes more than 8 parameters and the GuC FW - * itself uses an 8-element array to store the H2G message. + * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H) + * messages, which maximum length depends on number of available scratch + * registers, is directly written into those scratch registers. * - * +-----------+---------+---------+---------+ - * | MMIO[0] | MMIO[1] | ... | MMIO[n] | - * +-----------+---------+---------+---------+ - * | header | optional payload | - * +======+====+=========+=========+=========+ - * | 31:28|type| | | | - * +------+----+ | | | - * | 27:16|data| | | | - * +------+----+ | | | - * | 15:0|code| | | | - * +------+----+---------+---------+---------+ + * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 4 parameters and the GuC firmware + * itself uses an 4-element array to store the H2G message. * - * The message header consists of: + * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which + * are, regardless on lower count, preferred over legacy ones. * - * - **type**, indicates message type - * - **code**, indicates message code, is specific for **type** - * - **data**, indicates message data, optional, depends on **code** - * - * The following message **types** are supported: - * - * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code - * must be priovided in **code** field. Optional action specific parameters - * can be provided in remaining payload registers or **data** field. - * - * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, - * action response status will be provided in **code** field. Optional - * response data can be returned in remaining payload registers or **data** - * field. 
+ * The MMIO based communication is mainly used during driver initialization + * phase to setup the `CTB based communication`_ that will be used afterwards. */ -#define GUC_MAX_MMIO_MSG_LEN 8 +#define GUC_MAX_MMIO_MSG_LEN 4 + +/** + * DOC: MMIO HXG Message + * + * Format of the MMIO messages follows definitions of `HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | +--------------------------------------------------------+ | + * +---+-------+ | | | + * |...| | | Embedded `HXG Message`_ | | + * +---+-------+ | | | + * | n | 31:0 | +--------------------------------------------------------+ | + * +---+-------+--------------------------------------------------------------+ + */ #endif /* _ABI_GUC_COMMUNICATION_MMIO_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h index 775e21f3058c..29ac823acd4c 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h @@ -6,6 +6,219 @@ #ifndef _ABI_GUC_MESSAGES_ABI_H #define _ABI_GUC_MESSAGES_ABI_H +/** + * DOC: HXG Message + * + * All messages exchanged with GuC are defined using 32 bit dwords. + * First dword is treated as a message header. Remaining dwords are optional. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | | | | + * | 0 | 31 | **ORIGIN** - originator of the message | + * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 | + * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | **TYPE** - message type | + * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | + * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **PAYLOAD** - optional payload (depends on TYPE) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_MSG_MIN_LEN 1u +#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_ORIGIN_HOST 0u +#define GUC_HXG_ORIGIN_GUC 1u +#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_TYPE_REQUEST 0u +#define GUC_HXG_TYPE_EVENT 1u +#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u +#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u +#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u +#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u +#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) + +/** + * DOC: HXG Request + * + * The `HXG Request`_ message should be used to initiate synchronous activity + * for which confirmation or return data is expected. + * + * The recipient of this message shall use `HXG Response`_, `HXG Failure`_ + * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_ + * message as a intermediate reply. 
+ * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - request data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - requested action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Event + * + * The `HXG Event`_ message should be used to initiate asynchronous activity + * that does not involves immediate confirmation nor data. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - event data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - event action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional event data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Busy + * + * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_ + * message if the recipient expects that it processing will be longer than default + * timeout. + * + * The @COUNTER field may be used as a progress indicator. 
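+ *
+ * For illustration only: a sender typically keeps re-reading the reply
+ * header while FIELD_GET(GUC_HXG_MSG_0_TYPE, hdr) reports
+ * GUC_HXG_TYPE_NO_RESPONSE_BUSY_ (hdr being the polled header dword),
+ * optionally watching COUNTER for forward progress.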
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNTER** - progress indicator | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX + +/** + * DOC: HXG Retry + * + * The `HXG Retry`_ message should be used by recipient to indicate that the + * `HXG Request`_ message was dropped and it should be resent again. + * + * The @REASON field may be used to provide additional information. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **REASON** - reason for retry | + * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX +#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u + +/** + * DOC: HXG Failure + * + * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_ + * message that could not be processed due to an error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **HINT** - additional error hint | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ERROR** - error/result code | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) + +/** + * DOC: HXG Response + * + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ + * message that was successfully processed without an error. 
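+ *
+ * As an illustrative sketch only (action, reply and data0 are placeholder
+ * names, not part of this ABI), a caller builds the request header with::
+ *
+ *	FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ *	FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ *	FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, action)
+ *
+ * and, once the reply header is read back into reply, validates and unpacks
+ * the result with::
+ *
+ *	if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, reply) == GUC_HXG_ORIGIN_GUC &&
+ *	    FIELD_GET(GUC_HXG_MSG_0_TYPE, reply) == GUC_HXG_TYPE_RESPONSE_SUCCESS)
+ *		data0 = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, reply);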
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX +#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/* deprecated */ #define INTEL_GUC_MSG_TYPE_SHIFT 28 #define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) #define INTEL_GUC_MSG_DATA_SHIFT 16 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f147cb389a20..fbfcae727d7f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -7,6 +7,7 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" +#include "intel_guc_slpc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -157,6 +158,8 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_ct_init_early(&guc->ct); intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); + intel_guc_slpc_init_early(&guc->slpc); + intel_guc_rc_init_early(guc); mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); @@ -180,6 +183,11 @@ void intel_guc_init_early(struct intel_guc *guc) } } +void intel_guc_init_late(struct intel_guc *guc) +{ + intel_guc_ads_init_late(guc); +} + static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); @@ -201,6 +209,9 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) if (!intel_guc_submission_is_used(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; + if (intel_guc_slpc_is_used(guc)) + flags |= GUC_CTL_ENABLE_SLPC; + return flags; } @@ -219,24 +230,19 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc) BUILD_BUG_ON(!CRASH_BUFFER_SIZE); BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); - BUILD_BUG_ON(!DPC_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DPC_BUFFER_SIZE, UNIT)); - BUILD_BUG_ON(!ISR_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(ISR_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT)); BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DPC_BUFFER_SIZE / UNIT - 1) > - (GUC_LOG_DPC_MASK >> GUC_LOG_DPC_SHIFT)); - BUILD_BUG_ON((ISR_BUFFER_SIZE / UNIT - 1) > - (GUC_LOG_ISR_MASK >> GUC_LOG_ISR_SHIFT)); + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) > + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | FLAG | ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DPC_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DPC_SHIFT) | - ((ISR_BUFFER_SIZE / UNIT - 1) << GUC_LOG_ISR_SHIFT) | + ((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); #undef UNIT @@ 
-331,6 +337,12 @@ int intel_guc_init(struct intel_guc *guc) goto err_ct; } + if (intel_guc_slpc_is_used(guc)) { + ret = intel_guc_slpc_init(&guc->slpc); + if (ret) + goto err_submission; + } + /* now that everything is perma-pinned, initialize the parameters */ guc_init_params(guc); @@ -341,6 +353,8 @@ int intel_guc_init(struct intel_guc *guc) return 0; +err_submission: + intel_guc_submission_fini(guc); err_ct: intel_guc_ct_fini(&guc->ct); err_ads: @@ -363,6 +377,9 @@ void intel_guc_fini(struct intel_guc *guc) i915_ggtt_disable_guc(gt->ggtt); + if (intel_guc_slpc_is_used(guc)) + intel_guc_slpc_fini(&guc->slpc); + if (intel_guc_submission_is_used(guc)) intel_guc_submission_fini(guc); @@ -376,29 +393,27 @@ void intel_guc_fini(struct intel_guc *guc) /* * This function implements the MMIO based host to GuC interface. */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len, u32 *response_buf, u32 response_buf_size) { + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct intel_uncore *uncore = guc_to_gt(guc)->uncore; - u32 status; + u32 header; int i; int ret; GEM_BUG_ON(!len); GEM_BUG_ON(len > guc->send_regs.count); - /* We expect only action code */ - GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != GUC_HXG_ORIGIN_HOST); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != GUC_HXG_TYPE_REQUEST); mutex_lock(&guc->send_mutex); intel_uncore_forcewake_get(uncore, guc->send_regs.fw_domains); +retry: for (i = 0; i < len; i++) - intel_uncore_write(uncore, guc_send_reg(guc, i), action[i]); + intel_uncore_write(uncore, guc_send_reg(guc, i), request[i]); intel_uncore_posting_read(uncore, guc_send_reg(guc, i - 1)); @@ -410,30 +425,74 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, */ ret = __intel_wait_for_register_fw(uncore, guc_send_reg(guc, 0), - INTEL_GUC_MSG_TYPE_MASK, - INTEL_GUC_MSG_TYPE_RESPONSE << - INTEL_GUC_MSG_TYPE_SHIFT, - 10, 10, &status); - /* If GuC explicitly returned an error, convert it to -EIO */ - if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status)) - ret = -EIO; + GUC_HXG_MSG_0_ORIGIN, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, + GUC_HXG_ORIGIN_GUC), + 10, 10, &header); + if (unlikely(ret)) { +timeout: + drm_err(&i915->drm, "mmio request %#x: no reply %x\n", + request[0], header); + goto out; + } - if (ret) { - DRM_ERROR("MMIO: GuC action %#x failed with error %d %#x\n", - action[0], ret, status); + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { +#define done ({ header = intel_uncore_read(uncore, guc_send_reg(guc, 0)); \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC || \ + FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) + + ret = wait_for(done, 1000); + if (unlikely(ret)) + goto timeout; + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != + GUC_HXG_ORIGIN_GUC)) + goto proto; +#undef done + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); + + drm_dbg(&i915->drm, "mmio request %#x: retrying, reason %u\n", + request[0], reason); + goto retry; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == 
GUC_HXG_TYPE_RESPONSE_FAILURE) { + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + + drm_err(&i915->drm, "mmio request %#x: failure %x/%u\n", + request[0], error, hint); + ret = -ENXIO; + goto out; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { +proto: + drm_err(&i915->drm, "mmio request %#x: unexpected reply %#x\n", + request[0], header); + ret = -EPROTO; goto out; } if (response_buf) { - int count = min(response_buf_size, guc->send_regs.count - 1); + int count = min(response_buf_size, guc->send_regs.count); + + GEM_BUG_ON(!count); - for (i = 0; i < count; i++) + response_buf[0] = header; + + for (i = 1; i < count; i++) response_buf[i] = intel_uncore_read(uncore, - guc_send_reg(guc, i + 1)); - } + guc_send_reg(guc, i)); - /* Use data from the GuC response as our return value */ - ret = INTEL_GUC_MSG_TO_DATA(status); + /* Use number of copied dwords as our return value */ + ret = count; + } else { + /* Use data from the GuC response as our return value */ + ret = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); + } out: intel_uncore_forcewake_put(uncore, guc->send_regs.fw_domains); @@ -487,65 +546,35 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) */ int intel_guc_suspend(struct intel_guc *guc) { - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; int ret; - u32 status; u32 action[] = { - INTEL_GUC_ACTION_ENTER_S_STATE, - GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + INTEL_GUC_ACTION_RESET_CLIENT, }; - /* - * If GuC communication is enabled but submission is not supported, - * we do not need to suspend the GuC. - */ - if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) + if (!intel_guc_is_ready(guc)) return 0; - /* - * The ENTER_S_STATE action queues the save/restore operation in GuC FW - * and then returns, so waiting on the H2G is not enough to guarantee - * GuC is done. When all the processing is done, GuC writes - * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll - * on that. Note that GuC does not ensure that the value in the register - * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is - * in progress so we need to take care of that ourselves as well. - */ - - intel_uncore_write(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK); - - ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); - if (ret) - return ret; - - ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK, - 0, 0, 10, &status); - if (ret) - return ret; - - if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) { - DRM_ERROR("GuC failed to change sleep state. " - "action=0x%x, err=%u\n", - action[0], status); - return -EIO; + if (intel_guc_submission_is_used(guc)) { + /* + * This H2G MMIO command tears down the GuC in two steps. First it will + * generate a G2H CTB for every active context indicating a reset. In + * practice the i915 shouldn't ever get a G2H as suspend should only be + * called when the GPU is idle. Next, it tears down the CTBs and this + * H2G MMIO command completes. + * + * Don't abort on a failure code from the GuC. Keep going and do the + * clean up in santize() and re-initialisation on resume and hopefully + * the error here won't be problematic. 
+ */ + ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (ret) + DRM_ERROR("GuC suspend: RESET_CLIENT action failed with error %d!\n", ret); } - return 0; -} - -/** - * intel_guc_reset_engine() - ask GuC to reset an engine - * @guc: intel_guc structure - * @engine: engine to be reset - */ -int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine) -{ - /* XXX: to be implemented with submission interface rework */ + /* Signal that the GuC isn't running. */ + intel_guc_sanitize(guc); - return -ENODEV; + return 0; } /** @@ -554,7 +583,12 @@ int intel_guc_reset_engine(struct intel_guc *guc, */ int intel_guc_resume(struct intel_guc *guc) { - /* XXX: to be implemented with submission interface rework */ + /* + * NB: This function can still be called even if GuC submission is + * disabled, e.g. if GuC is enabled for HuC authentication only. Thus, + * if any code is later added here, it must be support doing nothing + * if submission is disabled (as per intel_guc_suspend). + */ return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4abc59f6f3cd..2e27fe59786b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -6,12 +6,16 @@ #ifndef _INTEL_GUC_H_ #define _INTEL_GUC_H_ +#include <linux/xarray.h> +#include <linux/delay.h> + #include "intel_uncore.h" #include "intel_guc_fw.h" #include "intel_guc_fwif.h" #include "intel_guc_ct.h" #include "intel_guc_log.h" #include "intel_guc_reg.h" +#include "intel_guc_slpc_types.h" #include "intel_uc_fw.h" #include "i915_utils.h" #include "i915_vma.h" @@ -27,24 +31,47 @@ struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; struct intel_guc_ct ct; + struct intel_guc_slpc slpc; + + /* Global engine used to submit requests to GuC */ + struct i915_sched_engine *sched_engine; + struct i915_request *stalled_request; /* intel_guc_recv interrupt related state */ spinlock_t irq_lock; unsigned int msg_enabled_mask; + atomic_t outstanding_submission_g2h; + struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); } interrupts; + /* + * contexts_lock protects the pool of free guc ids and a linked list of + * guc ids available to be stolen + */ + spinlock_t contexts_lock; + struct ida guc_ids; + struct list_head guc_id_list; + + bool submission_supported; bool submission_selected; + bool rc_supported; + bool rc_selected; struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; + u32 ads_regset_size; + u32 ads_golden_ctxt_size; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; + struct i915_vma *lrc_desc_pool; + void *lrc_desc_pool_vaddr; + + /* guc_id to intel_context lookup */ + struct xarray context_lookup; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; @@ -74,7 +101,15 @@ static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); +} + +static +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len, + u32 g2h_len_dw) +{ + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, + MAKE_SEND_FLAGS(g2h_len_dw)); } static inline int @@ -82,7 +117,43 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 
response_buf_size) { return intel_guc_ct_send(&guc->ct, action, len, - response_buf, response_buf_size); + response_buf, response_buf_size, 0); +} + +static inline int intel_guc_send_busy_loop(struct intel_guc *guc, + const u32 *action, + u32 len, + u32 g2h_len_dw, + bool loop) +{ + int err; + unsigned int sleep_period_ms = 1; + bool not_atomic = !in_atomic() && !irqs_disabled(); + + /* + * FIXME: Have caller pass in if we are in an atomic context to avoid + * using in_atomic(). It is likely safe here as we check for irqs + * disabled which basically all the spin locks in the i915 do but + * regardless this should be cleaned up. + */ + + /* No sleeping with spin locks, just busy loop */ + might_sleep_if(loop && not_atomic); + +retry: + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); + if (unlikely(err == -EBUSY && loop)) { + if (likely(not_atomic)) { + if (msleep_interruptible(sleep_period_ms)) + return -EINTR; + sleep_period_ms = sleep_period_ms << 1; + } else { + cpu_relax(); + } + goto retry; + } + + return err; } static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) @@ -118,6 +189,7 @@ static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, } void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_late(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_write_params(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); @@ -160,9 +232,25 @@ static inline bool intel_guc_is_ready(struct intel_guc *guc) return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(&guc->ct); } +static inline void intel_guc_reset_interrupts(struct intel_guc *guc) +{ + guc->interrupts.reset(guc); +} + +static inline void intel_guc_enable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.enable(guc); +} + +static inline void intel_guc_disable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.disable(guc); +} + static inline int intel_guc_sanitize(struct intel_guc *guc) { intel_uc_fw_sanitize(&guc->fw); + intel_guc_disable_interrupts(guc); intel_guc_ct_sanitize(&guc->ct); guc->mmio_msg = 0; @@ -183,8 +271,27 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) spin_unlock_irq(&guc->irq_lock); } -int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine); +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); + +int intel_guc_deregister_done_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); +int intel_guc_sched_done_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); +int intel_guc_context_reset_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); +int intel_guc_engine_failure_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); + +void intel_guc_find_hung_context(struct intel_engine_cs *engine); + +int intel_guc_global_policies_update(struct intel_guc *guc); + +void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq); + +void intel_guc_submission_reset_prepare(struct intel_guc *guc); +void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); +void intel_guc_submission_reset_finish(struct intel_guc *guc); +void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 9abfbc6edbd6..6926919bcac6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -3,8 +3,11 @@ * Copyright © 2014-2019 Intel Corporation */ +#include <linux/bsearch.h> + #include "gt/intel_gt.h" #include "gt/intel_lrc.h" +#include "gt/shmem_utils.h" #include "intel_guc_ads.h" #include "intel_guc_fwif.h" #include "intel_uc.h" @@ -23,10 +26,15 @@ * | guc_policies | * +---------------------------------------+ * | guc_gt_system_info | - * +---------------------------------------+ - * | guc_clients_info | - * +---------------------------------------+ - * | guc_ct_pool_entry[size] | + * +---------------------------------------+ <== static + * | guc_mmio_reg[countA] (engine 0.0) | + * | guc_mmio_reg[countB] (engine 0.1) | + * | guc_mmio_reg[countC] (engine 1.0) | + * | ... | + * +---------------------------------------+ <== dynamic + * | padding | + * +---------------------------------------+ <== 4K aligned + * | golden contexts | * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned @@ -39,18 +47,49 @@ struct __guc_ads_blob { struct guc_ads ads; struct guc_policies policies; struct guc_gt_system_info system_info; - struct guc_clients_info clients_info; - struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE]; + /* From here on, location is dynamic! Refer to above diagram. */ + struct guc_mmio_reg regset[0]; } __packed; +static u32 guc_ads_regset_size(struct intel_guc *guc) +{ + GEM_BUG_ON(!guc->ads_regset_size); + return guc->ads_regset_size; +} + +static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->ads_golden_ctxt_size); +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); } +static u32 guc_ads_regset_offset(struct intel_guc *guc) +{ + return offsetof(struct __guc_ads_blob, regset); +} + +static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_regset_offset(guc) + + guc_ads_regset_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_private_data_offset(struct intel_guc *guc) { - return PAGE_ALIGN(sizeof(struct __guc_ads_blob)); + u32 offset; + + offset = guc_ads_golden_ctxt_offset(guc) + + guc_ads_golden_ctxt_size(guc); + + return PAGE_ALIGN(offset); } static u32 guc_ads_blob_size(struct intel_guc *guc) @@ -59,36 +98,66 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) guc_ads_private_data_size(guc); } -static void guc_policy_init(struct guc_policy *policy) +static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies) { - policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; - policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; - policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; - policy->policy_flags = 0; + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + + policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; + policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; + + policies->global_flags = 0; + if (i915->params.reset < 2) + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + policies->is_valid = 1; } -static void guc_policies_init(struct guc_policies *policies) +void intel_guc_ads_print_policy_info(struct intel_guc *guc, + struct drm_printer *dp) { - struct guc_policy *policy; - u32 p, i; + struct __guc_ads_blob *blob = guc->ads_blob; - policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; - policies->max_num_work_items = POLICY_MAX_NUM_WI; + if (unlikely(!blob)) + return; - for (p = 0; p 
< GUC_CLIENT_PRIORITY_NUM; p++) { - for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) { - policy = &policies->policy[p][i]; + drm_printf(dp, "Global scheduling policies:\n"); + drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time); + drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items); + drm_printf(dp, " Flags = %u\n", blob->policies.global_flags); +} - guc_policy_init(policy); - } - } +static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, + policy_offset + }; - policies->is_valid = 1; + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); } -static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num) +int intel_guc_global_policies_update(struct intel_guc *guc) { - memset(pool, 0, num * sizeof(*pool)); + struct __guc_ads_blob *blob = guc->ads_blob; + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int ret; + + if (!blob) + return -EOPNOTSUPP; + + GEM_BUG_ON(!blob->ads.scheduler_policies); + + guc_policies_init(guc, &blob->policies); + + if (!intel_guc_is_ready(guc)) + return 0; + + with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) + ret = guc_action_policies_update(guc, blob->ads.scheduler_policies); + + return ret; } static void guc_mapping_table_init(struct intel_gt *gt, @@ -113,53 +182,324 @@ static void guc_mapping_table_init(struct intel_gt *gt, } /* - * The first 80 dwords of the register state context, containing the - * execlists and ppgtt registers. + * The save/restore register list must be pre-calculated to a temporary + * buffer of driver defined size before it can be generated in place + * inside the ADS. */ -#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ +struct temp_regset { + struct guc_mmio_reg *registers; + u32 used; + u32 size; +}; -static void __guc_ads_init(struct intel_guc *guc) +static int guc_mmio_reg_cmp(const void *a, const void *b) +{ + const struct guc_mmio_reg *ra = a; + const struct guc_mmio_reg *rb = b; + + return (int)ra->offset - (int)rb->offset; +} + +static void guc_mmio_reg_add(struct temp_regset *regset, + u32 offset, u32 flags) +{ + u32 count = regset->used; + struct guc_mmio_reg reg = { + .offset = offset, + .flags = flags, + }; + struct guc_mmio_reg *slot; + + GEM_BUG_ON(count >= regset->size); + + /* + * The mmio list is built using separate lists within the driver. + * It's possible that at some point we may attempt to add the same + * register more than once. Do not consider this an error; silently + * move on if the register is already in the list. + */ + if (bsearch(&reg, regset->registers, count, + sizeof(reg), guc_mmio_reg_cmp)) + return; + + slot = &regset->registers[count]; + regset->used++; + *slot = reg; + + while (slot-- > regset->registers) { + GEM_BUG_ON(slot[0].offset == slot[1].offset); + if (slot[1].offset > slot[0].offset) + break; + + swap(slot[1], slot[0]); + } +} + +#define GUC_MMIO_REG_ADD(regset, reg, masked) \ + guc_mmio_reg_add(regset, \ + i915_mmio_reg_offset((reg)), \ + (masked) ?
GUC_REGSET_MASKED : 0) + +static void guc_mmio_regset_init(struct temp_regset *regset, + struct intel_engine_cs *engine) +{ + const u32 base = engine->mmio_base; + struct i915_wa_list *wal = &engine->wa_list; + struct i915_wa *wa; + unsigned int i; + + regset->used = 0; + + GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true); + GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); + GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); + + /* Be extra paranoid and include all whitelist registers. */ + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) + GUC_MMIO_REG_ADD(regset, + RING_FORCE_TO_NONPRIV(base, i), + false); + + /* add in local MOCS registers */ + for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) + GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false); +} + +static int guc_mmio_reg_state_query(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct temp_regset temp_set; + u32 total; + + /* + * Need to actually build the list in order to filter out + * duplicates and other such data dependent constructions. + */ + temp_set.size = MAX_MMIO_REGS; + temp_set.registers = kmalloc_array(temp_set.size, + sizeof(*temp_set.registers), + GFP_KERNEL); + if (!temp_set.registers) + return -ENOMEM; + + total = 0; + for_each_engine(engine, gt, id) { + guc_mmio_regset_init(&temp_set, engine); + total += temp_set.used; + } + + kfree(temp_set.registers); + + return total * sizeof(struct guc_mmio_reg); +} + +static void guc_mmio_reg_state_init(struct intel_guc *guc, + struct __guc_ads_blob *blob) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct temp_regset temp_set; + struct guc_mmio_reg_set *ads_reg_set; + u32 addr_ggtt, offset; + u8 guc_class; + + offset = guc_ads_regset_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset); + temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]); + + for_each_engine(engine, gt, id) { + /* Class index is checked in class converter */ + GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS); + + guc_class = engine_class_to_guc_class(engine->class); + ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance]; + + guc_mmio_regset_init(&temp_set, engine); + if (!temp_set.used) { + ads_reg_set->address = 0; + ads_reg_set->count = 0; + continue; + } + + ads_reg_set->address = addr_ggtt; + ads_reg_set->count = temp_set.used; + + temp_set.size -= temp_set.used; + temp_set.registers += temp_set.used; + addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg); + } + + GEM_BUG_ON(temp_set.size); +} + +static void fill_engine_enable_masks(struct intel_gt *gt, + struct guc_gt_system_info *info) +{ + info->engine_enabled_masks[GUC_RENDER_CLASS] = 1; + info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1; + info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); + info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); +} + +static int guc_prep_golden_context(struct intel_guc *guc, + struct __guc_ads_blob *blob) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 addr_ggtt, offset; + u32 total_size = 0, alloc_size, real_size; + u8 engine_class, guc_class; + struct guc_gt_system_info *info, local_info; + + /* + * Reserve the memory for the golden contexts and point 
GuC at it but + leave it empty for now. The context data will be filled in later + once there is something available to put there. + * + * Note that the HWSP and ring context are not included. + * + * Note also that the storage must be pinned in the GGTT, so that the + * address won't change after GuC has been told where to find it. The + * GuC will also validate that the LRC base + size fall within the + * allowed GGTT range. + */ + if (blob) { + offset = guc_ads_golden_ctxt_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + info = &blob->system_info; + } else { + memset(&local_info, 0, sizeof(local_info)); + info = &local_info; + fill_engine_enable_masks(gt, info); + } + + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { + if (engine_class == OTHER_CLASS) + continue; + + guc_class = engine_class_to_guc_class(engine_class); + + if (!info->engine_enabled_masks[guc_class]) + continue; + + real_size = intel_engine_context_size(gt, engine_class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + if (!blob) + continue; + + blob->ads.eng_state_size[guc_class] = real_size; + blob->ads.golden_context_lrca[guc_class] = addr_ggtt; + addr_ggtt += alloc_size; + } + + if (!blob) + return total_size; + + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); + return total_size; +} + +static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_class) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) { + if (engine->class != engine_class) + continue; + + if (!engine->default_state) + continue; + + return engine; + } + + return NULL; +} + +static void guc_init_golden_context(struct intel_guc *guc) +{ struct __guc_ads_blob *blob = guc->ads_blob; - const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; - u32 base; + struct intel_engine_cs *engine; + struct intel_gt *gt = guc_to_gt(guc); + u32 addr_ggtt, offset; + u32 total_size = 0, alloc_size, real_size; u8 engine_class, guc_class; + u8 *ptr; - /* GuC scheduling policies */ - guc_policies_init(&blob->policies); + /* Skip execlist and PPGTT registers + HWSP */ + const u32 lr_hw_context_size = 80 * sizeof(u32); + const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + + lr_hw_context_size; + + if (!intel_uc_uses_guc_submission(&gt->uc)) + return; + + GEM_BUG_ON(!blob); /* - * GuC expects a per-engine-class context image and size - * (minus hwsp and ring context). The context image will be - * used to reinitialize engines after a reset. It must exist - * and be pinned in the GGTT, so that the address won't change after - * we have told GuC where to find it. The context size will be used - * to validate that the LRC base + size fall within allowed GGTT. + * Go back and fill in the golden context data now that it is + * available. */ + offset = guc_ads_golden_ctxt_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + ptr = ((u8 *)blob) + offset; + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { if (engine_class == OTHER_CLASS) continue; guc_class = engine_class_to_guc_class(engine_class); - /* - * TODO: Set context pointer to default state to allow - * GuC to re-init guilty contexts after internal reset.
- */ - blob->ads.golden_context_lrca[guc_class] = 0; - blob->ads.eng_state_size[guc_class] = - intel_engine_context_size(guc_to_gt(guc), - engine_class) - - skipped_size; + if (!blob->system_info.engine_enabled_masks[guc_class]) + continue; + + real_size = intel_engine_context_size(gt, engine_class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + engine = find_engine_state(gt, engine_class); + if (!engine) { + drm_err(&gt->i915->drm, "No engine state recorded for class %d!\n", + engine_class); + blob->ads.eng_state_size[guc_class] = 0; + blob->ads.golden_context_lrca[guc_class] = 0; + continue; + } + + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); + addr_ggtt += alloc_size; + + shmem_read(engine->default_state, skip_size, ptr + skip_size, + real_size - skip_size); + ptr += alloc_size; } + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); +} + +static void __guc_ads_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + struct __guc_ads_blob *blob = guc->ads_blob; + u32 base; + + /* GuC scheduling policies */ + guc_policies_init(guc, &blob->policies); + /* System info */ - blob->system_info.engine_enabled_masks[GUC_RENDER_CLASS] = 1; - blob->system_info.engine_enabled_masks[GUC_BLITTER_CLASS] = 1; - blob->system_info.engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); - blob->system_info.engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); + fill_engine_enable_masks(gt, &blob->system_info); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = hweight8(gt->info.sseu.slice_mask); @@ -174,21 +514,19 @@ static void __guc_ads_init(struct intel_guc *guc) GEN12_DOORBELLS_PER_SQIDI) + 1; } + /* Golden contexts for re-initialising after a watchdog reset */ + guc_prep_golden_context(guc, blob); + guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); base = intel_guc_ggtt_offset(guc, guc->ads_vma); - /* Clients info */ - guc_ct_pool_entries_init(blob->ct_pool, ARRAY_SIZE(blob->ct_pool)); - - blob->clients_info.clients_num = 1; - blob->clients_info.ct_pool_addr = base + ptr_offset(blob, ct_pool); - blob->clients_info.ct_pool_count = ARRAY_SIZE(blob->ct_pool); - /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); - blob->ads.clients_info = base + ptr_offset(blob, clients_info); + + /* MMIO save/restore list */ + guc_mmio_reg_state_init(guc, blob); /* Private Data */ blob->ads.private_data = base + guc_ads_private_data_offset(guc); @@ -210,6 +548,19 @@ int intel_guc_ads_create(struct intel_guc *guc) GEM_BUG_ON(guc->ads_vma); + /* Need to calculate the reg state size dynamically: */ + ret = guc_mmio_reg_state_query(guc); + if (ret < 0) + return ret; + guc->ads_regset_size = ret; + + /* Likewise the golden contexts: */ + ret = guc_prep_golden_context(guc, NULL); + if (ret < 0) + return ret; + guc->ads_golden_ctxt_size = ret; + + /* Now the total size can be determined: */ size = guc_ads_blob_size(guc); ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, @@ -222,6 +573,18 @@ int intel_guc_ads_create(struct intel_guc *guc) return 0; } +void intel_guc_ads_init_late(struct intel_guc *guc) +{ + /* + * The golden context setup requires the saved engine state from + * __engines_record_defaults(). However, that requires engines to be + * operational which means the ADS must already have been configured.
+ * Fortunately, the golden context state is not needed until a hang + occurs, so it can be filled in during this late init phase. + */ + guc_init_golden_context(guc); +} + void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index b00d3ae1113a..3d85051d57e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -7,9 +7,13 @@ #define _INTEL_GUC_ADS_H_ struct intel_guc; +struct drm_printer; int intel_guc_ads_create(struct intel_guc *guc); void intel_guc_ads_destroy(struct intel_guc *guc); +void intel_guc_ads_init_late(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); +void intel_guc_ads_print_policy_info(struct intel_guc *guc, + struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 8f7b148fef58..22b4733b55e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -3,6 +3,11 @@ * Copyright © 2016-2019 Intel Corporation */ +#include <linux/circ_buf.h> +#include <linux/ktime.h> +#include <linux/time64.h> +#include <linux/timekeeping.h> + #include "i915_drv.h" #include "intel_guc_ct.h" #include "gt/intel_gt.h" @@ -58,11 +63,17 @@ static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct) * +--------+-----------------------------------------------+------+ * * Size of each `CT Buffer`_ must be multiple of 4K. - * As we don't expect too many messages, for now use minimum sizes. + * We don't expect too many messages in flight at any time, unless we are + * using GuC submission. In that case each request requires a minimum of + * 2 dwords which gives us a maximum of 256 queued requests. Hopefully this is + * enough space to avoid backpressure on the driver. We increase the size + * of the receive buffer (relative to the send) to ensure a G2H response + * CTB has a landing spot.
*/ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) struct ct_request { struct list_head link; @@ -98,66 +109,84 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) INIT_LIST_HEAD(&ct->requests.incoming); INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func); + init_waitqueue_head(&ct->wq); } static inline const char *guc_ct_buffer_type_to_str(u32 type) { switch (type) { - case INTEL_GUC_CT_BUFFER_TYPE_SEND: + case GUC_CTB_TYPE_HOST2GUC: return "SEND"; - case INTEL_GUC_CT_BUFFER_TYPE_RECV: + case GUC_CTB_TYPE_GUC2HOST: return "RECV"; default: return "<invalid>"; } } -static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, - u32 cmds_addr, u32 size) +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) { memset(desc, 0, sizeof(*desc)); - desc->addr = cmds_addr; - desc->size = size; - desc->owner = CTB_OWNER_HOST; } -static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb, u32 cmds_addr) +static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) { - guc_ct_buffer_desc_init(ctb->desc, cmds_addr, ctb->size); + u32 space; + + ctb->broken = false; + ctb->tail = 0; + ctb->head = 0; + space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space; + atomic_set(&ctb->space, space); + + guc_ct_buffer_desc_init(ctb->desc); } static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, struct guc_ct_buffer_desc *desc, - u32 *cmds, u32 size) + u32 *cmds, u32 size_in_bytes, u32 resv_space) { - GEM_BUG_ON(size % 4); + GEM_BUG_ON(size_in_bytes % 4); ctb->desc = desc; ctb->cmds = cmds; - ctb->size = size; + ctb->size = size_in_bytes / 4; + ctb->resv_space = resv_space / 4; - guc_ct_buffer_reset(ctb, 0); + guc_ct_buffer_reset(ctb); } -static int guc_action_register_ct_buffer(struct intel_guc *guc, - u32 desc_addr, - u32 type) +static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, + u32 desc_addr, u32 buff_addr, u32 size) { - u32 action[] = { - INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER, - desc_addr, - sizeof(struct guc_ct_buffer_desc), - type + u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) | + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), }; - /* Can't use generic send(), CT registration must go over MMIO */ - return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); + GEM_BUG_ON(size % SZ_4K); + + /* CT registration must go over MMIO */ + return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); } -static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) +static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, + u32 desc_addr, u32 buff_addr, u32 size) { - int err = guc_action_register_ct_buffer(ct_to_guc(ct), desc_addr, type); + int err; + + err = 
i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO); + if (unlikely(err)) + return err; + err = guc_action_register_ct_buffer(ct_to_guc(ct), type, + desc_addr, buff_addr, size); if (unlikely(err)) CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n", guc_ct_buffer_type_to_str(type), err); @@ -166,14 +195,17 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) { - u32 action[] = { - INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, - CTB_OWNER_HOST, - type + u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), + FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), }; - /* Can't use generic send(), CT deregistration must go over MMIO */ - return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); + + /* CT deregistration must go over MMIO */ + return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); } static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) @@ -200,10 +232,15 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) struct guc_ct_buffer_desc *desc; u32 blob_size; u32 cmds_size; + u32 resv_space; void *blob; u32 *cmds; int err; + err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO); + if (err) + return err; + GEM_BUG_ON(ct->vma); blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE; @@ -220,19 +257,23 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) desc = blob; cmds = blob + 2 * CTB_DESC_SIZE; cmds_size = CTB_H2G_BUFFER_SIZE; - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send", - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + resv_space = 0; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, + resv_space); - guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size); + guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space); /* store pointers to desc and cmds for recv ctb */ desc = blob + CTB_DESC_SIZE; cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE; cmds_size = CTB_G2H_BUFFER_SIZE; - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "recv", - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + resv_space = G2H_ROOM_BUFFER_SIZE; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, + resv_space); - guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size); + guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space); return 0; } @@ -261,7 +302,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base, cmds; + u32 base, desc, cmds; void *blob; int err; @@ -277,32 +318,36 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) GEM_BUG_ON(blob != ct->ctbs.send.desc); /* (re)initialize descriptors */ - cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); - guc_ct_buffer_reset(&ct->ctbs.send, cmds); - - cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); - guc_ct_buffer_reset(&ct->ctbs.recv, cmds); + guc_ct_buffer_reset(&ct->ctbs.send); + guc_ct_buffer_reset(&ct->ctbs.recv); /* * Register both CT buffers starting with RECV buffer. 
* Descriptors are in first half of the blob. */ - err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.recv.desc, blob), - INTEL_GUC_CT_BUFFER_TYPE_RECV); + desc = base + ptrdiff(ct->ctbs.recv.desc, blob); + cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); + err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST, + desc, cmds, ct->ctbs.recv.size * 4); + if (unlikely(err)) goto err_out; - err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.send.desc, blob), - INTEL_GUC_CT_BUFFER_TYPE_SEND); + desc = base + ptrdiff(ct->ctbs.send.desc, blob); + cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); + err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC, + desc, cmds, ct->ctbs.send.size * 4); + if (unlikely(err)) goto err_deregister; ct->enabled = true; + ct->stall_time = KTIME_MAX; return 0; err_deregister: - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); return err; @@ -321,8 +366,8 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ct->enabled = false; if (intel_guc_is_fw_running(guc)) { - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_SEND); - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC); + ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); } } @@ -354,81 +399,63 @@ static void write_barrier(struct intel_guc_ct *ct) } } -/** - * DOC: CTB Host to GuC request - * - * Format of the CTB Host to GuC request message is as follows:: - * - * +------------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | ... | [n-1] | - * +------------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+ - * | | 0 | 1 | ... | n | - * +============+=========+=========+=========+=========+ - * | len >= 1 | FENCE | request specific data | - * +------+-----+---------+---------+---------+---------+ - * - * ^-----------------len-------------------^ - */ - static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, - u32 fence) + u32 fence, u32 flags) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head; - u32 tail = desc->tail; + u32 tail = ctb->tail; u32 size = ctb->size; - u32 used; u32 header; + u32 hxg; + u32 type; u32 *cmds = ctb->cmds; unsigned int i; - if (unlikely(desc->is_in_error)) - return -EPIPE; - - if (unlikely(!IS_ALIGNED(head | tail, 4) || - (tail | head) >= size)) + if (unlikely(desc->status)) goto corrupted; - /* later calculations will be done in dwords */ - head /= 4; - tail /= 4; - size /= 4; - - /* - * tail == head condition indicates empty. GuC FW does not support - * using up the entire buffer to get tail == head meaning full. - */ - if (tail < head) - used = (size - head) + tail; - else - used = tail - head; + GEM_BUG_ON(tail > size); - /* make sure there is a space including extra dw for the fence */ - if (unlikely(used + len + 1 >= size)) - return -ENOSPC; +#ifdef CONFIG_DRM_I915_DEBUG_GUC + if (unlikely(tail != READ_ONCE(desc->tail))) { + CT_ERROR(ct, "Tail was modified %u != %u\n", + desc->tail, tail); + desc->status |= GUC_CTB_STATUS_MISMATCH; + goto corrupted; + } + if (unlikely(READ_ONCE(desc->head) >= size)) { + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", + desc->head, size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + goto corrupted; + } +#endif /* - * Write the message. 
The format is the following: - * DW0: header (including action code) - * DW1: fence - * DW2+: action data + * dw0: CT header (including fence) + * dw1: HXG header (including action code) + * dw2+: action data */ - header = (len << GUC_CT_MSG_LEN_SHIFT) | - GUC_CT_MSG_SEND_STATUS | - (action[0] << GUC_CT_MSG_ACTION_SHIFT); + header = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | + FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); - CT_DEBUG(ct, "writing %*ph %*ph %*ph\n", - 4, &header, 4, &fence, 4 * (len - 1), &action[1]); + type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT : + GUC_HXG_TYPE_REQUEST; + hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + + CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", + tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]); cmds[tail] = header; tail = (tail + 1) % size; - cmds[tail] = fence; + cmds[tail] = hxg; tail = (tail + 1) % size; for (i = 1; i < len; i++) { @@ -443,14 +470,20 @@ static int ct_write(struct intel_guc_ct *ct, */ write_barrier(ct); - /* now update desc tail (back in bytes) */ - desc->tail = tail * 4; + /* update local copies */ + ctb->tail = tail; + GEM_BUG_ON(atomic_read(&ctb->space) < len + GUC_CTB_HDR_LEN); + atomic_sub(len + GUC_CTB_HDR_LEN, &ctb->space); + + /* now update descriptor */ + WRITE_ONCE(desc->tail, tail); + return 0; corrupted: - CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", - desc->addr, desc->head, desc->tail, desc->size); - desc->is_in_error = 1; + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", + desc->head, desc->tail, desc->status); + ctb->broken = true; return -EPIPE; } @@ -459,7 +492,7 @@ corrupted: * @req: pointer to pending request * @status: placeholder for status * - * For each sent request, Guc shall send bac CT response message. + * For each sent request, GuC shall send back CT response message. * Our message handler will update status of tracked request once * response message with given fence is received. Wait here and * check for valid response status value. @@ -475,12 +508,18 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) /* * Fast commands should complete in less than 10us, so sample quickly * up to that length of time, then switch to a slower sleep-wait loop. - * No GuC command should ever take longer than 10ms. + * No GuC command should ever take longer than 10ms but many GuC + * commands can be inflight at time, so use a 1s timeout on the slower + * sleep-wait loop. 
*/ -#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)) - err = wait_for_us(done, 10); +#define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10 +#define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000 +#define done \ + (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ + GUC_HXG_ORIGIN_GUC) + err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS); if (err) - err = wait_for(done, 10); + err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done if (unlikely(err)) @@ -490,6 +529,131 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } +#define GUC_CTB_TIMEOUT_MS 1500 +static inline bool ct_deadlocked(struct intel_guc_ct *ct) +{ + long timeout = GUC_CTB_TIMEOUT_MS; + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; + + if (unlikely(ret)) { + struct guc_ct_buffer_desc *send = ct->ctbs.send.desc; + struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc; + + CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n", + ktime_ms_delta(ktime_get(), ct->stall_time), + send->status, recv->status); + ct->ctbs.send.broken = true; + } + + return ret; +} + +static inline bool g2h_has_room(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; + + /* + * We leave a certain amount of space in the G2H CTB buffer for + * unexpected G2H CTBs (e.g. logging, engine hang, etc...) + */ + return !g2h_len_dw || atomic_read(&ctb->space) >= g2h_len_dw; +} + +static inline void g2h_reserve_space(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + lockdep_assert_held(&ct->ctbs.send.lock); + + GEM_BUG_ON(!g2h_has_room(ct, g2h_len_dw)); + + if (g2h_len_dw) + atomic_sub(g2h_len_dw, &ct->ctbs.recv.space); +} + +static inline void g2h_release_space(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + atomic_add(g2h_len_dw, &ct->ctbs.recv.space); +} + +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head; + u32 space; + + if (atomic_read(&ctb->space) >= len_dw) + return true; + + head = READ_ONCE(desc->head); + if (unlikely(head > ctb->size)) { + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", + head, ctb->size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + ctb->broken = true; + return false; + } + + space = CIRC_SPACE(ctb->tail, head, ctb->size); + atomic_set(&ctb->space, space); + + return space >= len_dw; +} + +static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) +{ + lockdep_assert_held(&ct->ctbs.send.lock); + + if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { + if (ct->stall_time == KTIME_MAX) + ct->stall_time = ktime_get(); + + if (unlikely(ct_deadlocked(ct))) + return -EPIPE; + else + return -EBUSY; + } + + ct->stall_time = KTIME_MAX; + return 0; +} + +#define G2H_LEN_DW(f) ({ \ + typeof(f) f_ = (f); \ + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) ? 
\ + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) + \ + GUC_CTB_HXG_MSG_MIN_LEN : 0; \ +}) +static int ct_send_nb(struct intel_guc_ct *ct, + const u32 *action, + u32 len, + u32 flags) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; + unsigned long spin_flags; + u32 g2h_len_dw = G2H_LEN_DW(flags); + u32 fence; + int ret; + + spin_lock_irqsave(&ctb->lock, spin_flags); + + ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN, g2h_len_dw); + if (unlikely(ret)) + goto out; + + fence = ct_get_next_fence(ct); + ret = ct_write(ct, action, len, fence, flags); + if (unlikely(ret)) + goto out; + + g2h_reserve_space(ct, g2h_len_dw); + intel_guc_notify(ct_to_guc(ct)); + +out: + spin_unlock_irqrestore(&ctb->lock, spin_flags); + + return ret; +} + static int ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, @@ -497,8 +661,10 @@ static int ct_send(struct intel_guc_ct *ct, u32 response_buf_size, u32 *status) { + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct ct_request request; unsigned long flags; + unsigned int sleep_period_ms = 1; u32 fence; int err; @@ -506,8 +672,33 @@ static int ct_send(struct intel_guc_ct *ct, GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); + might_sleep(); + + /* + * We use a lazy spin wait loop here as we believe that if the CT + * buffers are sized correctly the flow control condition should be + * rare. Reserving the maximum size in the G2H credits as we don't know + * how big the response is going to be. + */ +retry: + spin_lock_irqsave(&ctb->lock, flags); + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN) || + !g2h_has_room(ct, GUC_CTB_HXG_MSG_MAX_LEN))) { + if (ct->stall_time == KTIME_MAX) + ct->stall_time = ktime_get(); + spin_unlock_irqrestore(&ctb->lock, flags); + + if (unlikely(ct_deadlocked(ct))) + return -EPIPE; + + if (msleep_interruptible(sleep_period_ms)) + return -EINTR; + sleep_period_ms = sleep_period_ms << 1; + + goto retry; + } - spin_lock_irqsave(&ct->ctbs.send.lock, flags); + ct->stall_time = KTIME_MAX; fence = ct_get_next_fence(ct); request.fence = fence; @@ -519,9 +710,10 @@ static int ct_send(struct intel_guc_ct *ct, list_add_tail(&request.link, &ct->requests.pending); spin_unlock(&ct->requests.lock); - err = ct_write(ct, action, len, fence); + err = ct_write(ct, action, len, fence, 0); + g2h_reserve_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); - spin_unlock_irqrestore(&ct->ctbs.send.lock, flags); + spin_unlock_irqrestore(&ctb->lock, flags); if (unlikely(err)) goto unlink; @@ -529,24 +721,25 @@ static int ct_send(struct intel_guc_ct *ct, intel_guc_notify(ct_to_guc(ct)); err = wait_for_ct_request_update(&request, status); + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); if (unlikely(err)) goto unlink; - if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; goto unlink; } if (response_buf) { /* There shall be no data in the status */ - WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status)); + WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status)); /* Return actual response len */ err = request.response_len; } else { /* There shall be no response payload */ WARN_ON(request.response_len); /* Return data decoded from the status dword */ - err = INTEL_GUC_MSG_TO_DATA(*status); + err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status); } unlink: @@ -561,16 +754,25 @@ unlink: * Command Transport (CT) buffer based GuC send function. 
*/ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) + u32 *response_buf, u32 response_buf_size, u32 flags) { u32 status = ~0; /* undefined */ int ret; if (unlikely(!ct->enabled)) { - WARN(1, "Unexpected send: action=%#x\n", *action); + struct intel_guc *guc = ct_to_guc(ct); + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); + + WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", *action); return -ENODEV; } + if (unlikely(ct->ctbs.send.broken)) + return -EPIPE; + + if (flags & INTEL_GUC_CT_SEND_NB) + return ct_send_nb(ct, action, len, flags); + ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", @@ -583,21 +785,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, return ret; } -static inline unsigned int ct_header_get_len(u32 header) -{ - return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK; -} - -static inline unsigned int ct_header_get_action(u32 header) -{ - return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK; -} - -static inline bool ct_header_is_response(u32 header) -{ - return !!(header & GUC_CT_MSG_IS_RESPONSE); -} - static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords) { struct ct_incoming_msg *msg; @@ -621,8 +808,8 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head; - u32 tail = desc->tail; + u32 head = ctb->head; + u32 tail = READ_ONCE(desc->tail); u32 size = ctb->size; u32 *cmds = ctb->cmds; s32 available; @@ -630,17 +817,28 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) unsigned int i; u32 header; - if (unlikely(desc->is_in_error)) + if (unlikely(ctb->broken)) return -EPIPE; - if (unlikely(!IS_ALIGNED(head | tail, 4) || - (tail | head) >= size)) + if (unlikely(desc->status)) goto corrupted; - /* later calculations will be done in dwords */ - head /= 4; - tail /= 4; - size /= 4; + GEM_BUG_ON(head > size); + +#ifdef CONFIG_DRM_I915_DEBUG_GUC + if (unlikely(head != READ_ONCE(desc->head))) { + CT_ERROR(ct, "Head was modified %u != %u\n", + desc->head, head); + desc->status |= GUC_CTB_STATUS_MISMATCH; + goto corrupted; + } +#endif + if (unlikely(tail >= size)) { + CT_ERROR(ct, "Invalid tail offset %u >= %u)\n", + tail, size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + goto corrupted; + } /* tail == head condition indicates empty */ available = tail - head; @@ -652,14 +850,14 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* beware of buffer wrap case */ if (unlikely(available < 0)) available += size; - CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail); + CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail, size); GEM_BUG_ON(available < 0); header = cmds[head]; head = (head + 1) % size; /* message len with header */ - len = ct_header_get_len(header) + 1; + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) + GUC_CTB_MSG_MIN_LEN; if (unlikely(len > (u32)available)) { CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n", 4, &header, @@ -667,6 +865,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) size - head : available - 1), &cmds[head], 4 * (head + available - 1 > size ? 
available - 1 - size + head : 0), &cmds[0]); + desc->status |= GUC_CTB_STATUS_UNDERFLOW; goto corrupted; } @@ -689,65 +888,39 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) } CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg); - desc->head = head * 4; + /* update local copies */ + ctb->head = head; + + /* now update descriptor */ + WRITE_ONCE(desc->head, head); + return available - len; corrupted: - CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", - desc->addr, desc->head, desc->tail, desc->size); - desc->is_in_error = 1; + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", + desc->head, desc->tail, desc->status); + ctb->broken = true; return -EPIPE; } -/** - * DOC: CTB GuC to Host response - * - * Format of the CTB GuC to Host response message is as follows:: - * - * +------------+---------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | [3] | ... | [n-1] | - * +------------+---------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+---------+ - * | | 0 | 1 | 2 | ... | n | - * +============+=========+=========+=========+=========+=========+ - * | len >= 2 | FENCE | STATUS | response specific data | - * +------+-----+---------+---------+---------+---------+---------+ - * - * ^-----------------------len-----------------------^ - */ - static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response) { - u32 header = response->msg[0]; - u32 len = ct_header_get_len(header); - u32 fence; - u32 status; - u32 datalen; + u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]); + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, response->msg[0]); + const u32 *hxg = &response->msg[GUC_CTB_MSG_MIN_LEN]; + const u32 *data = &hxg[GUC_HXG_MSG_MIN_LEN]; + u32 datalen = len - GUC_HXG_MSG_MIN_LEN; struct ct_request *req; unsigned long flags; bool found = false; int err = 0; - GEM_BUG_ON(!ct_header_is_response(header)); - - /* Response payload shall at least include fence and status */ - if (unlikely(len < 2)) { - CT_ERROR(ct, "Corrupted response (len %u)\n", len); - return -EPROTO; - } - - fence = response->msg[1]; - status = response->msg[2]; - datalen = len - 2; - - /* Format of the status follows RESPONSE message */ - if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { - CT_ERROR(ct, "Corrupted response (status %#x)\n", status); - return -EPROTO; - } + GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS && + FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE); - CT_DEBUG(ct, "response fence %u status %#x\n", fence, status); + CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]); spin_lock_irqsave(&ct->requests.lock, flags); list_for_each_entry(req, &ct->requests.pending, link) { @@ -763,18 +936,22 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r err = -EMSGSIZE; } if (datalen) - memcpy(req->response_buf, response->msg + 3, 4 * datalen); + memcpy(req->response_buf, data, 4 * datalen); req->response_len = datalen; - WRITE_ONCE(req->status, status); + WRITE_ONCE(req->status, hxg[0]); found = true; break; } - spin_unlock_irqrestore(&ct->requests.lock, flags); - if (!found) { CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence); - return -ENOKEY; + CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", 
fence, + ct->requests.last_fence); + list_for_each_entry(req, &ct->requests.pending, link) + CT_ERROR(ct, "request %u awaits response\n", + req->fence); + err = -ENOKEY; } + spin_unlock_irqrestore(&ct->requests.lock, flags); if (unlikely(err)) return err; @@ -786,14 +963,16 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { struct intel_guc *guc = ct_to_guc(ct); - u32 header, action, len; + const u32 *hxg; const u32 *payload; + u32 hxg_len, action, len; int ret; - header = request->msg[0]; - payload = &request->msg[1]; - action = ct_header_get_action(header); - len = ct_header_get_len(header); + hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; + hxg_len = request->size - GUC_CTB_MSG_MIN_LEN; + payload = &hxg[GUC_HXG_MSG_MIN_LEN]; + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); + len = hxg_len - GUC_HXG_MSG_MIN_LEN; CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload); @@ -801,6 +980,19 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_DEFAULT: ret = intel_guc_to_host_process_recv_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + ret = intel_guc_deregister_done_process_msg(guc, payload, + len); + break; + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + ret = intel_guc_sched_done_process_msg(guc, payload, len); + break; + case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: + ret = intel_guc_context_reset_process_msg(guc, payload, len); + break; + case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: + ret = intel_guc_engine_failure_process_msg(guc, payload, len); + break; default: ret = -EOPNOTSUPP; break; @@ -855,29 +1047,24 @@ static void ct_incoming_request_worker_func(struct work_struct *w) queue_work(system_unbound_wq, &ct->requests.worker); } -/** - * DOC: CTB GuC to Host request - * - * Format of the CTB GuC to Host request message is as follows:: - * - * +------------+---------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | [3] | ... | [n-1] | - * +------------+---------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+---------+ - * | | 0 | 1 | 2 | ... | n | - * +============+=========+=========+=========+=========+=========+ - * | len | request specific data | - * +------+-----+---------+---------+---------+---------+---------+ - * - * ^-----------------------len-----------------------^ - */ - -static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) +static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { + const u32 *hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); unsigned long flags; - GEM_BUG_ON(ct_header_is_response(request->msg[0])); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT); + + /* + * Adjusting the space must be done in IRQ or deadlock can occur as the + * CTB processing in the below workqueue can send CTBs which creates a + * circular dependency if the space was returned there. 
+ */ + switch (action) { + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + g2h_release_space(ct, request->size); + } spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); @@ -887,15 +1074,53 @@ static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *re return 0; } -static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) { - u32 header = msg->msg[0]; + u32 origin, type; + u32 *hxg; int err; - if (ct_header_is_response(header)) + if (unlikely(msg->size < GUC_CTB_HXG_MSG_MIN_LEN)) + return -EBADMSG; + + hxg = &msg->msg[GUC_CTB_MSG_MIN_LEN]; + + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { + err = -EPROTO; + goto failed; + } + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); + switch (type) { + case GUC_HXG_TYPE_EVENT: + err = ct_handle_event(ct, msg); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + case GUC_HXG_TYPE_RESPONSE_FAILURE: err = ct_handle_response(ct, msg); + break; + default: + err = -EOPNOTSUPP; + } + + if (unlikely(err)) { +failed: + CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n", + ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg); + } + return err; +} + +static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +{ + u32 format = FIELD_GET(GUC_CTB_MSG_0_FORMAT, msg->msg[0]); + int err; + + if (format == GUC_CTB_FORMAT_HXG) + err = ct_handle_hxg(ct, msg); else - err = ct_handle_request(ct, msg); + err = -EOPNOTSUPP; if (unlikely(err)) { CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", @@ -958,3 +1183,25 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) ct_try_receive_message(ct); } + +void intel_guc_ct_print_info(struct intel_guc_ct *ct, + struct drm_printer *p) +{ + drm_printf(p, "CT %s\n", enableddisabled(ct->enabled)); + + if (!ct->enabled) + return; + + drm_printf(p, "H2G Space: %u\n", + atomic_read(&ct->ctbs.send.space) * 4); + drm_printf(p, "Head: %u\n", + ct->ctbs.send.desc->head); + drm_printf(p, "Tail: %u\n", + ct->ctbs.send.desc->tail); + drm_printf(p, "G2H Space: %u\n", + atomic_read(&ct->ctbs.recv.space) * 4); + drm_printf(p, "Head: %u\n", + ct->ctbs.recv.desc->head); + drm_printf(p, "Tail: %u\n", + ct->ctbs.recv.desc->tail); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index cb222f202301..f709a19c7e21 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -9,11 +9,14 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <linux/ktime.h> +#include <linux/wait.h> #include "intel_guc_fwif.h" struct i915_vma; struct intel_guc; +struct drm_printer; /** * DOC: Command Transport (CT). 
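Editor's note (not part of the patch): the receive path above reduces to a small decision tree — ct_handle_msg() only accepts HXG-format CTB messages, ct_handle_hxg() then requires the HXG header to originate from the GuC, and the type field selects between the event path (queued for the request worker) and the response path (matched against a pending fence). The standalone C sketch below illustrates only that control flow; the enum values and the dispatch() helper are illustrative placeholders, not the ABI constants defined in the GuC headers.

#include <stdio.h>

/* Illustrative stand-ins; the real values live in the GuC ABI headers. */
enum ctb_format { CTB_FORMAT_HXG, CTB_FORMAT_OTHER };
enum hxg_origin { HXG_ORIGIN_HOST, HXG_ORIGIN_GUC };
enum hxg_type   { HXG_TYPE_REQUEST, HXG_TYPE_EVENT,
		  HXG_TYPE_RESPONSE_SUCCESS, HXG_TYPE_RESPONSE_FAILURE };

/* Mirrors the shape of ct_handle_msg()/ct_handle_hxg(): events are queued
 * for deferred processing, responses complete a pending fenced request. */
static const char *dispatch(enum ctb_format fmt, enum hxg_origin origin,
			    enum hxg_type type)
{
	if (fmt != CTB_FORMAT_HXG)
		return "unsupported CTB format (-EOPNOTSUPP)";
	if (origin != HXG_ORIGIN_GUC)
		return "unexpected origin (-EPROTO)";

	switch (type) {
	case HXG_TYPE_EVENT:
		return "event: queue for the request worker";
	case HXG_TYPE_RESPONSE_SUCCESS:
	case HXG_TYPE_RESPONSE_FAILURE:
		return "response: match fence, wake the waiter";
	default:
		return "unsupported HXG type (-EOPNOTSUPP)";
	}
}

int main(void)
{
	printf("%s\n", dispatch(CTB_FORMAT_HXG, HXG_ORIGIN_GUC, HXG_TYPE_EVENT));
	printf("%s\n", dispatch(CTB_FORMAT_HXG, HXG_ORIGIN_GUC,
				HXG_TYPE_RESPONSE_SUCCESS));
	return 0;
}

In the driver itself the same classification is performed with FIELD_GET() on the raw message dwords, as shown in ct_handle_msg()/ct_handle_hxg() above.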
@@ -31,16 +34,25 @@ struct intel_guc; * @lock: protects access to the commands buffer and buffer descriptor * @desc: pointer to the buffer descriptor * @cmds: pointer to the commands buffer - * @size: size of the commands buffer + * @size: size of the commands buffer in dwords + * @resv_space: reserved space in buffer in dwords + * @head: local shadow copy of head in dwords + * @tail: local shadow copy of tail in dwords + * @space: local shadow copy of space in dwords + * @broken: flag to indicate if descriptor data is broken */ struct intel_guc_ct_buffer { spinlock_t lock; struct guc_ct_buffer_desc *desc; u32 *cmds; u32 size; + u32 resv_space; + u32 tail; + u32 head; + atomic_t space; + bool broken; }; - /** Top-level structure for Command Transport related data * * Includes a pair of CT buffers for bi-directional communication and tracking @@ -58,8 +70,11 @@ struct intel_guc_ct { struct tasklet_struct receive_tasklet; + /** @wq: wait queue for g2h channel */ + wait_queue_head_t wq; + struct { - u32 last_fence; /* last fence used to send request */ + u16 last_fence; /* last fence used to send request */ spinlock_t lock; /* protects pending requests list */ struct list_head pending; /* requests waiting for response */ @@ -67,6 +82,9 @@ struct intel_guc_ct { struct list_head incoming; /* incoming requests */ struct work_struct worker; /* handler for incoming requests */ } requests; + + /** @stall_time: time at which a CTB submission first stalled */ + ktime_t stall_time; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); @@ -85,8 +103,18 @@ static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) return ct->enabled; } +#define INTEL_GUC_CT_SEND_NB BIT(31) +#define INTEL_GUC_CT_SEND_G2H_DW_SHIFT 0 +#define INTEL_GUC_CT_SEND_G2H_DW_MASK (0xff << INTEL_GUC_CT_SEND_G2H_DW_SHIFT) +#define MAKE_SEND_FLAGS(len) ({ \ + typeof(len) len_ = (len); \ + GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_)); \ + (FIELD_PREP(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_) | INTEL_GUC_CT_SEND_NB); \ +}) int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size); + u32 *response_buf, u32 response_buf_size, u32 flags); void intel_guc_ct_event_handler(struct intel_guc_ct *ct); +void intel_guc_ct_print_info(struct intel_guc_ct *ct, struct drm_printer *p); + #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index fe7cb7b29a1e..887c8c8f35db 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -9,6 +9,10 @@ #include "intel_guc.h" #include "intel_guc_debugfs.h" #include "intel_guc_log_debugfs.h" +#include "gt/uc/intel_guc_ct.h" +#include "gt/uc/intel_guc_ads.h" +#include "gt/uc/intel_guc_submission.h" +#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -22,16 +26,57 @@ static int guc_info_show(struct seq_file *m, void *data) drm_puts(&p, "\n"); intel_guc_log_info(&guc->log, &p); - /* Add more as required ...
*/ + if (!intel_guc_submission_is_used(guc)) + return 0; + + intel_guc_ct_print_info(&guc->ct, &p); + intel_guc_submission_print_info(guc, &p); + intel_guc_ads_print_policy_info(guc, &p); return 0; } DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info); +static int guc_registered_contexts_show(struct seq_file *m, void *data) +{ + struct intel_guc *guc = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + intel_guc_submission_print_context_info(guc, &p); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); + +static int guc_slpc_info_show(struct seq_file *m, void *unused) +{ + struct intel_guc *guc = m->private; + struct intel_guc_slpc *slpc = &guc->slpc; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_slpc_is_used(guc)) + return -ENODEV; + + return intel_guc_slpc_print_info(slpc, &p); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); + +static bool intel_eval_slpc_support(void *data) +{ + struct intel_guc *guc = (struct intel_guc *)data; + + return intel_guc_slpc_is_used(guc); +} + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct debugfs_gt_file files[] = { { "guc_info", &guc_info_fops, NULL }, + { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, + { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index e9a9d85e2aa3..fa4be13c8854 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -12,19 +12,27 @@ #include "gt/intel_engine_types.h" #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" #include "abi/guc_errors_abi.h" #include "abi/guc_communication_mmio_abi.h" #include "abi/guc_communication_ctb_abi.h" #include "abi/guc_messages_abi.h" +/* Payload length only i.e. 
don't include G2H header length */ +#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2 +#define G2H_LEN_DW_DEREGISTER_CONTEXT 1 + +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 #define GUC_CLIENT_PRIORITY_NORMAL 3 #define GUC_CLIENT_PRIORITY_NUM 4 -#define GUC_MAX_STAGE_DESCRIPTORS 1024 -#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS +#define GUC_MAX_LRC_DESCRIPTORS 65535 +#define GUC_INVALID_LRC_ID GUC_MAX_LRC_DESCRIPTORS #define GUC_RENDER_ENGINE 0 #define GUC_VIDEO_ENGINE 1 @@ -81,15 +89,14 @@ #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) #define GUC_LOG_CRASH_SHIFT 4 #define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) -#define GUC_LOG_DPC_SHIFT 6 -#define GUC_LOG_DPC_MASK (0x7 << GUC_LOG_DPC_SHIFT) -#define GUC_LOG_ISR_SHIFT 9 -#define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT) +#define GUC_LOG_DEBUG_SHIFT 6 +#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 #define GUC_CTL_FEATURE 2 #define GUC_CTL_DISABLE_SCHEDULER (1 << 14) +#define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY_SHIFT 0 @@ -136,6 +143,11 @@ #define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \ (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT) +#define SLPC_EVENT(id, c) (\ +FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ +FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \ +) + static inline u8 engine_class_to_guc_class(u8 class) { BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS); @@ -177,66 +189,40 @@ struct guc_process_desc { u32 reserved[30]; } __packed; -/* engine id and context id is packed into guc_execlist_context.context_id*/ -#define GUC_ELC_CTXID_OFFSET 0 -#define GUC_ELC_ENGINE_OFFSET 29 +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) -/* The execlist context including software and HW information */ -struct guc_execlist_context { - u32 context_desc; - u32 context_id; - u32 ring_status; - u32 ring_lrca; - u32 ring_begin; - u32 ring_end; - u32 ring_next_free_location; - u32 ring_current_tail_pointer_value; - u8 engine_state_submit_value; - u8 engine_state_wait_value; - u16 pagefault_count; - u16 engine_submit_queue_count; -} __packed; +#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 +#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000 + +/* Preempt to idle on quantum expiry */ +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0) /* - * This structure describes a stage set arranged for a particular communication - * between uKernel (GuC) and Driver (KMD). Technically, this is known as a - * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" - * to avoid confusion with all the other things already named "context" in the - * driver. A static pool of these descriptors are stored inside a GEM object - * (stage_desc_pool) which is held for the entire lifetime of our interaction - * with the GuC, being allocated before the GuC is loaded with its firmware. + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. 
*/ -struct guc_stage_desc { - u32 sched_common_area; - u32 stage_id; - u32 pas_id; - u8 engines_used; - u64 db_trigger_cpu; - u32 db_trigger_uk; - u64 db_trigger_phy; - u16 db_id; - - struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM]; - - u8 attribute; - +struct guc_lrc_desc { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SLPC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; u32 priority; - - u32 wq_sampled_tail_offset; - u32 wq_total_submit_enqueues; - u32 process_desc; u32 wq_addr; u32 wq_size; - - u32 engine_presence; - - u8 engine_suspended; - - u8 reserved0[3]; - u64 reserved1[1]; - - u64 desc_private; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. (in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; } __packed; #define GUC_POWER_UNSPECIFIED 0 @@ -247,32 +233,14 @@ struct guc_stage_desc { /* Scheduling policy settings */ -/* Reset engine upon preempt failure */ -#define POLICY_RESET_ENGINE (1<<0) -/* Preempt to idle on quantum expiry */ -#define POLICY_PREEMPT_TO_IDLE (1<<1) +#define GLOBAL_POLICY_MAX_NUM_WI 15 -#define POLICY_MAX_NUM_WI 15 -#define POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 -#define POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 -#define POLICY_DEFAULT_PREEMPTION_TIME_US 500000 -#define POLICY_DEFAULT_FAULT_TIME_US 250000 +/* Don't reset an engine upon preemption failure */ +#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0) -struct guc_policy { - /* Time for one workload to execute. (in micro seconds) */ - u32 execution_quantum; - /* Time to wait for a preemption request to completed before issuing a - * reset. (in micro seconds). */ - u32 preemption_time; - /* How much time to allow to run after the first fault is observed. - * Then preempt afterwards. (in micro seconds) */ - u32 fault_time; - u32 policy_flags; - u32 reserved[8]; -} __packed; +#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 struct guc_policies { - struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINE_CLASSES]; u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; /* In micro seconds. How much time to allow before DPC processing is * called back via interrupt (to prevent DPC queue drain starving).
*/ u32 max_num_work_items; + u32 global_flags; u32 reserved[4]; } __packed; @@ -311,29 +280,13 @@ struct guc_gt_system_info { u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; -/* Clients info */ -struct guc_ct_pool_entry { - struct guc_ct_buffer_desc desc; - u32 reserved[7]; -} __packed; - -#define GUC_CT_POOL_SIZE 2 - -struct guc_clients_info { - u32 clients_num; - u32 reserved0[13]; - u32 ct_pool_addr; - u32 ct_pool_count; - u32 reserved[4]; -} __packed; - /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; u32 reserved0; u32 scheduler_policies; u32 gt_system_info; - u32 clients_info; + u32 reserved1; u32 control_data; u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; @@ -344,8 +297,7 @@ struct guc_ads { /* GuC logging structures */ enum guc_log_buffer_type { - GUC_ISR_LOG_BUFFER, - GUC_DPC_LOG_BUFFER, + GUC_DEBUG_LOG_BUFFER, GUC_CRASH_DUMP_LOG_BUFFER, GUC_MAX_LOG_BUFFER }; @@ -414,23 +366,6 @@ struct guc_shared_ctx_data { struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; } __packed; -#define __INTEL_GUC_MSG_GET(T, m) \ - (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) -#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) -#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) -#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) - -#define __INTEL_GUC_MSG_TYPE_IS(T, m) \ - (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) -#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) -#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) - -#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ - (typecheck(u32, (m)) && \ - ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ - ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \ - (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT))) - /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index c36d5eb5bbb9..ac0931f0374b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -197,10 +197,8 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log, static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) { switch (type) { - case GUC_ISR_LOG_BUFFER: - return ISR_BUFFER_SIZE; - case GUC_DPC_LOG_BUFFER: - return DPC_BUFFER_SIZE; + case GUC_DEBUG_LOG_BUFFER: + return DEBUG_BUFFER_SIZE; case GUC_CRASH_DUMP_LOG_BUFFER: return CRASH_BUFFER_SIZE; default: @@ -245,7 +243,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) src_data += PAGE_SIZE; dst_data += PAGE_SIZE; - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { /* * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading @@ -463,21 +461,16 @@ int intel_guc_log_create(struct intel_guc_log *log) * +===============================+ 00B * | Crash dump state header | * +-------------------------------+ 32B - * | DPC state header | + * | Debug state header | * +-------------------------------+ 64B - * | ISR state header | - * +-------------------------------+ 96B * | | * 
+===============================+ PAGE_SIZE (4KB) * | Crash Dump logs | * +===============================+ + CRASH_SIZE - * | DPC logs | - * +===============================+ + DPC_SIZE - * | ISR logs | - * +===============================+ + ISR_SIZE + * | Debug logs | + * +===============================+ + DEBUG_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DPC_BUFFER_SIZE + - ISR_BUFFER_SIZE; + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -675,10 +668,8 @@ static const char * stringify_guc_log_type(enum guc_log_buffer_type type) { switch (type) { - case GUC_ISR_LOG_BUFFER: - return "ISR"; - case GUC_DPC_LOG_BUFFER: - return "DPC"; + case GUC_DEBUG_LOG_BUFFER: + return "DEBUG"; case GUC_CRASH_DUMP_LOG_BUFFER: return "CRASH"; default: @@ -708,7 +699,7 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count); - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", stringify_guc_log_type(type), log->stats[type].flush, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 11fccd0b2294..ac1ee1d5ce10 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -17,12 +17,10 @@ struct intel_guc; #ifdef CONFIG_DRM_I915_DEBUG_GUC #define CRASH_BUFFER_SIZE SZ_2M -#define DPC_BUFFER_SIZE SZ_8M -#define ISR_BUFFER_SIZE SZ_8M +#define DEBUG_BUFFER_SIZE SZ_16M #else #define CRASH_BUFFER_SIZE SZ_8K -#define DPC_BUFFER_SIZE SZ_32K -#define ISR_BUFFER_SIZE SZ_32K +#define DEBUG_BUFFER_SIZE SZ_64K #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c new file mode 100644 index 000000000000..fc805d466d99 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "intel_guc_rc.h" +#include "gt/intel_gt.h" +#include "i915_drv.h" + +static bool __guc_rc_supported(struct intel_guc *guc) +{ + /* GuC RC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_rc_selected(struct intel_guc *guc) +{ + if (!intel_guc_rc_is_supported(guc)) + return false; + + return guc->submission_selected; +} + +void intel_guc_rc_init_early(struct intel_guc *guc) +{ + guc->rc_supported = __guc_rc_supported(guc); + guc->rc_selected = __guc_rc_selected(guc); +} + +static int guc_action_control_gucrc(struct intel_guc *guc, bool enable) +{ + u32 rc_mode = enable ? INTEL_GUCRC_FIRMWARE_CONTROL : + INTEL_GUCRC_HOST_CONTROL; + u32 action[] = { + INTEL_GUC_ACTION_SETUP_PC_GUCRC, + rc_mode + }; + int ret; + + ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); + ret = ret > 0 ? 
-EPROTO : ret; + + return ret; +} + +static int __guc_rc_control(struct intel_guc *guc, bool enable) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_device *drm = &guc_to_gt(guc)->i915->drm; + int ret; + + if (!intel_uc_uses_guc_rc(&gt->uc)) + return -EOPNOTSUPP; + + if (!intel_guc_is_ready(guc)) + return -EINVAL; + + ret = guc_action_control_gucrc(guc, enable); + if (ret) { + drm_err(drm, "Failed to %s GuC RC (%pe)\n", + enabledisable(enable), ERR_PTR(ret)); + return ret; + } + + drm_info(&gt->i915->drm, "GuC RC: %s\n", + enableddisabled(enable)); + + return 0; +} + +int intel_guc_rc_enable(struct intel_guc *guc) +{ + return __guc_rc_control(guc, true); +} + +int intel_guc_rc_disable(struct intel_guc *guc) +{ + return __guc_rc_control(guc, false); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h new file mode 100644 index 000000000000..57e86c337838 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_RC_H_ +#define _INTEL_GUC_RC_H_ + +#include "intel_guc_submission.h" + +void intel_guc_rc_init_early(struct intel_guc *guc); + +static inline bool intel_guc_rc_is_supported(struct intel_guc *guc) +{ + return guc->rc_supported; +} + +static inline bool intel_guc_rc_is_wanted(struct intel_guc *guc) +{ + return guc->submission_selected && intel_guc_rc_is_supported(guc); +} + +static inline bool intel_guc_rc_is_used(struct intel_guc *guc) +{ + return intel_guc_submission_is_used(guc) && intel_guc_rc_is_wanted(guc); +} + +int intel_guc_rc_enable(struct intel_guc *guc); +int intel_guc_rc_disable(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c new file mode 100644 index 000000000000..65a3e7fdb2b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_guc_slpc.h" +#include "gt/intel_gt.h" + +static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) +{ + return container_of(slpc, struct intel_guc, slpc); +} + +static inline struct intel_gt *slpc_to_gt(struct intel_guc_slpc *slpc) +{ + return guc_to_gt(slpc_to_guc(slpc)); +} + +static inline struct drm_i915_private *slpc_to_i915(struct intel_guc_slpc *slpc) +{ + return slpc_to_gt(slpc)->i915; +} + +static bool __detect_slpc_supported(struct intel_guc *guc) +{ + /* GuC SLPC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_slpc_selected(struct intel_guc *guc) +{ + if (!intel_guc_slpc_is_supported(guc)) + return false; + + return guc->submission_selected; +} + +void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + slpc->supported = __detect_slpc_supported(guc); + slpc->selected = __guc_slpc_selected(guc); +} + +static void slpc_mem_set_param(struct slpc_shared_data *data, + u32 id, u32 value) +{ + GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS); + /* + * When the flag bit is set, corresponding value will be read + * and applied by SLPC.
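A worked example of the indexing used by the two statements that follow (illustrative arithmetic only; id 40 is an arbitrary in-range parameter):

	/* For id == 40: 40 >> 5 == 1 and 40 % 32 == 8, so this expands to */
	data->override_params.bits[1] |= 1 << 8;
	data->override_params.values[40] = value;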
+ */ + data->override_params.bits[id >> 5] |= (1 << (id % 32)); + data->override_params.values[id] = value; +} + +static void slpc_mem_set_enabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Enabling a param involves setting the enable_id + * to 1 and disable_id to 0. + */ + slpc_mem_set_param(data, enable_id, 1); + slpc_mem_set_param(data, disable_id, 0); +} + +static void slpc_mem_set_disabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Disabling a param involves setting the enable_id + * to 0 and disable_id to 1. + */ + slpc_mem_set_param(data, disable_id, 1); + slpc_mem_set_param(data, enable_id, 0); +} + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + GEM_BUG_ON(slpc->vma); + + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); + if (unlikely(err)) { + drm_err(&i915->drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + + return err; +} + +static u32 slpc_get_state(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data; + + GEM_BUG_ON(!slpc->vma); + + drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); + data = slpc->vaddr; + + return data->header.global_state; +} + +static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + +static bool slpc_is_running(struct intel_guc_slpc *slpc) +{ + return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; +} + +static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int slpc_query_task_state(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_query(guc, offset); + if (unlikely(ret)) + drm_err(&i915->drm, "Failed to query task state (%pe)\n", + ERR_PTR(ret)); + + drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); + + return ret; +} + +static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int ret; + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + ret = guc_action_slpc_set_param(guc, id, value); + if (ret) + drm_err(&i915->drm, "Failed to set param %d to %u (%pe)\n", + id, value, ERR_PTR(ret)); + + return ret; +} + +static int slpc_unset_param(struct intel_guc_slpc *slpc, + u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + +static const char *slpc_global_state_to_string(enum slpc_global_state state) +{ + switch (state) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + return "not running"; + case SLPC_GLOBAL_STATE_INITIALIZING: + return "initializing"; + case SLPC_GLOBAL_STATE_RESETTING: + return "resetting"; + case SLPC_GLOBAL_STATE_RUNNING: + return "running"; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + return "shutting down"; + case SLPC_GLOBAL_STATE_ERROR: + return "error"; + default: + return "unknown"; + } +} + +static const char *slpc_get_state_string(struct intel_guc_slpc *slpc) +{ + return slpc_global_state_to_string(slpc_get_state(slpc)); +} + +static int guc_action_slpc_reset(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_RESET, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_reset(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct intel_guc *guc = slpc_to_guc(slpc); + u32 offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_reset(guc, offset); + + if (unlikely(ret < 0)) { + drm_err(&i915->drm, "SLPC reset action failed (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + if (!ret) { + if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) { + drm_err(&i915->drm, "SLPC not enabled! 
State = %s\n", + slpc_get_state_string(slpc)); + return -EIO; + } + } + + return 0; +} + +static u32 slpc_decode_min_freq(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + + GEM_BUG_ON(!slpc->vma); + + return DIV_ROUND_CLOSEST(REG_FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK, + data->task_state_data.freq) * + GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); +} + +static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + + GEM_BUG_ON(!slpc->vma); + + return DIV_ROUND_CLOSEST(REG_FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK, + data->task_state_data.freq) * + GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); +} + +static void slpc_shared_data_reset(struct slpc_shared_data *data) +{ + memset(data, 0, sizeof(struct slpc_shared_data)); + + data->header.size = sizeof(struct slpc_shared_data); + + /* Enable only GTPERF task, disable others */ + slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF, + SLPC_PARAM_TASK_DISABLE_GTPERF); + + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, + SLPC_PARAM_TASK_DISABLE_BALANCER); + + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, + SLPC_PARAM_TASK_DISABLE_DCC); +} + +/** + * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + if (val < slpc->min_freq || + val > slpc->rp0_freq || + val < slpc->min_freq_softlimit) + return -EINVAL; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + val); + + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + } + + if (!ret) + slpc->max_freq_softlimit = val; + + return ret; +} + +/** + * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold max frequency (MHz) + * + * This function will invoke GuC SLPC action to read the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_max_freq(slpc); + } + + return ret; +} + +/** + * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the min unslice + * frequency. + * + * Return: 0 on success, non-zero error code on failure. 
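A caller-side sketch of the getters defined in this file, for example from a debugfs path (illustrative only; the helper name is made up and error handling is minimal):

	static void example_report_limits(struct intel_guc_slpc *slpc,
					  struct drm_printer *p)
	{
		u32 min = 0, max = 0;

		/* Both getters take a runtime PM reference internally. */
		if (!intel_guc_slpc_get_min_freq(slpc, &min) &&
		    !intel_guc_slpc_get_max_freq(slpc, &max))
			drm_printf(p, "SLPC limits: %u-%u MHz\n", min, max);
	}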
+ */ +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + if (val < slpc->min_freq || + val > slpc->rp0_freq || + val > slpc->max_freq_softlimit) + return -EINVAL; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + } + + if (!ret) + slpc->min_freq_softlimit = val; + + return ret; +} + +/** + * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold min frequency (MHz) + * + * This function will invoke GuC SLPC action to read the min frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_min_freq(slpc); + } + + return ret; +} + +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) +{ + u32 pm_intrmsk_mbz = 0; + + /* + * Allow GuC to receive ARAT timer expiry event. + * This interrupt register is setup by RPS code + * when host based Turbo is enabled. + */ + pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + + intel_uncore_rmw(gt->uncore, + GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); +} + +static int slpc_set_softlimits(struct intel_guc_slpc *slpc) +{ + int ret = 0; + + /* + * Softlimits are initially equivalent to platform limits + * unless they have deviated from defaults, in which case, + * we retain the values and set min/max accordingly. 
+ */ + if (!slpc->max_freq_softlimit) + slpc->max_freq_softlimit = slpc->rp0_freq; + else if (slpc->max_freq_softlimit != slpc->rp0_freq) + ret = intel_guc_slpc_set_max_freq(slpc, + slpc->max_freq_softlimit); + + if (unlikely(ret)) + return ret; + + if (!slpc->min_freq_softlimit) + slpc->min_freq_softlimit = slpc->min_freq; + else if (slpc->min_freq_softlimit != slpc->min_freq) + return intel_guc_slpc_set_min_freq(slpc, + slpc->min_freq_softlimit); + + return 0; +} + +static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) +{ + int ret = 0; + + if (ignore) { + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + ignore); + if (!ret) + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } else { + ret = slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); + if (!ret) + return slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); + } + + return ret; +} + +static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) +{ + /* Force SLPC to used platform rp0 */ + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + slpc->rp0_freq); +} + +static void slpc_get_rp_values(struct intel_guc_slpc *slpc) +{ + u32 rp_state_cap; + + rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore, + GEN6_RP_STATE_CAP); + + slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; + slpc->rp1_freq = REG_FIELD_GET(RP1_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; + slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; +} + +/* + * intel_guc_slpc_enable() - Start SLPC + * @slpc: pointer to intel_guc_slpc. + * + * SLPC is enabled by setting up the shared data structure and + * sending reset event to GuC SLPC. Initial data is setup in + * intel_guc_slpc_init. Here we send the reset event. We do + * not currently need a slpc_disable since this is taken care + * of automatically when a reset/suspend occurs and the GuC + * CTB is destroyed. + * + * Return: 0 on success, non-zero error code on failure. 
+ */ +int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int ret; + + GEM_BUG_ON(!slpc->vma); + + slpc_shared_data_reset(slpc->vaddr); + + ret = slpc_reset(slpc); + if (unlikely(ret < 0)) { + drm_err(&i915->drm, "SLPC Reset event returned (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + ret = slpc_query_task_state(slpc); + if (unlikely(ret < 0)) + return ret; + + intel_guc_pm_intrmsk_enable(&i915->gt); + + slpc_get_rp_values(slpc); + + /* Ignore efficient freq and set min to platform min */ + ret = slpc_ignore_eff_freq(slpc, true); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC min to RPn (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + /* Set SLPC max limit to RP0 */ + ret = slpc_use_fused_rp0(slpc); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC max to RP0 (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + /* Revert SLPC min/max to softlimits if necessary */ + ret = slpc_set_softlimits(slpc); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC softlimits (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + return 0; +} + +int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct slpc_shared_data *data = slpc->vaddr; + struct slpc_task_state_data *slpc_tasks; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!slpc->vma); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_query_task_state(slpc); + + if (!ret) { + slpc_tasks = &data->task_state_data; + + drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc)); + drm_printf(p, "\tGTPERF task active: %s\n", + yesno(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tMax freq: %u MHz\n", + slpc_decode_max_freq(slpc)); + drm_printf(p, "\tMin freq: %u MHz\n", + slpc_decode_min_freq(slpc)); + } + } + + return ret; +} + +void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) +{ + if (!slpc->vma) + return; + + i915_vma_unpin_and_release(&slpc->vma, I915_VMA_RELEASE_MAP); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h new file mode 100644 index 000000000000..e45054d5b9b4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_SLPC_H_ +#define _INTEL_GUC_SLPC_H_ + +#include "intel_guc_submission.h" +#include "intel_guc_slpc_types.h" + +struct intel_gt; +struct drm_printer; + +static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) +{ + return guc->slpc.supported; +} + +static inline bool intel_guc_slpc_is_wanted(struct intel_guc *guc) +{ + return guc->slpc.selected; +} + +static inline bool intel_guc_slpc_is_used(struct intel_guc *guc) +{ + return intel_guc_submission_is_used(guc) && intel_guc_slpc_is_wanted(guc); +} + +void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc); + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc); +int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); +void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer 
*p); +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h new file mode 100644 index 000000000000..41d13527666f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_SLPC_TYPES_H_ +#define _INTEL_GUC_SLPC_TYPES_H_ + +#include <linux/types.h> + +#define SLPC_RESET_TIMEOUT_MS 5 + +struct intel_guc_slpc { + struct i915_vma *vma; + struct slpc_shared_data *vaddr; + bool supported; + bool selected; + + /* platform frequency limits */ + u32 min_freq; + u32 rp0_freq; + u32 rp1_freq; + + /* frequency softlimits */ + u32 min_freq_softlimit; + u32 max_freq_softlimit; +}; + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7c8ff9792f7b..87d8dc8f51b9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -10,10 +10,13 @@ #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_lrc.h" +#include "gt/intel_lrc_reg.h" #include "gt/intel_mocs.h" #include "gt/intel_ring.h" @@ -58,244 +61,705 @@ * */ +/* GuC Virtual Engine */ +struct guc_virtual_engine { + struct intel_engine_cs base; + struct intel_context context; +}; + +static struct intel_context * +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); + #define GUC_REQUEST_SIZE 64 /* bytes */ -static inline struct i915_priolist *to_priolist(struct rb_node *rb) +/* + * Below is a set of functions which control the GuC scheduling state which do + * not require a lock as all state transitions are mutually exclusive. i.e. It + * is not possible for the context pinning code and submission, for the same + * context, to be executing simultaneously. We still need an atomic as it is + * possible for some of the bits to changing at the same time though. 
+ */ +#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) +#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) +#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) +static inline bool context_enabled(struct intel_context *ce) { - return rb_entry(rb, struct i915_priolist, node); + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_ENABLED); } -static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) +static inline void set_context_enabled(struct intel_context *ce) { - struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; + atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); +} - return &base[id]; +static inline void clr_context_enabled(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, + &ce->guc_sched_state_no_lock); } -static int guc_stage_desc_pool_create(struct intel_guc *guc) +static inline bool context_pending_enable(struct intel_context *ce) { - u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS); + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_PENDING_ENABLE); +} - return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, - &guc->stage_desc_pool_vaddr); +static inline void set_context_pending_enable(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, + &ce->guc_sched_state_no_lock); } -static void guc_stage_desc_pool_destroy(struct intel_guc *guc) +static inline bool context_registered(struct intel_context *ce) { - i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_REGISTERED); +} + +static inline void set_context_registered(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); } /* - * Initialise/clear the stage descriptor shared with the GuC firmware. - * - * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures related to work submission (process descriptor, write queue, - * etc). + * Below is a set of functions which control the GuC scheduling state which + * require a lock, aside from the special case where the functions are called + * from guc_lrc_desc_pin(). In that case it isn't possible for any other code + * path to be executing on the context. 
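One point worth spelling out for the flags defined below: SCHED_STATE_BLOCKED is the low bit of a 12-bit counter (bits 4..15), so block/unblock is done by adding and subtracting the constant rather than OR-ing it, with both the sched_engine lock and ce->guc_state.lock held. A hedged illustration, not part of the patch:

	/* Two nested blocks followed by one unblock, counter field only. */
	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;	/* blocked count 1 */
	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;	/* blocked count 2 */
	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;	/* blocked count 1 */
	/* context_blocked(ce) now reads 1; bits 0-3 are unaffected. */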
*/ -static void guc_stage_desc_init(struct intel_guc *guc) +#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) +#define SCHED_STATE_DESTROYED BIT(1) +#define SCHED_STATE_PENDING_DISABLE BIT(2) +#define SCHED_STATE_BANNED BIT(3) +#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) +#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) +static inline void init_sched_state(struct intel_context *ce) +{ + /* Only should be called from guc_lrc_desc_pin() */ + atomic_set(&ce->guc_sched_state_no_lock, 0); + ce->guc_state.sched_state = 0; +} + +static inline bool +context_wait_for_deregister_to_register(struct intel_context *ce) +{ + return ce->guc_state.sched_state & + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} + +static inline void +set_context_wait_for_deregister_to_register(struct intel_context *ce) { - struct guc_stage_desc *desc; + /* Only should be called from guc_lrc_desc_pin() without lock */ + ce->guc_state.sched_state |= + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} - /* we only use 1 stage desc, so hardcode it to 0 */ - desc = __get_stage_desc(guc, 0); - memset(desc, 0, sizeof(*desc)); +static inline void +clr_context_wait_for_deregister_to_register(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= + ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} - desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | - GUC_STAGE_DESC_ATTR_KERNEL; +static inline bool +context_destroyed(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; +} - desc->stage_id = 0; - desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; +static inline void +set_context_destroyed(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; +} - desc->wq_size = GUC_WQ_SIZE; +static inline bool context_pending_disable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; } -static void guc_stage_desc_fini(struct intel_guc *guc) +static inline void set_context_pending_disable(struct intel_context *ce) { - struct guc_stage_desc *desc; + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; +} - desc = __get_stage_desc(guc, 0); - memset(desc, 0, sizeof(*desc)); +static inline void clr_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; } -static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) +static inline bool context_banned(struct intel_context *ce) { - /* Leaving stub as this function will be used in future patches */ + return ce->guc_state.sched_state & SCHED_STATE_BANNED; } -/* - * When we're doing submissions using regular execlists backend, writing to - * ELSP from CPU side is enough to make sure that writes to ringbuffer pages - * pinned in mappable aperture portion of GGTT are visible to command streamer. - * Writes done by GuC on our behalf are not guaranteeing such ordering, - * therefore, to ensure the flush, we're issuing a POSTING READ. 
- */ -static void flush_ggtt_writes(struct i915_vma *vma) +static inline void set_context_banned(struct intel_context *ce) { - if (i915_vma_is_map_and_fenceable(vma)) - intel_uncore_posting_read_fw(vma->vm->gt->uncore, - GUC_STATUS); + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_BANNED; } -static void guc_submit(struct intel_engine_cs *engine, - struct i915_request **out, - struct i915_request **end) +static inline void clr_context_banned(struct intel_context *ce) { - struct intel_guc *guc = &engine->gt->uc.guc; + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; +} - do { - struct i915_request *rq = *out++; +static inline u32 context_blocked(struct intel_context *ce) +{ + return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> + SCHED_STATE_BLOCKED_SHIFT; +} - flush_ggtt_writes(rq->ring->vma); - guc_add_request(guc, rq); - } while (out != end); +static inline void incr_context_blocked(struct intel_context *ce) +{ + lockdep_assert_held(&ce->engine->sched_engine->lock); + lockdep_assert_held(&ce->guc_state.lock); + + ce->guc_state.sched_state += SCHED_STATE_BLOCKED; + + GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ } -static inline int rq_prio(const struct i915_request *rq) +static inline void decr_context_blocked(struct intel_context *ce) { - return rq->sched.attr.priority; + lockdep_assert_held(&ce->engine->sched_engine->lock); + lockdep_assert_held(&ce->guc_state.lock); + + GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ + + ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } -static struct i915_request *schedule_in(struct i915_request *rq, int idx) +static inline bool context_guc_id_invalid(struct intel_context *ce) { - trace_i915_request_in(rq, idx); + return ce->guc_id == GUC_INVALID_LRC_ID; +} + +static inline void set_context_guc_id_invalid(struct intel_context *ce) +{ + ce->guc_id = GUC_INVALID_LRC_ID; +} + +static inline struct intel_guc *ce_to_guc(struct intel_context *ce) +{ + return &ce->engine->gt->uc.guc; +} + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) +{ + struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; + + GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); + + return &base[index]; +} + +static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) +{ + struct intel_context *ce = xa_load(&guc->context_lookup, id); + + GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); + + return ce; +} + +static int guc_lrc_desc_pool_create(struct intel_guc *guc) +{ + u32 size; + int ret; + + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * + GUC_MAX_LRC_DESCRIPTORS); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, + (void **)&guc->lrc_desc_pool_vaddr); + if (ret) + return ret; + + return 0; +} + +static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) +{ + guc->lrc_desc_pool_vaddr = NULL; + i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); +} + +static inline bool guc_submission_initialized(struct intel_guc *guc) +{ + return !!guc->lrc_desc_pool_vaddr; +} + +static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) +{ + if (likely(guc_submission_initialized(guc))) { + struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); + unsigned long flags; + + memset(desc, 0, sizeof(*desc)); + + /* + * xarray API doesn't have xa_erase_irqsave wrapper, so calling + * the 
lower level functions directly. + */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_erase(&guc->context_lookup, id); + xa_unlock_irqrestore(&guc->context_lookup, flags); + } +} + +static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) +{ + return __get_context(guc, id); +} + +static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, + struct intel_context *ce) +{ + unsigned long flags; /* - * Currently we are not tracking the rq->context being inflight - * (ce->inflight = rq->engine). It is only used by the execlists - * backend at the moment, a similar counting strategy would be - * required if we generalise the inflight tracking. + * xarray API doesn't have xa_save_irqsave wrapper, so calling the + * lower level functions directly. */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); + xa_unlock_irqrestore(&guc->context_lookup, flags); +} + +static int guc_submission_send_busy_loop(struct intel_guc *guc, + const u32 *action, + u32 len, + u32 g2h_len_dw, + bool loop) +{ + int err; + + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + + if (!err && g2h_len_dw) + atomic_inc(&guc->outstanding_submission_g2h); + + return err; +} + +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout) +{ + const int state = interruptible ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(wait); - __intel_gt_pm_get(rq->engine->gt); - return i915_request_get(rq); + might_sleep(); + GEM_BUG_ON(timeout < 0); + + if (!atomic_read(wait_var)) + return 0; + + if (!timeout) + return -ETIME; + + for (;;) { + prepare_to_wait(&guc->ct.wq, &wait, state); + + if (!atomic_read(wait_var)) + break; + + if (signal_pending_state(state, current)) { + timeout = -EINTR; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + } + finish_wait(&guc->ct.wq, &wait); + + return (timeout < 0) ? timeout : 0; } -static void schedule_out(struct i915_request *rq) +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) { - trace_i915_request_out(rq); + if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) + return 0; - intel_gt_pm_put_async(rq->engine->gt); - i915_request_put(rq); + return intel_guc_wait_for_pending_msg(guc, + &guc->outstanding_submission_g2h, + true, timeout); } -static void __guc_dequeue(struct intel_engine_cs *engine) +static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); + +static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request **first = execlists->inflight; - struct i915_request ** const last_port = first + execlists->port_mask; - struct i915_request *last = first[0]; - struct i915_request **port; - bool submit = false; - struct rb_node *rb; + int err = 0; + struct intel_context *ce = rq->context; + u32 action[3]; + int len = 0; + u32 g2h_len_dw = 0; + bool enabled; - lockdep_assert_held(&engine->active.lock); + /* + * Corner case where requests were sitting in the priority list or a + * request resubmitted after the context was banned. 
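For orientation before the submission path below: depending on whether scheduling has already been enabled on the context, guc_add_request() emits one of two H2G payloads. Sketched here with the constants from intel_guc_fwif.h; ce stands for the request's context, as in the function itself.

	/* First submission: enable scheduling, a G2H completion is expected. */
	u32 enable[] = {
		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
		ce->guc_id,
		GUC_CONTEXT_ENABLE,
	};

	/* Later submissions: only kick the already-enabled context. */
	u32 kick[] = {
		INTEL_GUC_ACTION_SCHED_CONTEXT,
		ce->guc_id,
	};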
+ */ + if (unlikely(intel_context_is_banned(ce))) { + i915_request_put(i915_request_mark_eio(rq)); + intel_engine_signal_breadcrumbs(ce->engine); + goto out; + } - if (last) { - if (*++first) - return; + GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); - last = NULL; + /* + * Corner case where the GuC firmware was blown away and reloaded while + * this context was pinned. + */ + if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { + err = guc_lrc_desc_pin(ce, false); + if (unlikely(err)) + goto out; } /* - * We write directly into the execlists->inflight queue and don't use - * the execlists->pending queue, as we don't have a distinct switch - * event. + * The request / context will be run on the hardware when scheduling + * gets enabled in the unblock. */ - port = first; - while ((rb = rb_first_cached(&execlists->queue))) { + if (unlikely(context_blocked(ce))) + goto out; + + enabled = context_enabled(ce); + + if (!enabled) { + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; + action[len++] = ce->guc_id; + action[len++] = GUC_CONTEXT_ENABLE; + set_context_pending_enable(ce); + intel_context_get(ce); + g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; + } else { + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; + action[len++] = ce->guc_id; + } + + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); + if (!enabled && !err) { + trace_intel_context_sched_enable(ce); + atomic_inc(&guc->outstanding_submission_g2h); + set_context_enabled(ce); + } else if (!enabled) { + clr_context_pending_enable(ce); + intel_context_put(ce); + } + if (likely(!err)) + trace_i915_request_guc_submit(rq); + +out: + return err; +} + +static inline void guc_set_lrc_tail(struct i915_request *rq) +{ + rq->context->lrc_reg_state[CTX_RING_TAIL] = + intel_ring_set_tail(rq->ring, rq->tail); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority; +} + +static int guc_dequeue_one_context(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + struct i915_request *last = NULL; + bool submit = false; + struct rb_node *rb; + int ret; + + lockdep_assert_held(&sched_engine->lock); + + if (guc->stalled_request) { + submit = true; + last = guc->stalled_request; + goto resubmit; + } + + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; priolist_for_each_request_consume(rq, rn, p) { - if (last && rq->context != last->context) { - if (port == last_port) - goto done; - - *port = schedule_in(last, - port - execlists->inflight); - port++; - } + if (last && rq->context != last->context) + goto done; list_del_init(&rq->sched.link); + __i915_request_submit(rq); - submit = true; + + trace_i915_request_in(rq, 0); last = rq; + submit = true; } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: - execlists->queue_priority_hint = - rb ? 
to_priolist(rb)->priority : INT_MIN; if (submit) { - *port = schedule_in(last, port - execlists->inflight); - *++port = NULL; - guc_submit(engine, first, port); + guc_set_lrc_tail(last); +resubmit: + ret = guc_add_request(guc, last); + if (unlikely(ret == -EPIPE)) + goto deadlk; + else if (ret == -EBUSY) { + tasklet_schedule(&sched_engine->tasklet); + guc->stalled_request = last; + return false; + } } - execlists->active = execlists->inflight; + + guc->stalled_request = NULL; + return submit; + +deadlk: + sched_engine->tasklet.callback = NULL; + tasklet_disable_nosync(&sched_engine->tasklet); + return false; } static void guc_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request **port, *rq; + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); unsigned long flags; + bool loop; - spin_lock_irqsave(&engine->active.lock, flags); - - for (port = execlists->inflight; (rq = *port); port++) { - if (!i915_request_completed(rq)) - break; + spin_lock_irqsave(&sched_engine->lock, flags); - schedule_out(rq); - } - if (port != execlists->inflight) { - int idx = port - execlists->inflight; - int rem = ARRAY_SIZE(execlists->inflight) - idx; - memmove(execlists->inflight, port, rem * sizeof(*port)); - } + do { + loop = guc_dequeue_one_context(sched_engine->private_data); + } while (loop); - __guc_dequeue(engine); + i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) { - if (iir & GT_RENDER_USER_INTERRUPT) { + if (iir & GT_RENDER_USER_INTERRUPT) intel_engine_signal_breadcrumbs(engine); - tasklet_hi_schedule(&engine->execlists.tasklet); +} + +static void __guc_context_destroy(struct intel_context *ce); +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); +static void guc_signal_context_fence(struct intel_context *ce); +static void guc_cancel_context_requests(struct intel_context *ce); +static void guc_blocked_fence_complete(struct intel_context *ce); + +static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) +{ + struct intel_context *ce; + unsigned long index, flags; + bool pending_disable, pending_enable, deregister, destroyed, banned; + + xa_for_each(&guc->context_lookup, index, ce) { + /* Flush context */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + /* + * Once we are at this point submission_disabled() is guaranteed + * to be visible to all callers who set the below flags (see above + * flush and flushes in reset_prepare). If submission_disabled() + * is set, the caller shouldn't set these flags. + */ + + destroyed = context_destroyed(ce); + pending_enable = context_pending_enable(ce); + pending_disable = context_pending_disable(ce); + deregister = context_wait_for_deregister_to_register(ce); + banned = context_banned(ce); + init_sched_state(ce); + + if (pending_enable || destroyed || deregister) { + atomic_dec(&guc->outstanding_submission_g2h); + if (deregister) + guc_signal_context_fence(ce); + if (destroyed) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + if (pending_enable || deregister) + intel_context_put(ce); + } + + /* Not mutualy exclusive with above if statement. 
*/ + if (pending_disable) { + guc_signal_context_fence(ce); + if (banned) { + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } + intel_context_sched_disable_unpin(ce); + atomic_dec(&guc->outstanding_submission_g2h); + spin_lock_irqsave(&ce->guc_state.lock, flags); + guc_blocked_fence_complete(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + intel_context_put(ce); + } } } -static void guc_reset_prepare(struct intel_engine_cs *engine) +static inline bool +submission_disabled(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + + return unlikely(!sched_engine || + !__tasklet_is_enabled(&sched_engine->tasklet)); +} + +static void disable_submission(struct intel_guc *guc) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = guc->sched_engine; + + if (__tasklet_is_enabled(&sched_engine->tasklet)) { + GEM_BUG_ON(!guc->ct.enabled); + __tasklet_disable_sync_once(&sched_engine->tasklet); + sched_engine->tasklet.callback = NULL; + } +} + +static void enable_submission(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + unsigned long flags; - ENGINE_TRACE(engine, "\n"); + spin_lock_irqsave(&guc->sched_engine->lock, flags); + sched_engine->tasklet.callback = guc_submission_tasklet; + wmb(); /* Make sure callback visible */ + if (!__tasklet_is_enabled(&sched_engine->tasklet) && + __tasklet_enable(&sched_engine->tasklet)) { + GEM_BUG_ON(!guc->ct.enabled); + + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&sched_engine->tasklet); + } + spin_unlock_irqrestore(&guc->sched_engine->lock, flags); +} + +static void guc_flush_submissions(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + unsigned long flags; + + spin_lock_irqsave(&sched_engine->lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +void intel_guc_submission_reset_prepare(struct intel_guc *guc) +{ + int i; + + if (unlikely(!guc_submission_initialized(guc))) { + /* Reset called during driver load? GuC not yet initialised! */ + return; + } + + intel_gt_park_heartbeats(guc_to_gt(guc)); + disable_submission(guc); + guc->interrupts.disable(guc); + + /* Flush IRQ handler */ + spin_lock_irq(&guc_to_gt(guc)->irq_lock); + spin_unlock_irq(&guc_to_gt(guc)->irq_lock); + + guc_flush_submissions(guc); /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. + * Handle any outstanding G2Hs before reset. Call IRQ handler directly + * each pass as interrupt have been disabled. We always scrub for + * outstanding G2H as it is possible for outstanding_submission_g2h to + * be incremented after the context state update. 
*/ - __tasklet_disable_sync_once(&execlists->tasklet); + for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { + intel_guc_to_host_event_handler(guc); +#define wait_for_reset(guc, wait_var) \ + intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) + do { + wait_for_reset(guc, &guc->outstanding_submission_g2h); + } while (!list_empty(&guc->ct.requests.incoming)); + } + scrub_guc_desc_for_outstanding_g2h(guc); +} + +static struct intel_engine_cs * +guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + unsigned int num_siblings = 0; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (num_siblings++ == sibling) + return engine; + + return NULL; +} + +static inline struct intel_engine_cs * +__context_to_physical_engine(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + + if (intel_engine_is_virtual(engine)) + engine = guc_virtual_get_sibling(engine, 0); + + return engine; } -static void guc_reset_state(struct intel_context *ce, - struct intel_engine_cs *engine, - u32 head, - bool scrub) +static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) { + struct intel_engine_cs *engine = __context_to_physical_engine(ce); + + if (intel_context_is_banned(ce)) + return; + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* @@ -313,37 +777,132 @@ static void guc_reset_state(struct intel_context *ce, lrc_update_regs(ce, engine, head); } -static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) +static void guc_reset_nop(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq; +} + +static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) +{ +} + +static void +__unwind_incomplete_requests(struct intel_context *ce) +{ + struct i915_request *rq, *rn; + struct list_head *pl; + int prio = I915_PRIORITY_INVALID; + struct i915_sched_engine * const sched_engine = + ce->engine->sched_engine; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); + spin_lock(&ce->guc_active.lock); + list_for_each_entry_safe(rq, rn, + &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + continue; - /* Push back any incomplete requests for replay after the reset. */ - rq = execlists_unwind_incomplete_requests(execlists); - if (!rq) - goto out_unlock; + list_del_init(&rq->sched.link); + spin_unlock(&ce->guc_active.lock); + + __i915_request_unsubmit(rq); + + /* Push the request back into the queue for later resubmission. */ + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(sched_engine, prio); + } + GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); + + list_add_tail(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + spin_lock(&ce->guc_active.lock); + } + spin_unlock(&ce->guc_active.lock); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static void __guc_reset_context(struct intel_context *ce, bool stalled) +{ + struct i915_request *rq; + u32 head; + + intel_context_get(ce); + + /* + * GuC will implicitly mark the context as non-schedulable + * when it sends the reset notification. Make sure our state + * reflects this change. The context will be marked enabled + * on resubmission. 
+ */ + clr_context_enabled(ce); + + rq = intel_context_find_active_request(ce); + if (!rq) { + head = ce->ring->tail; + stalled = false; + goto out_replay; + } if (!i915_request_started(rq)) stalled = false; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + head = intel_ring_wrap(ce->ring, rq->head); __i915_request_reset(rq, stalled); - guc_reset_state(rq->context, engine, rq->head, stalled); -out_unlock: - spin_unlock_irqrestore(&engine->active.lock, flags); +out_replay: + guc_reset_state(ce, head, stalled); + __unwind_incomplete_requests(ce); + intel_context_put(ce); +} + +void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) +{ + struct intel_context *ce; + unsigned long index; + + if (unlikely(!guc_submission_initialized(guc))) { + /* Reset called during driver load? GuC not yet initialised! */ + return; + } + + xa_for_each(&guc->context_lookup, index, ce) + if (intel_context_is_pinned(ce)) + __guc_reset_context(ce, stalled); + + /* GuC is blown away, drop all references to contexts */ + xa_destroy(&guc->context_lookup); } -static void guc_reset_cancel(struct intel_engine_cs *engine) +static void guc_cancel_context_requests(struct intel_context *ce) +{ + struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; + struct i915_request *rq; + unsigned long flags; + + /* Mark all executing requests as skipped. */ + spin_lock_irqsave(&sched_engine->lock, flags); + spin_lock(&ce->guc_active.lock); + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + i915_request_put(i915_request_mark_eio(rq)); + spin_unlock(&ce->guc_active.lock); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static void +guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - ENGINE_TRACE(engine, "\n"); + /* Can be called during boot if GuC fails to load */ + if (!sched_engine) + return; /* * Before we call engine->cancel_requests(), we should have exclusive @@ -359,47 +918,67 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->active.lock, flags); - - /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); - } + spin_lock_irqsave(&sched_engine->lock, flags); /* Flush the queued requests to the timeline list (for retiring). 
*/ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { list_del_init(&rq->sched.link); + __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); + + i915_request_put(i915_request_mark_eio(rq)); } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void guc_reset_finish(struct intel_engine_cs *engine) +void intel_guc_submission_cancel_requests(struct intel_guc *guc) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct intel_context *ce; + unsigned long index; - if (__tasklet_enable(&execlists->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); + xa_for_each(&guc->context_lookup, index, ce) + if (intel_context_is_pinned(ce)) + guc_cancel_context_requests(ce); + + guc_cancel_sched_engine_requests(guc->sched_engine); - ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + /* GuC is blown away, drop all references to contexts */ + xa_destroy(&guc->context_lookup); +} + +void intel_guc_submission_reset_finish(struct intel_guc *guc) +{ + /* Reset called during driver load or during wedge? */ + if (unlikely(!guc_submission_initialized(guc) || + test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { + return; + } + + /* + * Technically possible for either of these values to be non-zero here, + * but very unlikely + harmless. Regardless let's add a warn so we can + * see in CI if this happens frequently / a precursor to taking down the + * machine. + */ + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); + atomic_set(&guc->outstanding_submission_g2h, 0); + + intel_guc_global_policies_update(guc); + enable_submission(guc); + intel_gt_unpark_heartbeats(guc_to_gt(guc)); } /* @@ -410,43 +989,986 @@ int intel_guc_submission_init(struct intel_guc *guc) { int ret; - if (guc->stage_desc_pool) + if (guc->lrc_desc_pool) return 0; - ret = guc_stage_desc_pool_create(guc); + ret = guc_lrc_desc_pool_create(guc); if (ret) return ret; /* * Keep static analysers happy, let them know that we allocated the * vma after testing that it didn't exist earlier. 
*/ - GEM_BUG_ON(!guc->stage_desc_pool); + GEM_BUG_ON(!guc->lrc_desc_pool); + + xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); + + spin_lock_init(&guc->contexts_lock); + INIT_LIST_HEAD(&guc->guc_id_list); + ida_init(&guc->guc_ids); return 0; } void intel_guc_submission_fini(struct intel_guc *guc) { - if (guc->stage_desc_pool) { - guc_stage_desc_pool_destroy(guc); + if (!guc->lrc_desc_pool) + return; + + guc_lrc_desc_pool_destroy(guc); + i915_sched_engine_put(guc->sched_engine); +} + +static inline void queue_request(struct i915_sched_engine *sched_engine, + struct i915_request *rq, + int prio) +{ + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(sched_engine, prio)); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + +static int guc_bypass_tasklet_submit(struct intel_guc *guc, + struct i915_request *rq) +{ + int ret; + + __i915_request_submit(rq); + + trace_i915_request_in(rq, 0); + + guc_set_lrc_tail(rq); + ret = guc_add_request(guc, rq); + if (ret == -EBUSY) + guc->stalled_request = rq; + + if (unlikely(ret == -EPIPE)) + disable_submission(guc); + + return ret; +} + +static void guc_submit_request(struct i915_request *rq) +{ + struct i915_sched_engine *sched_engine = rq->engine->sched_engine; + struct intel_guc *guc = &rq->engine->gt->uc.guc; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&sched_engine->lock, flags); + + if (submission_disabled(guc) || guc->stalled_request || + !i915_sched_engine_is_empty(sched_engine)) + queue_request(sched_engine, rq, rq_prio(rq)); + else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) + tasklet_hi_schedule(&sched_engine->tasklet); + + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static int new_guc_id(struct intel_guc *guc) +{ + return ida_simple_get(&guc->guc_ids, 0, + GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); +} + +static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + if (!context_guc_id_invalid(ce)) { + ida_simple_remove(&guc->guc_ids, ce->guc_id); + reset_lrc_desc(guc, ce->guc_id); + set_context_guc_id_invalid(ce); } + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); } -static int guc_context_alloc(struct intel_context *ce) +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) { - return lrc_alloc(ce, ce->engine); + unsigned long flags; + + spin_lock_irqsave(&guc->contexts_lock, flags); + __release_guc_id(guc, ce); + spin_unlock_irqrestore(&guc->contexts_lock, flags); +} + +static int steal_guc_id(struct intel_guc *guc) +{ + struct intel_context *ce; + int guc_id; + + lockdep_assert_held(&guc->contexts_lock); + + if (!list_empty(&guc->guc_id_list)) { + ce = list_first_entry(&guc->guc_id_list, + struct intel_context, + guc_id_link); + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); + + list_del_init(&ce->guc_id_link); + guc_id = ce->guc_id; + clr_context_registered(ce); + set_context_guc_id_invalid(ce); + return guc_id; + } else { + return -EAGAIN; + } +} + +static int assign_guc_id(struct intel_guc *guc, u16 *out) +{ + int ret; + + lockdep_assert_held(&guc->contexts_lock); + + ret = new_guc_id(guc); + if (unlikely(ret < 0)) { + ret = steal_guc_id(guc); + if (ret < 0) + return ret; + } + + *out = ret; + return 0; +} + +#define PIN_GUC_ID_TRIES 4 +static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + int ret = 0; + unsigned long flags, 
tries = PIN_GUC_ID_TRIES; + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + +try_again: + spin_lock_irqsave(&guc->contexts_lock, flags); + + if (context_guc_id_invalid(ce)) { + ret = assign_guc_id(guc, &ce->guc_id); + if (ret) + goto out_unlock; + ret = 1; /* Indidcates newly assigned guc_id */ + } + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); + atomic_inc(&ce->guc_id_ref); + +out_unlock: + spin_unlock_irqrestore(&guc->contexts_lock, flags); + + /* + * -EAGAIN indicates no guc_ids are available, let's retire any + * outstanding requests to see if that frees up a guc_id. If the first + * retire didn't help, insert a sleep with the timeslice duration before + * attempting to retire more requests. Double the sleep period each + * subsequent pass before finally giving up. The sleep period has max of + * 100ms and minimum of 1ms. + */ + if (ret == -EAGAIN && --tries) { + if (PIN_GUC_ID_TRIES - tries > 1) { + unsigned int timeslice_shifted = + ce->engine->props.timeslice_duration_ms << + (PIN_GUC_ID_TRIES - tries - 2); + unsigned int max = min_t(unsigned int, 100, + timeslice_shifted); + + msleep(max_t(unsigned int, max, 1)); + } + intel_gt_retire_requests(guc_to_gt(guc)); + goto try_again; + } + + return ret; +} + +static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + unsigned long flags; + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + + if (unlikely(context_guc_id_invalid(ce))) + return; + + spin_lock_irqsave(&guc->contexts_lock, flags); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && + !atomic_read(&ce->guc_id_ref)) + list_add_tail(&ce->guc_id_link, &guc->guc_id_list); + spin_unlock_irqrestore(&guc->contexts_lock, flags); +} + +static int __guc_action_register_context(struct intel_guc *guc, + u32 guc_id, + u32 offset, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, loop); +} + +static int register_context(struct intel_context *ce, bool loop) +{ + struct intel_guc *guc = ce_to_guc(ce); + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + + ce->guc_id * sizeof(struct guc_lrc_desc); + int ret; + + trace_intel_context_register(ce); + + ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); + if (likely(!ret)) + set_context_registered(ce); + + return ret; +} + +static int __guc_action_deregister_context(struct intel_guc *guc, + u32 guc_id, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_DEREGISTER_CONTEXT, + guc_id, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, + loop); +} + +static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) +{ + struct intel_guc *guc = ce_to_guc(ce); + + trace_intel_context_deregister(ce); + + return __guc_action_deregister_context(guc, guc_id, loop); +} + +static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) +{ + switch (class) { + case RENDER_CLASS: + return mask >> RCS0; + case VIDEO_ENHANCEMENT_CLASS: + return mask >> VECS0; + case VIDEO_DECODE_CLASS: + return mask >> VCS0; + case COPY_ENGINE_CLASS: + return mask >> BCS0; + default: + MISSING_CASE(class); + return 0; + } +} + +static void guc_context_policy_init(struct intel_engine_cs *engine, + struct guc_lrc_desc *desc) +{ + desc->policy_flags = 0; + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; + + /* 
NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; +} + +static inline u8 map_i915_prio_to_guc_prio(int prio); + +static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 desc_idx = ce->guc_id; + struct guc_lrc_desc *desc; + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; + bool context_registered; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are is same region as write barrier is done + * based on CT vma region. + */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + context_registered = lrc_desc_registered(guc, desc_idx); + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + + reset_lrc_desc(guc, desc_idx); + set_lrc_desc_registered(guc, desc_idx, ce); + + desc = __get_lrc_desc(guc, desc_idx); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = adjust_engine_mask(engine->class, + engine->mask); + desc->hw_context_desc = ce->lrc.lrca; + ce->guc_prio = map_i915_prio_to_guc_prio(prio); + desc->priority = ce->guc_prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init(engine, desc); + init_sched_state(ce); + + /* + * The context_lookup xarray is used to determine if the hardware + * context is currently registered. There are two cases in which it + * could be registered either the guc_id has been stolen from another + * context or the lrc descriptor address of this context has changed. In + * either case the context needs to be deregistered with the GuC before + * registering this context. + */ + if (context_registered) { + trace_intel_context_steal_guc_id(ce); + if (!loop) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + } else { + bool disabled; + unsigned long flags; + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ + } + } + + /* + * If stealing the guc_id, this ce has the same guc_id as the + * context whose guc_id was stolen. 
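The retry loop in pin_guc_id() above backs off when guc_ids are exhausted: the first retry only retires outstanding requests, and each later retry sleeps for the engine timeslice doubled per pass, clamped between 1 ms and 100 ms, before retiring and trying again. A standalone sketch of that schedule follows; the 5 ms timeslice and the helper name are illustrative only.

#include <stdio.h>

#define PIN_GUC_ID_TRIES 4

/* Sleep (in ms) taken before retry 'attempt' (1-based); 0 means no sleep. */
static unsigned int backoff_ms(unsigned int attempt, unsigned int timeslice_ms)
{
	unsigned int shifted, capped;

	if (attempt < 2)
		return 0;			/* first retry: just retire requests */

	shifted = timeslice_ms << (attempt - 2);	/* double each pass */
	capped = shifted > 100 ? 100 : shifted;		/* max 100 ms */
	return capped ? capped : 1;			/* min 1 ms */
}

int main(void)
{
	unsigned int timeslice_ms = 5;	/* hypothetical timeslice duration */
	unsigned int attempt;

	for (attempt = 1; attempt < PIN_GUC_ID_TRIES; attempt++)
		printf("retry %u: sleep %u ms\n",
		       attempt, backoff_ms(attempt, timeslice_ms));
	return 0;	/* prints 0, 5, 10 for a 5 ms timeslice */
}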
+ */ + with_intel_runtime_pm(runtime_pm, wakeref) + ret = deregister_context(ce, ce->guc_id, loop); + if (unlikely(ret == -EBUSY)) { + clr_context_wait_for_deregister_to_register(ce); + intel_context_put(ce); + } else if (unlikely(ret == -ENODEV)) { + ret = 0; /* Will get registered later */ + } + } else { + with_intel_runtime_pm(runtime_pm, wakeref) + ret = register_context(ce, loop); + if (unlikely(ret == -EBUSY)) + reset_lrc_desc(guc, desc_idx); + else if (unlikely(ret == -ENODEV)) + ret = 0; /* Will get registered later */ + } + + return ret; +} + +static int __guc_context_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + return lrc_pre_pin(ce, engine, ww, vaddr); +} + +static int __guc_context_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) +{ + if (i915_ggtt_offset(ce->state) != + (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) + set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); + + /* + * GuC context gets pinned in guc_request_alloc. See that function for + * explaination of why. + */ + + return lrc_pin(ce, engine, vaddr); } static int guc_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr) { - return lrc_pre_pin(ce, ce->engine, ww, vaddr); + return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); } static int guc_context_pin(struct intel_context *ce, void *vaddr) { - return lrc_pin(ce, ce->engine, vaddr); + return __guc_context_pin(ce, ce->engine, vaddr); +} + +static void guc_context_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + unpin_guc_id(guc, ce); + lrc_unpin(ce); +} + +static void guc_context_post_unpin(struct intel_context *ce) +{ + lrc_post_unpin(ce); +} + +static void __guc_context_sched_enable(struct intel_guc *guc, + struct intel_context *ce) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, + ce->guc_id, + GUC_CONTEXT_ENABLE + }; + + trace_intel_context_sched_enable(ce); + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); +} + +static void __guc_context_sched_disable(struct intel_guc *guc, + struct intel_context *ce, + u16 guc_id) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, + guc_id, /* ce->guc_id not stable */ + GUC_CONTEXT_DISABLE + }; + + GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + + trace_intel_context_sched_disable(ce); + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); +} + +static void guc_blocked_fence_complete(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + if (!i915_sw_fence_done(&ce->guc_blocked)) + i915_sw_fence_complete(&ce->guc_blocked); +} + +static void guc_blocked_fence_reinit(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + + /* + * This fence is always complete unless a pending schedule disable is + * outstanding. We arm the fence here and complete it when we receive + * the pending schedule disable complete message. 
+ */ + i915_sw_fence_fini(&ce->guc_blocked); + i915_sw_fence_reinit(&ce->guc_blocked); + i915_sw_fence_await(&ce->guc_blocked); + i915_sw_fence_commit(&ce->guc_blocked); +} + +static u16 prep_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + set_context_pending_disable(ce); + clr_context_enabled(ce); + guc_blocked_fence_reinit(ce); + intel_context_get(ce); + + return ce->guc_id; +} + +static struct i915_sw_fence *guc_context_block(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + unsigned long flags; + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + intel_wakeref_t wakeref; + u16 guc_id; + bool enabled; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + /* + * Sync with submission path, increment before below changes to context + * state. + */ + spin_lock(&sched_engine->lock); + incr_context_blocked(ce); + spin_unlock(&sched_engine->lock); + + enabled = context_enabled(ce); + if (unlikely(!enabled || submission_disabled(guc))) { + if (enabled) + clr_context_enabled(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return &ce->guc_blocked; + } + + /* + * We add +2 here as the schedule disable complete CTB handler calls + * intel_context_sched_disable_unpin (-2 to pin_count). + */ + atomic_add(2, &ce->pin_count); + + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return &ce->guc_blocked; +} + +static void guc_context_unblock(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + unsigned long flags; + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + intel_wakeref_t wakeref; + bool enable; + + GEM_BUG_ON(context_enabled(ce)); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + if (unlikely(submission_disabled(guc) || + !intel_context_is_pinned(ce) || + context_pending_disable(ce) || + context_blocked(ce) > 1)) { + enable = false; + } else { + enable = true; + set_context_pending_enable(ce); + set_context_enabled(ce); + intel_context_get(ce); + } + + /* + * Sync with submission path, decrement after above changes to context + * state. 
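The guc_blocked fence described above stays completed except while a schedule-disable is outstanding: it is re-armed when the disable is issued and completed again when the G2H acknowledgement arrives, so anyone blocking a context can simply wait on it. A toy userspace model of that arm/complete lifecycle is sketched below; the real i915_sw_fence has more machinery (reinit, commit, wait queues) than this counter.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of a fence that is "done" unless someone has armed it. */
struct toy_fence {
	int pending;	/* 0 == done, waiters may proceed */
};

static bool toy_fence_done(const struct toy_fence *f)
{
	return f->pending == 0;
}

static void toy_fence_arm(struct toy_fence *f)
{
	f->pending++;
}

static void toy_fence_complete(struct toy_fence *f)
{
	assert(f->pending > 0);
	f->pending--;
}

int main(void)
{
	struct toy_fence blocked = { 0 };	/* starts completed */

	toy_fence_arm(&blocked);		/* schedule-disable issued */
	printf("blocked: %s\n", toy_fence_done(&blocked) ? "no" : "yes");

	toy_fence_complete(&blocked);		/* G2H "disable done" arrives */
	printf("blocked: %s\n", toy_fence_done(&blocked) ? "no" : "yes");
	return 0;
}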
+ */ + spin_lock(&sched_engine->lock); + decr_context_blocked(ce); + spin_unlock(&sched_engine->lock); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + if (enable) { + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_enable(guc, ce); + } +} + +static void guc_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + if (i915_sw_fence_signaled(&rq->submit)) { + struct i915_sw_fence *fence = guc_context_block(ce); + + i915_sw_fence_wait(fence); + if (!i915_request_completed(rq)) { + __i915_request_skip(rq); + guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), + true); + } + guc_context_unblock(ce); + } +} + +static void __guc_context_set_preemption_timeout(struct intel_guc *guc, + u16 guc_id, + u32 preemption_timeout) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); +} + +static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct intel_runtime_pm *runtime_pm = + &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + unsigned long flags; + + guc_flush_submissions(guc); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + set_context_banned(ce); + + if (submission_disabled(guc) || + (!context_enabled(ce) && !context_pending_disable(ce))) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } else if (!context_pending_disable(ce)) { + u16 guc_id; + + /* + * We add +2 here as the schedule disable complete CTB handler + * calls intel_context_sched_disable_unpin (-2 to pin_count). + */ + atomic_add(2, &ce->pin_count); + + guc_id = prep_context_pending_disable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + /* + * In addition to disabling scheduling, set the preemption + * timeout to the minimum value (1 us) so the banned context + * gets kicked off the HW ASAP. + */ + with_intel_runtime_pm(runtime_pm, wakeref) { + __guc_context_set_preemption_timeout(guc, guc_id, 1); + __guc_context_sched_disable(guc, ce, guc_id); + } + } else { + if (!context_guc_id_invalid(ce)) + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_set_preemption_timeout(guc, + ce->guc_id, + 1); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } +} + +static void guc_context_sched_disable(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + u16 guc_id; + bool enabled; + + if (submission_disabled(guc) || context_guc_id_invalid(ce) || + !lrc_desc_registered(guc, ce->guc_id)) { + clr_context_enabled(ce); + goto unpin; + } + + if (!context_enabled(ce)) + goto unpin; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + /* + * We have to check if the context has been disabled by another thread. + * We also have to check if the context has been pinned again as another + * pin operation is allowed to pass this function. Checking the pin + * count, within ce->guc_state.lock, synchronizes this function with + * guc_request_alloc ensuring a request doesn't slip through the + * 'context_pending_disable' fence. Checking within the spin lock (can't + * sleep) ensures another process doesn't pin this context and generate + * a request before we set the 'context_pending_disable' flag here. 
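The pin-count check discussed above relies on atomic_add_unless(): add the delta unless the current value equals the given one, and report whether the add happened. In guc_context_sched_disable() that means "if anything beyond our own +2 reference is pinned, drop the reference inline and skip the disable". A minimal userspace model using C11 atomics, not the kernel helper itself:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Model of atomic_add_unless(): add 'a' unless the value equals 'unless'. */
static bool add_unless(atomic_int *v, int a, int unless)
{
	int cur = atomic_load(v);

	while (cur != unless) {
		if (atomic_compare_exchange_weak(v, &cur, cur + a))
			return true;	/* added */
	}
	return false;			/* value was 'unless', left untouched */
}

int main(void)
{
	atomic_int pin_count;

	/* Only the scheduler's own +2 reference is held: proceed to disable. */
	atomic_init(&pin_count, 2);
	printf("racing pin? %s\n", add_unless(&pin_count, -2, 2) ? "yes" : "no");

	/* Another pin raced in (count > 2): do the -2 unpin inline and bail. */
	atomic_init(&pin_count, 4);
	printf("racing pin? %s\n", add_unless(&pin_count, -2, 2) ? "yes" : "no");
	return 0;
}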
+ */ + enabled = context_enabled(ce); + if (unlikely(!enabled || submission_disabled(guc))) { + if (enabled) + clr_context_enabled(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + goto unpin; + } + if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return; + } + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return; +unpin: + intel_context_sched_disable_unpin(ce); +} + +static inline void guc_lrc_desc_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + GEM_BUG_ON(context_enabled(ce)); + + clr_context_registered(ce); + deregister_context(ce, ce->guc_id, true); +} + +static void __guc_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + + lrc_fini(ce); + intel_context_fini(ce); + + if (intel_engine_is_virtual(ce->engine)) { + struct guc_virtual_engine *ve = + container_of(ce, typeof(*ve), context); + + if (ve->base.breadcrumbs) + intel_breadcrumbs_put(ve->base.breadcrumbs); + + kfree(ve); + } else { + intel_context_free(ce); + } +} + +static void guc_context_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + struct intel_guc *guc = ce_to_guc(ce); + intel_wakeref_t wakeref; + unsigned long flags; + bool disabled; + + /* + * If the guc_id is invalid this context has been stolen and we can free + * it immediately. Also can be freed immediately if the context is not + * registered with the GuC or the GuC is in the middle of a reset. + */ + if (context_guc_id_invalid(ce)) { + __guc_context_destroy(ce); + return; + } else if (submission_disabled(guc) || + !lrc_desc_registered(guc, ce->guc_id)) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + + /* + * We have to acquire the context spinlock and check guc_id again, if it + * is valid it hasn't been stolen and needs to be deregistered. We + * delete this context from the list of unpinned guc_ids available to + * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB + * returns indicating this context has been deregistered the guc_id is + * returned to the pool of available guc_ids. + */ + spin_lock_irqsave(&guc->contexts_lock, flags); + if (context_guc_id_invalid(ce)) { + spin_unlock_irqrestore(&guc->contexts_lock, flags); + __guc_context_destroy(ce); + return; + } + + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); + spin_unlock_irqrestore(&guc->contexts_lock, flags); + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) + set_context_destroyed(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + + /* + * We defer GuC context deregistration until the context is destroyed + * in order to save on CTBs. 
With this optimization ideally we only need + * 1 CTB to register the context during the first pin and 1 CTB to + * deregister the context when the context is destroyed. Without this + * optimization, a CTB would be needed every pin & unpin. + * + * XXX: Need to acqiure the runtime wakeref as this can be triggered + * from context_free_worker when runtime wakeref is not held. + * guc_lrc_desc_unpin requires the runtime as a GuC register is written + * in H2G CTB to deregister the context. A future patch may defer this + * H2G CTB if the runtime wakeref is zero. + */ + with_intel_runtime_pm(runtime_pm, wakeref) + guc_lrc_desc_unpin(ce); +} + +static int guc_context_alloc(struct intel_context *ce) +{ + return lrc_alloc(ce, ce->engine); +} + +static void guc_context_set_prio(struct intel_guc *guc, + struct intel_context *ce, + u8 prio) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, + ce->guc_id, + prio, + }; + + GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || + prio > GUC_CLIENT_PRIORITY_NORMAL); + + if (ce->guc_prio == prio || submission_disabled(guc) || + !context_registered(ce)) + return; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + + ce->guc_prio = prio; + trace_intel_context_set_prio(ce); +} + +static inline u8 map_i915_prio_to_guc_prio(int prio) +{ + if (prio == I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_KMD_NORMAL; + else if (prio < I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_NORMAL; + else if (prio < I915_PRIORITY_DISPLAY) + return GUC_CLIENT_PRIORITY_HIGH; + else + return GUC_CLIENT_PRIORITY_KMD_HIGH; +} + +static inline void add_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + ++ce->guc_prio_count[guc_prio]; + + /* Overflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); +} + +static inline void sub_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + /* Underflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + + --ce->guc_prio_count[guc_prio]; +} + +static inline void update_context_prio(struct intel_context *ce) +{ + struct intel_guc *guc = &ce->engine->gt->uc.guc; + int i; + + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); + + lockdep_assert_held(&ce->guc_active.lock); + + for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { + if (ce->guc_prio_count[i]) { + guc_context_set_prio(guc, ce, i); + break; + } + } +} + +static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) +{ + /* Lower value is higher priority */ + return new_guc_prio < old_guc_prio; +} + +static void add_to_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); + + GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); + + spin_lock(&ce->guc_active.lock); + list_move_tail(&rq->sched.link, &ce->guc_active.requests); + + if (rq->guc_prio == GUC_PRIO_INIT) { + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } + update_context_prio(ce); + + spin_unlock(&ce->guc_active.lock); +} + +static void 
guc_prio_fini(struct i915_request *rq, struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_active.lock); + + if (rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI) { + sub_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + rq->guc_prio = GUC_PRIO_FINI; +} + +static void remove_from_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock_irq(&ce->guc_active.lock); + + list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + guc_prio_fini(rq, ce); + + spin_unlock_irq(&ce->guc_active.lock); + + atomic_dec(&ce->guc_id_ref); + i915_request_notify_execute_cb_imm(rq); } static const struct intel_context_ops guc_context_ops = { @@ -454,28 +1976,71 @@ static const struct intel_context_ops guc_context_ops = { .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, - .unpin = lrc_unpin, - .post_unpin = lrc_post_unpin, + .unpin = guc_context_unpin, + .post_unpin = guc_context_post_unpin, + + .ban = guc_context_ban, + + .cancel_request = guc_context_cancel_request, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, + .sched_disable = guc_context_sched_disable, + .reset = lrc_reset, - .destroy = lrc_destroy, + .destroy = guc_context_destroy, + + .create_virtual = guc_create_virtual, }; -static int guc_request_alloc(struct i915_request *request) +static void __guc_signal_context_fence(struct intel_context *ce) { + struct i915_request *rq; + + lockdep_assert_held(&ce->guc_state.lock); + + if (!list_empty(&ce->guc_state.fences)) + trace_intel_context_fence_release(ce); + + list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) + i915_sw_fence_complete(&rq->submit); + + INIT_LIST_HEAD(&ce->guc_state.fences); +} + +static void guc_signal_context_fence(struct intel_context *ce) +{ + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + clr_context_wait_for_deregister_to_register(ce); + __guc_signal_context_fence(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); +} + +static bool context_needs_register(struct intel_context *ce, bool new_guc_id) +{ + return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !submission_disabled(ce_to_guc(ce)); +} + +static int guc_request_alloc(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->context)); + GEM_BUG_ON(!intel_context_is_pinned(rq->context)); /* * Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. */ - request->reserved_space += GUC_REQUEST_SIZE; + rq->reserved_space += GUC_REQUEST_SIZE; /* * Note that after this point, we have committed to using @@ -486,40 +2051,232 @@ static int guc_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. 
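The priority bookkeeping above maps each request's i915 priority onto a small set of GuC bands (lower index means higher priority) and keeps a per-context count of in-flight requests per band; the context is reported to GuC at the highest occupied band. The sketch below models that selection; the band indices follow the order implied by the patch, but the numeric i915 anchors (0 for normal, 2048 for display boost) are purely illustrative.

#include <stdio.h>

/* GuC priority bands: lower value == higher priority (indices illustrative). */
enum band { KMD_HIGH = 0, HIGH = 1, KMD_NORMAL = 2, NORMAL = 3, NUM_BANDS = 4 };

#define PRIO_NORMAL  0		/* hypothetical i915 normal priority */
#define PRIO_DISPLAY 2048	/* hypothetical display-boost threshold */

static enum band map_prio(int i915_prio)
{
	if (i915_prio == PRIO_NORMAL)
		return KMD_NORMAL;
	else if (i915_prio < PRIO_NORMAL)
		return NORMAL;
	else if (i915_prio < PRIO_DISPLAY)
		return HIGH;
	else
		return KMD_HIGH;
}

/* The context is reported at the highest band with any in-flight request. */
static enum band context_band(const unsigned int count[NUM_BANDS])
{
	int i;

	for (i = 0; i < NUM_BANDS; i++)
		if (count[i])
			return (enum band)i;
	return KMD_NORMAL;	/* fallback for the sketch only */
}

int main(void)
{
	unsigned int count[NUM_BANDS] = { 0 };

	count[map_prio(0)]++;		/* a normal-priority request */
	count[map_prio(1000)]++;	/* a boosted request */
	printf("context band: %d\n", context_band(count));	/* 1 == HIGH */
	return 0;
}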
*/ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) return ret; - request->reserved_space -= GUC_REQUEST_SIZE; + rq->reserved_space -= GUC_REQUEST_SIZE; + + /* + * Call pin_guc_id here rather than in the pinning step as with + * dma_resv, contexts can be repeatedly pinned / unpinned trashing the + * guc_ids and creating horrible race conditions. This is especially bad + * when guc_ids are being stolen due to over subscription. By the time + * this function is reached, it is guaranteed that the guc_id will be + * persistent until the generated request is retired. Thus, sealing these + * race conditions. It is still safe to fail here if guc_ids are + * exhausted and return -EAGAIN to the user indicating that they can try + * again in the future. + * + * There is no need for a lock here as the timeline mutex ensures at + * most one context can be executing this code path at once. The + * guc_id_ref is incremented once for every request in flight and + * decremented on each retire. When it is zero, a lock around the + * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. + */ + if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + goto out; + + ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ + if (unlikely(ret < 0)) + return ret; + if (context_needs_register(ce, !!ret)) { + ret = guc_lrc_desc_pin(ce, true); + if (unlikely(ret)) { /* unwind */ + if (ret == -EPIPE) { + disable_submission(guc); + goto out; /* GPU will be reset */ + } + atomic_dec(&ce->guc_id_ref); + unpin_guc_id(guc, ce); + return ret; + } + } + + clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); + +out: + /* + * We block all requests on this context if a G2H is pending for a + * schedule disable or context deregistration as the GuC will fail a + * schedule enable or context registration if either G2H is pending + * respectfully. Once a G2H returns, the fence is released that is + * blocking these requests (see guc_signal_context_fence). + * + * We can safely check the below fields outside of the lock as it isn't + * possible for these fields to transition from being clear to set but + * converse is possible, hence the need for the check within the lock. + */ + if (likely(!context_wait_for_deregister_to_register(ce) && + !context_pending_disable(ce))) + return 0; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (context_wait_for_deregister_to_register(ce) || + context_pending_disable(ce)) { + i915_sw_fence_await(&rq->submit); + + list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return 0; } -static inline void queue_request(struct intel_engine_cs *engine, - struct i915_request *rq, - int prio) +static int guc_virtual_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) { - GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, prio)); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return __guc_context_pre_pin(ce, engine, ww, vaddr); } -static void guc_submit_request(struct i915_request *rq) +static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) { - struct intel_engine_cs *engine = rq->engine; - unsigned long flags; + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); - /* Will be called from irq-context when using foreign fences. 
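The fence-blocking logic in guc_request_alloc() above is a check-then-lock-then-recheck pattern: the flags can only transition from set to clear while this code runs, so an unlocked check that finds them clear is safe, but a set flag must be rechecked under the lock before the request is parked on the context fence list. A simplified userspace sketch of that shape (single flag, pthread mutex, no kernel types):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Flag that can only go from set -> clear here, never clear -> set. */
struct ctx_state {
	pthread_mutex_t lock;
	bool pending_disable;
	int blocked_requests;
};

static void maybe_block_request(struct ctx_state *s)
{
	/* Cheap unlocked check: once clear it cannot become set again. */
	if (!s->pending_disable)
		return;

	/* It may have cleared meanwhile, so recheck under the lock. */
	pthread_mutex_lock(&s->lock);
	if (s->pending_disable)
		s->blocked_requests++;	/* would add the request to the fence list */
	pthread_mutex_unlock(&s->lock);
}

int main(void)
{
	struct ctx_state s = { PTHREAD_MUTEX_INITIALIZER, true, 0 };

	maybe_block_request(&s);
	printf("blocked: %d\n", s.blocked_requests);	/* 1 */

	s.pending_disable = false;	/* the G2H completion cleared the flag */
	maybe_block_request(&s);
	printf("blocked: %d\n", s.blocked_requests);	/* still 1 */
	return 0;
}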
*/ - spin_lock_irqsave(&engine->active.lock, flags); + return __guc_context_pin(ce, engine, vaddr); +} - queue_request(engine, rq, rq_prio(rq)); +static void guc_virtual_context_enter(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&rq->sched.link)); + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_get(engine); - tasklet_hi_schedule(&engine->execlists.tasklet); + intel_timeline_enter(ce->timeline); +} + +static void guc_virtual_context_exit(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; - spin_unlock_irqrestore(&engine->active.lock, flags); + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_put(engine); + + intel_timeline_exit(ce->timeline); +} + +static int guc_virtual_context_alloc(struct intel_context *ce) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return lrc_alloc(ce, engine); +} + +static const struct intel_context_ops virtual_guc_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_virtual_context_pre_pin, + .pin = guc_virtual_context_pin, + .unpin = guc_context_unpin, + .post_unpin = guc_context_post_unpin, + + .ban = guc_context_ban, + + .cancel_request = guc_context_cancel_request, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .sched_disable = guc_context_sched_disable, + + .destroy = guc_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + +static bool +guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *sibling; + intel_engine_mask_t tmp, mask = b->engine_mask; + bool result = false; + + for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) + result |= intel_engine_irq_enable(sibling); + + return result; +} + +static void +guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *sibling; + intel_engine_mask_t tmp, mask = b->engine_mask; + + for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) + intel_engine_irq_disable(sibling); +} + +static void guc_init_breadcrumbs(struct intel_engine_cs *engine) +{ + int i; + + /* + * In GuC submission mode we do not know which physical engine a request + * will be scheduled on, this creates a problem because the breadcrumb + * interrupt is per physical engine. To work around this we attach + * requests and direct all breadcrumb interrupts to the first instance + * of an engine per class. In addition all breadcrumb interrupts are + * enabled / disabled across an engine class in unison. 
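Because breadcrumb interrupts are aggregated per engine class as described above, enabling or disabling them walks every sibling in the class mask and ORs the per-engine results. A trivial userspace sketch of that walk (engine ids, mask value and the enable stub are all made up for illustration):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ENGINES 8

static bool engine_irq_enable(int id)
{
	printf("enable breadcrumb irq on engine %d\n", id);
	return true;	/* pretend the enable took effect */
}

/* Enable the interrupt on every engine in 'mask'; report if any succeeded. */
static bool class_irq_enable(unsigned int mask)
{
	bool result = false;
	int id;

	for (id = 0; id < MAX_ENGINES; id++)
		if (mask & (1u << id))
			result |= engine_irq_enable(id);

	return result;
}

int main(void)
{
	/* Hypothetical mask: two engines of one class share breadcrumbs. */
	printf("any enabled: %s\n", class_irq_enable(0x3u << 2) ? "yes" : "no");
	return 0;
}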
+ */ + for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { + struct intel_engine_cs *sibling = + engine->gt->engine_class[engine->class][i]; + + if (sibling) { + if (engine->breadcrumbs != sibling->breadcrumbs) { + intel_breadcrumbs_put(engine->breadcrumbs); + engine->breadcrumbs = + intel_breadcrumbs_get(sibling->breadcrumbs); + } + break; + } + } + + if (engine->breadcrumbs) { + engine->breadcrumbs->engine_mask |= engine->mask; + engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; + engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; + } +} + +static void guc_bump_inflight_request_prio(struct i915_request *rq, + int prio) +{ + struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); + + /* Short circuit function */ + if (prio < I915_PRIORITY_NORMAL || + rq->guc_prio == GUC_PRIO_FINI || + (rq->guc_prio != GUC_PRIO_INIT && + !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) + return; + + spin_lock(&ce->guc_active.lock); + if (rq->guc_prio != GUC_PRIO_FINI) { + if (rq->guc_prio != GUC_PRIO_INIT) + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + spin_unlock(&ce->guc_active.lock); +} + +static void guc_retire_inflight_request_prio(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock(&ce->guc_active.lock); + guc_prio_fini(rq, ce); + spin_unlock(&ce->guc_active.lock); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -588,21 +2345,68 @@ static int guc_resume(struct intel_engine_cs *engine) return 0; } +static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) +{ + return !sched_engine->tasklet.callback; +} + static void guc_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = guc_submit_request; } +static inline void guc_kernel_context_pin(struct intel_guc *guc, + struct intel_context *ce) +{ + if (context_guc_id_invalid(ce)) + pin_guc_id(guc, ce); + guc_lrc_desc_pin(ce, true); +} + +static inline void guc_init_lrc_mapping(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* make sure all descriptors are clean... */ + xa_destroy(&guc->context_lookup); + + /* + * Some contexts might have been pinned before we enabled GuC + * submission, so we need to add them to the GuC bookeeping. + * Also, after a reset the of the GuC we want to make sure that the + * information shared with GuC is properly reset. The kernel LRCs are + * not attached to the gem_context, so they need to be added separately. + * + * Note: we purposefully do not check the return of guc_lrc_desc_pin, + * because that function can only fail if a reset is just starting. This + * is at the end of reset so presumably another reset isn't happening + * and even it did this code would be run again. 
+ */ + + for_each_engine(engine, gt, id) + if (engine->kernel_context) + guc_kernel_context_pin(guc, engine->kernel_context); +} + static void guc_release(struct intel_engine_cs *engine) { engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ - tasklet_kill(&engine->execlists.tasklet); - intel_engine_cleanup_common(engine); lrc_fini_wa_ctx(engine); } +static void virtual_guc_bump_serial(struct intel_engine_cs *engine) +{ + struct intel_engine_cs *e; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(e, engine->gt, mask, tmp) + e->serial++; +} + static void guc_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overridden by each engine. */ @@ -611,13 +2415,15 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &guc_context_ops; engine->request_alloc = guc_request_alloc; + engine->add_active_request = add_to_context; + engine->remove_active_request = remove_from_context; - engine->schedule = i915_schedule; + engine->sched_engine->schedule = i915_schedule; - engine->reset.prepare = guc_reset_prepare; - engine->reset.rewind = guc_reset_rewind; - engine->reset.cancel = guc_reset_cancel; - engine->reset.finish = guc_reset_finish; + engine->reset.prepare = guc_reset_nop; + engine->reset.rewind = guc_rewind_nop; + engine->reset.cancel = guc_reset_nop; + engine->reset.finish = guc_reset_nop; engine->emit_flush = gen8_emit_flush_xcs; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; @@ -629,13 +2435,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = guc_set_default_submission; engine->flags |= I915_ENGINE_HAS_PREEMPTION; + engine->flags |= I915_ENGINE_HAS_TIMESLICES; /* * TODO: GuC supports timeslicing and semaphores as well, but they're * handled by the firmware so some minor tweaks are required before * enabling. * - * engine->flags |= I915_ENGINE_HAS_TIMESLICES; * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; */ @@ -666,9 +2472,21 @@ static inline void guc_default_irqs(struct intel_engine_cs *engine) intel_engine_set_irq_handler(engine, cs_irq_handler); } +static void guc_sched_engine_destroy(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + struct intel_guc *guc = sched_engine->private_data; + + guc->sched_engine = NULL; + tasklet_kill(&sched_engine->tasklet); /* flush the callback */ + kfree(sched_engine); +} + int intel_guc_submission_setup(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; + struct intel_guc *guc = &engine->gt->uc.guc; /* * The setup relies on several assumptions (e.g. 
irqs always enabled) @@ -676,10 +2494,28 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) */ GEM_BUG_ON(GRAPHICS_VER(i915) < 11); - tasklet_setup(&engine->execlists.tasklet, guc_submission_tasklet); + if (!guc->sched_engine) { + guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!guc->sched_engine) + return -ENOMEM; + + guc->sched_engine->schedule = i915_schedule; + guc->sched_engine->disabled = guc_sched_engine_disabled; + guc->sched_engine->private_data = guc; + guc->sched_engine->destroy = guc_sched_engine_destroy; + guc->sched_engine->bump_inflight_request_prio = + guc_bump_inflight_request_prio; + guc->sched_engine->retire_inflight_request_prio = + guc_retire_inflight_request_prio; + tasklet_setup(&guc->sched_engine->tasklet, + guc_submission_tasklet); + } + i915_sched_engine_put(engine->sched_engine); + engine->sched_engine = i915_sched_engine_get(guc->sched_engine); guc_default_vfuncs(engine); guc_default_irqs(engine); + guc_init_breadcrumbs(engine); if (engine->class == RENDER_CLASS) rcs_submission_override(engine); @@ -695,18 +2531,19 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { - guc_stage_desc_init(guc); + guc_init_lrc_mapping(guc); } void intel_guc_submission_disable(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - - GEM_BUG_ON(gt->awake); /* GT should be parked first */ - /* Note: By the time we're here, GuC may have already been reset */ +} - guc_stage_desc_fini(guc); +static bool __guc_submission_supported(struct intel_guc *guc) +{ + /* GuC submission is unavailable for pre-Gen11 */ + return intel_guc_is_supported(guc) && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; } static bool __guc_submission_selected(struct intel_guc *guc) @@ -721,5 +2558,481 @@ static bool __guc_submission_selected(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { + guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } + +static inline struct intel_context * +g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) +{ + struct intel_context *ce; + + if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid desc_idx %u", desc_idx); + return NULL; + } + + ce = __get_context(guc, desc_idx); + if (unlikely(!ce)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Context is NULL, desc_idx %u", desc_idx); + return NULL; + } + + return ce; +} + +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + +int intel_guc_deregister_done_process_msg(struct intel_guc *guc, + const u32 *msg, + u32 len) +{ + struct intel_context *ce; + u32 desc_idx = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + trace_intel_context_deregister_done(ce); + + if (context_wait_for_deregister_to_register(ce)) { + struct intel_runtime_pm *runtime_pm = + &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + + /* + * Previous owner of this guc_id has been deregistered, now safe + * register this context. 
+ */ + with_intel_runtime_pm(runtime_pm, wakeref) + register_context(ce, true); + guc_signal_context_fence(ce); + intel_context_put(ce); + } else if (context_destroyed(ce)) { + /* Context has been destroyed */ + release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + + decr_outstanding_submission_g2h(guc); + + return 0; +} + +int intel_guc_sched_done_process_msg(struct intel_guc *guc, + const u32 *msg, + u32 len) +{ + struct intel_context *ce; + unsigned long flags; + u32 desc_idx = msg[0]; + + if (unlikely(len < 2)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + if (unlikely(context_destroyed(ce) || + (!context_pending_enable(ce) && + !context_pending_disable(ce)))) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Bad context sched_state 0x%x, 0x%x, desc_idx %u", + atomic_read(&ce->guc_sched_state_no_lock), + ce->guc_state.sched_state, desc_idx); + return -EPROTO; + } + + trace_intel_context_sched_done(ce); + + if (context_pending_enable(ce)) { + clr_context_pending_enable(ce); + } else if (context_pending_disable(ce)) { + bool banned; + + /* + * Unpin must be done before __guc_signal_context_fence, + * otherwise a race exists between the requests getting + * submitted + retired before this unpin completes resulting in + * the pin_count going to zero and the context still being + * enabled. + */ + intel_context_sched_disable_unpin(ce); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + banned = context_banned(ce); + clr_context_banned(ce); + clr_context_pending_disable(ce); + __guc_signal_context_fence(ce); + guc_blocked_fence_complete(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + if (banned) { + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } + } + + decr_outstanding_submission_g2h(guc); + intel_context_put(ce); + + return 0; +} + +static void capture_error_state(struct intel_guc *guc, + struct intel_context *ce) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine = __context_to_physical_engine(ce); + intel_wakeref_t wakeref; + + intel_engine_set_hung_context(engine, ce); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + i915_capture_error_state(gt, engine->mask); + atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); +} + +static void guc_context_replay(struct intel_context *ce) +{ + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + + __guc_reset_context(ce, true); + tasklet_hi_schedule(&sched_engine->tasklet); +} + +static void guc_handle_context_reset(struct intel_guc *guc, + struct intel_context *ce) +{ + trace_intel_context_reset(ce); + + if (likely(!intel_context_is_banned(ce))) { + capture_error_state(guc, ce); + guc_context_replay(ce); + } +} + +int intel_guc_context_reset_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + struct intel_context *ce; + int desc_idx; + + if (unlikely(len != 1)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + desc_idx = msg[0]; + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + guc_handle_context_reset(guc, ce); + + return 0; +} + +static struct intel_engine_cs * +guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) +{ + struct intel_gt *gt = guc_to_gt(guc); + u8 engine_class = guc_class_to_engine_class(guc_class); + + /* Class index is checked in class 
converter */ + GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); + + return gt->engine_class[engine_class][instance]; +} + +int intel_guc_engine_failure_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + struct intel_engine_cs *engine; + u8 guc_class, instance; + u32 reason; + + if (unlikely(len != 3)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + guc_class = msg[0]; + instance = msg[1]; + reason = msg[2]; + + engine = guc_lookup_engine(guc, guc_class, instance); + if (unlikely(!engine)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid engine %d:%d", guc_class, instance); + return -EPROTO; + } + + intel_gt_handle_error(guc_to_gt(guc), engine->mask, + I915_ERROR_CAPTURE, + "GuC failed to reset %s (reason=0x%08x)\n", + engine->name, reason); + + return 0; +} + +void intel_guc_find_hung_context(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_context *ce; + struct i915_request *rq; + unsigned long index; + + /* Reset called during driver load? GuC not yet initialised! */ + if (unlikely(!guc_submission_initialized(guc))) + return; + + xa_for_each(&guc->context_lookup, index, ce) { + if (!intel_context_is_pinned(ce)) + continue; + + if (intel_engine_is_virtual(ce->engine)) { + if (!(ce->engine->mask & engine->mask)) + continue; + } else { + if (ce->engine != engine) + continue; + } + + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) + continue; + + intel_engine_set_hung_context(engine, ce); + + /* Can only cope with one hang at a time... */ + return; + } + } +} + +void intel_guc_dump_active_requests(struct intel_engine_cs *engine, + struct i915_request *hung_rq, + struct drm_printer *m) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_context *ce; + unsigned long index; + unsigned long flags; + + /* Reset called during driver load? GuC not yet initialised! 
*/ + if (unlikely(!guc_submission_initialized(guc))) + return; + + xa_for_each(&guc->context_lookup, index, ce) { + if (!intel_context_is_pinned(ce)) + continue; + + if (intel_engine_is_virtual(ce->engine)) { + if (!(ce->engine->mask & engine->mask)) + continue; + } else { + if (ce->engine != engine) + continue; + } + + spin_lock_irqsave(&ce->guc_active.lock, flags); + intel_engine_dump_active_requests(&ce->guc_active.requests, + hung_rq, m); + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + } +} + +void intel_guc_submission_print_info(struct intel_guc *guc, + struct drm_printer *p) +{ + struct i915_sched_engine *sched_engine = guc->sched_engine; + struct rb_node *rb; + unsigned long flags; + + if (!sched_engine) + return; + + drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", + atomic_read(&guc->outstanding_submission_g2h)); + drm_printf(p, "GuC tasklet count: %u\n\n", + atomic_read(&sched_engine->tasklet.count)); + + spin_lock_irqsave(&sched_engine->lock, flags); + drm_printf(p, "Requests in GuC submit tasklet:\n"); + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *pl = to_priolist(rb); + struct i915_request *rq; + + priolist_for_each_request(rq, pl) + drm_printf(p, "guc_id=%u, seqno=%llu\n", + rq->context->guc_id, + rq->fence.seqno); + } + spin_unlock_irqrestore(&sched_engine->lock, flags); + drm_printf(p, "\n"); +} + +static inline void guc_log_context_priority(struct drm_printer *p, + struct intel_context *ce) +{ + int i; + + drm_printf(p, "\t\tPriority: %d\n", + ce->guc_prio); + drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); + for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; + i < GUC_CLIENT_PRIORITY_NUM; ++i) { + drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", + i, ce->guc_prio_count[i]); + } + drm_printf(p, "\n"); +} + +void intel_guc_submission_print_context_info(struct intel_guc *guc, + struct drm_printer *p) +{ + struct intel_context *ce; + unsigned long index; + + xa_for_each(&guc->context_lookup, index, ce) { + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); + drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + drm_printf(p, "\t\tContext Pin Count: %u\n", + atomic_read(&ce->pin_count)); + drm_printf(p, "\t\tGuC ID Ref Count: %u\n", + atomic_read(&ce->guc_id_ref)); + drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", + ce->guc_state.sched_state, + atomic_read(&ce->guc_sched_state_no_lock)); + + guc_log_context_priority(p, ce); + } +} + +static struct intel_context * +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +{ + struct guc_virtual_engine *ve; + struct intel_guc *guc; + unsigned int n; + int err; + + ve = kzalloc(sizeof(*ve), GFP_KERNEL); + if (!ve) + return ERR_PTR(-ENOMEM); + + guc = &siblings[0]->gt->uc.guc; + + ve->base.i915 = siblings[0]->i915; + ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; + ve->base.id = -1; + + ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; + ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.saturated = ALL_ENGINES; + + snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); + + ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); + + ve->base.cops = 
&virtual_guc_context_ops; + ve->base.request_alloc = guc_request_alloc; + ve->base.bump_serial = virtual_guc_bump_serial; + + ve->base.submit_request = guc_submit_request; + + ve->base.flags = I915_ENGINE_IS_VIRTUAL; + + intel_context_init(&ve->context, &ve->base); + + for (n = 0; n < count; n++) { + struct intel_engine_cs *sibling = siblings[n]; + + GEM_BUG_ON(!is_power_of_2(sibling->mask)); + if (sibling->mask & ve->base.mask) { + DRM_DEBUG("duplicate %s entry in load balancer\n", + sibling->name); + err = -EINVAL; + goto err_put; + } + + ve->base.mask |= sibling->mask; + + if (n != 0 && ve->base.class != sibling->class) { + DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); + err = -EINVAL; + goto err_put; + } else if (n == 0) { + ve->base.class = sibling->class; + ve->base.uabi_class = sibling->uabi_class; + snprintf(ve->base.name, sizeof(ve->base.name), + "v%dx%d", ve->base.class, count); + ve->base.context_size = sibling->context_size; + + ve->base.add_active_request = + sibling->add_active_request; + ve->base.remove_active_request = + sibling->remove_active_request; + ve->base.emit_bb_start = sibling->emit_bb_start; + ve->base.emit_flush = sibling->emit_flush; + ve->base.emit_init_breadcrumb = + sibling->emit_init_breadcrumb; + ve->base.emit_fini_breadcrumb = + sibling->emit_fini_breadcrumb; + ve->base.emit_fini_breadcrumb_dw = + sibling->emit_fini_breadcrumb_dw; + ve->base.breadcrumbs = + intel_breadcrumbs_get(sibling->breadcrumbs); + + ve->base.flags |= sibling->flags; + + ve->base.props.timeslice_duration_ms = + sibling->props.timeslice_duration_ms; + ve->base.props.preempt_timeout_ms = + sibling->props.preempt_timeout_ms; + } + } + + return &ve->context; + +err_put: + intel_context_put(&ve->context); + return ERR_PTR(err); +} + +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (READ_ONCE(engine->props.heartbeat_interval_ms)) + return true; + + return false; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 3f7005018939..c7ef44fa0c36 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -10,6 +10,7 @@ #include "intel_guc.h" +struct drm_printer; struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); @@ -20,11 +21,24 @@ void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); int intel_guc_submission_setup(struct intel_engine_cs *engine); +void intel_guc_submission_print_info(struct intel_guc *guc, + struct drm_printer *p); +void intel_guc_submission_print_context_info(struct intel_guc *guc, + struct drm_printer *p); +void intel_guc_dump_active_requests(struct intel_engine_cs *engine, + struct i915_request *hung_rq, + struct drm_printer *m); + +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); + +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout); static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { - /* XXX: GuC submission is unavailable for now */ - return false; + return guc->submission_supported; } static inline bool intel_guc_submission_is_wanted(struct 
intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6d8b9233214e..b104fb7607eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -34,8 +34,14 @@ static void uc_expand_default_options(struct intel_uc *uc) return; } - /* Default: enable HuC authentication only */ - i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; + /* Intermediate platforms are HuC authentication only */ + if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) { + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; + return; + } + + /* Default: enable HuC authentication and GuC submission */ + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION; } /* Reset GuC providing us with fresh state for both GuC and HuC. @@ -69,16 +75,18 @@ static void __confirm_options(struct intel_uc *uc) struct drm_i915_private *i915 = uc_to_gt(uc)->i915; drm_dbg(&i915->drm, - "enable_guc=%d (guc:%s submission:%s huc:%s)\n", + "enable_guc=%d (guc:%s submission:%s huc:%s slpc:%s)\n", i915->params.enable_guc, yesno(intel_uc_wants_guc(uc)), yesno(intel_uc_wants_guc_submission(uc)), - yesno(intel_uc_wants_huc(uc))); + yesno(intel_uc_wants_huc(uc)), + yesno(intel_uc_wants_guc_slpc(uc))); if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); GEM_BUG_ON(intel_uc_wants_huc(uc)); + GEM_BUG_ON(intel_uc_wants_guc_slpc(uc)); return; } @@ -120,6 +128,11 @@ void intel_uc_init_early(struct intel_uc *uc) uc->ops = &uc_ops_off; } +void intel_uc_init_late(struct intel_uc *uc) +{ + intel_guc_init_late(&uc->guc); +} + void intel_uc_driver_late_release(struct intel_uc *uc) { } @@ -207,21 +220,6 @@ static void guc_handle_mmio_msg(struct intel_guc *guc) spin_unlock_irq(&guc->irq_lock); } -static void guc_reset_interrupts(struct intel_guc *guc) -{ - guc->interrupts.reset(guc); -} - -static void guc_enable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.enable(guc); -} - -static void guc_disable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.disable(guc); -} - static int guc_enable_communication(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -242,7 +240,7 @@ static int guc_enable_communication(struct intel_guc *guc) guc_get_mmio_msg(guc); guc_handle_mmio_msg(guc); - guc_enable_interrupts(guc); + intel_guc_enable_interrupts(guc); /* check for CT messages received before we enabled interrupts */ spin_lock_irq(>->irq_lock); @@ -265,7 +263,7 @@ static void guc_disable_communication(struct intel_guc *guc) */ guc_clear_mmio_msg(guc); - guc_disable_interrupts(guc); + intel_guc_disable_interrupts(guc); intel_guc_ct_disable(&guc->ct); @@ -323,9 +321,6 @@ static int __uc_init(struct intel_uc *uc) if (i915_inject_probe_failure(uc_to_gt(uc)->i915)) return -ENOMEM; - /* XXX: GuC submission is unavailable for now */ - GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); - ret = intel_guc_init(guc); if (ret) return ret; @@ -463,7 +458,7 @@ static int __uc_init_hw(struct intel_uc *uc) if (ret) goto err_out; - guc_reset_interrupts(guc); + intel_guc_reset_interrupts(guc); /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ @@ -505,12 +500,21 @@ static int __uc_init_hw(struct intel_uc *uc) if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); + if (intel_uc_uses_guc_slpc(uc)) { + ret = intel_guc_slpc_enable(&guc->slpc); + if (ret) + goto err_submission; + } + drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", 
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, guc->fw.major_ver_found, guc->fw.minor_ver_found, "submission", enableddisabled(intel_uc_uses_guc_submission(uc))); + drm_info(&i915->drm, "GuC SLPC: %s\n", + enableddisabled(intel_uc_uses_guc_slpc(uc))); + if (intel_uc_uses_huc(uc)) { drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), @@ -525,6 +529,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* * We've failed to load the firmware :( */ +err_submission: + intel_guc_submission_disable(guc); err_log_capture: __uc_capture_load_err_log(uc); err_out: @@ -565,23 +571,67 @@ void intel_uc_reset_prepare(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - if (!intel_guc_is_ready(guc)) + uc->reset_in_progress = true; + + /* Nothing to do if GuC isn't supported */ + if (!intel_uc_supports_guc(uc)) return; + /* Firmware expected to be running when this function is called */ + if (!intel_guc_is_ready(guc)) + goto sanitize; + + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset_prepare(guc); + +sanitize: __uc_sanitize(uc); } +void intel_uc_reset(struct intel_uc *uc, bool stalled) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware can not be running when this function is called */ + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset(guc, stalled); +} + +void intel_uc_reset_finish(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + uc->reset_in_progress = false; + + /* Firmware expected to be running when this function is called */ + if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset_finish(guc); +} + +void intel_uc_cancel_requests(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware can not be running when this function is called */ + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_cancel_requests(guc); +} + void intel_uc_runtime_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - int err; if (!intel_guc_is_ready(guc)) return; - err = intel_guc_suspend(guc); - if (err) - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + /* + * Wait for any outstanding CTB before tearing down communication /w the + * GuC. + */ +#define OUTSTANDING_CTB_TIMEOUT_PERIOD (HZ / 5) + intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + false, OUTSTANDING_CTB_TIMEOUT_PERIOD); + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); guc_disable_communication(guc); } @@ -590,17 +640,22 @@ void intel_uc_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; intel_wakeref_t wakeref; + int err; if (!intel_guc_is_ready(guc)) return; - with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) - intel_uc_runtime_suspend(uc); + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + } } static int __uc_resume(struct intel_uc *uc, bool enable_communication) { struct intel_guc *guc = &uc->guc; + struct intel_gt *gt = guc_to_gt(guc); int err; if (!intel_guc_is_fw_running(guc)) @@ -612,6 +667,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) if (enable_communication) guc_enable_communication(guc); + /* If we are only resuming GuC communication but not reloading + * GuC, we need to ensure the ARAT timer interrupt is enabled + * again. In case of GuC reload, it is enabled during SLPC enable. 
+ */ + if (enable_communication && intel_uc_uses_guc_slpc(uc)) + intel_guc_pm_intrmsk_enable(gt); + err = intel_guc_resume(guc); if (err) { DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 9c954c589edf..866b462821c0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -7,7 +7,9 @@ #define _INTEL_UC_H_ #include "intel_guc.h" +#include "intel_guc_rc.h" #include "intel_guc_submission.h" +#include "intel_guc_slpc.h" #include "intel_huc.h" #include "i915_params.h" @@ -30,13 +32,19 @@ struct intel_uc { /* Snapshot of GuC log from last failed load */ struct drm_i915_gem_object *load_err_log; + + bool reset_in_progress; }; void intel_uc_init_early(struct intel_uc *uc); +void intel_uc_init_late(struct intel_uc *uc); void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); +void intel_uc_reset(struct intel_uc *uc, bool stalled); +void intel_uc_reset_finish(struct intel_uc *uc); +void intel_uc_cancel_requests(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); void intel_uc_runtime_suspend(struct intel_uc *uc); int intel_uc_resume(struct intel_uc *uc); @@ -77,10 +85,17 @@ __uc_state_checker(x, func, uses, used) uc_state_checkers(guc, guc); uc_state_checkers(huc, huc); uc_state_checkers(guc, guc_submission); +uc_state_checkers(guc, guc_slpc); +uc_state_checkers(guc, guc_rc); #undef uc_state_checkers #undef __uc_state_checker +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) +{ + return intel_guc_wait_for_idle(&uc->guc, timeout); +} + #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ { \ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index df647c9a8d56..3a16d08608a5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,19 +48,20 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 49, 0, 1), huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 49, 0, 1), huc_def(skl, 2, 0, 0)) + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0)) #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index cc2c05e18206..e5c2fdfc20e3 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1055,7 +1055,7 @@ static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; - if (GRAPHICS_VER(dev_priv) == 9 || GRAPHICS_VER(dev_priv) == 10) { + if (GRAPHICS_VER(dev_priv) == 9) { u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & GAMW_ECO_ENABLE_64K_IPS_FIELD; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 734c37c5e347..b56a8e37a3cd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1409,11 +1409,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) intel_context_set_single_submission(ce); /* Max ring buffer size */ - if (!intel_uc_wants_guc_submission(&engine->gt->uc)) { - const unsigned int ring_size = 512 * SZ_4K; - - ce->ring = __intel_context_ring_size(ring_size); - } + if (!intel_uc_wants_guc_submission(&engine->gt->uc)) + ce->ring_size = SZ_2M; s->shadow[i] = ce; } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index b1aa1c482c32..3103c1e1fd14 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -13,7 +13,6 @@ #include "i915_drv.h" #include "i915_active.h" -#include "i915_globals.h" /* * Active refs memory 
management @@ -22,10 +21,7 @@ * they idle (when we know the active requests are inactive) and allocate the * nodes from a local slab cache to hopefully reduce the fragmentation. */ -static struct i915_global_active { - struct i915_global base; - struct kmem_cache *slab_cache; -} global; +static struct kmem_cache *slab_cache; struct active_node { struct rb_node node; @@ -174,7 +170,7 @@ __active_retire(struct i915_active *ref) /* Finally free the discarded timeline tree */ rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_fence_isset(&it->base)); - kmem_cache_free(global.slab_cache, it); + kmem_cache_free(slab_cache, it); } } @@ -322,7 +318,7 @@ active_instance(struct i915_active *ref, u64 idx) * XXX: We should preallocate this before i915_active_ref() is ever * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC. */ - node = kmem_cache_alloc(global.slab_cache, GFP_ATOMIC); + node = kmem_cache_alloc(slab_cache, GFP_ATOMIC); if (!node) goto out; @@ -788,7 +784,7 @@ void i915_active_fini(struct i915_active *ref) mutex_destroy(&ref->mutex); if (ref->cache) - kmem_cache_free(global.slab_cache, ref->cache); + kmem_cache_free(slab_cache, ref->cache); } static inline bool is_idle_barrier(struct active_node *node, u64 idx) @@ -908,7 +904,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node = reuse_idle_barrier(ref, idx); rcu_read_unlock(); if (!node) { - node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + node = kmem_cache_alloc(slab_cache, GFP_KERNEL); if (!node) goto unwind; @@ -956,7 +952,7 @@ unwind: atomic_dec(&ref->count); intel_engine_pm_put(barrier_to_engine(node)); - kmem_cache_free(global.slab_cache, node); + kmem_cache_free(slab_cache, node); } return -ENOMEM; } @@ -1176,27 +1172,16 @@ struct i915_active *i915_active_create(void) #include "selftests/i915_active.c" #endif -static void i915_global_active_shrink(void) +void i915_active_module_exit(void) { - kmem_cache_shrink(global.slab_cache); + kmem_cache_destroy(slab_cache); } -static void i915_global_active_exit(void) +int __init i915_active_module_init(void) { - kmem_cache_destroy(global.slab_cache); -} - -static struct i915_global_active global = { { - .shrink = i915_global_active_shrink, - .exit = i915_global_active_exit, -} }; - -int __init i915_global_active_init(void) -{ - global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); - if (!global.slab_cache) + slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); + if (!slab_cache) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index d0feda68b874..5fcdb0e2bc9e 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -247,4 +247,7 @@ static inline int __i915_request_await_exclusive(struct i915_request *rq, return err; } +void i915_active_module_exit(void); +int i915_active_module_init(void); + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c new file mode 100644 index 000000000000..7b274c51cac0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/kmemleak.h> + +#include "i915_buddy.h" + +#include "i915_gem.h" +#include "i915_utils.h" + +static struct kmem_cache *slab_blocks; + +static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, + struct i915_buddy_block 
*parent, + unsigned int order, + u64 offset) +{ + struct i915_buddy_block *block; + + GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); + + block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL); + if (!block) + return NULL; + + block->header = offset; + block->header |= order; + block->parent = parent; + + GEM_BUG_ON(block->header & I915_BUDDY_HEADER_UNUSED); + return block; +} + +static void i915_block_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + kmem_cache_free(slab_blocks, block); +} + +static void mark_allocated(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_ALLOCATED; + + list_del(&block->link); +} + +static void mark_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_FREE; + + list_add(&block->link, + &mm->free_list[i915_buddy_block_order(block)]); +} + +static void mark_split(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_SPLIT; + + list_del(&block->link); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) +{ + unsigned int i; + u64 offset; + + if (size < chunk_size) + return -EINVAL; + + if (chunk_size < PAGE_SIZE) + return -EINVAL; + + if (!is_power_of_2(chunk_size)) + return -EINVAL; + + size = round_down(size, chunk_size); + + mm->size = size; + mm->chunk_size = chunk_size; + mm->max_order = ilog2(size) - ilog2(chunk_size); + + GEM_BUG_ON(mm->max_order > I915_BUDDY_MAX_ORDER); + + mm->free_list = kmalloc_array(mm->max_order + 1, + sizeof(struct list_head), + GFP_KERNEL); + if (!mm->free_list) + return -ENOMEM; + + for (i = 0; i <= mm->max_order; ++i) + INIT_LIST_HEAD(&mm->free_list[i]); + + mm->n_roots = hweight64(size); + + mm->roots = kmalloc_array(mm->n_roots, + sizeof(struct i915_buddy_block *), + GFP_KERNEL); + if (!mm->roots) + goto out_free_list; + + offset = 0; + i = 0; + + /* + * Split into power-of-two blocks, in case we are given a size that is + * not itself a power-of-two. 
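(As a worked example with assumed numbers: initialising with size = 768M and chunk_size = 4K produces two roots, a 512M block of order 17 and a 256M block of order 16, since order = ilog2(block size) - ilog2(chunk size) and 768M = 512M + 256M.)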
+ */ + do { + struct i915_buddy_block *root; + unsigned int order; + u64 root_size; + + root_size = rounddown_pow_of_two(size); + order = ilog2(root_size) - ilog2(chunk_size); + + root = i915_block_alloc(mm, NULL, order, offset); + if (!root) + goto out_free_roots; + + mark_free(mm, root); + + GEM_BUG_ON(i > mm->max_order); + GEM_BUG_ON(i915_buddy_block_size(mm, root) < chunk_size); + + mm->roots[i] = root; + + offset += root_size; + size -= root_size; + i++; + } while (size); + + return 0; + +out_free_roots: + while (i--) + i915_block_free(mm, mm->roots[i]); + kfree(mm->roots); +out_free_list: + kfree(mm->free_list); + return -ENOMEM; +} + +void i915_buddy_fini(struct i915_buddy_mm *mm) +{ + int i; + + for (i = 0; i < mm->n_roots; ++i) { + GEM_WARN_ON(!i915_buddy_block_is_free(mm->roots[i])); + i915_block_free(mm, mm->roots[i]); + } + + kfree(mm->roots); + kfree(mm->free_list); +} + +static int split_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + unsigned int block_order = i915_buddy_block_order(block) - 1; + u64 offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + GEM_BUG_ON(!i915_buddy_block_order(block)); + + block->left = i915_block_alloc(mm, block, block_order, offset); + if (!block->left) + return -ENOMEM; + + block->right = i915_block_alloc(mm, block, block_order, + offset + (mm->chunk_size << block_order)); + if (!block->right) { + i915_block_free(mm, block->left); + return -ENOMEM; + } + + mark_free(mm, block->left); + mark_free(mm, block->right); + + mark_split(block); + + return 0; +} + +static struct i915_buddy_block * +get_buddy(struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + parent = block->parent; + if (!parent) + return NULL; + + if (parent->left == block) + return parent->right; + + return parent->left; +} + +static void __i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + while ((parent = block->parent)) { + struct i915_buddy_block *buddy; + + buddy = get_buddy(block); + + if (!i915_buddy_block_is_free(buddy)) + break; + + list_del(&buddy->link); + + i915_block_free(mm, block); + i915_block_free(mm, buddy); + + block = parent; + } + + mark_free(mm, block); +} + +void i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + __i915_buddy_free(mm, block); +} + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects) +{ + struct i915_buddy_block *block, *on; + + list_for_each_entry_safe(block, on, objects, link) { + i915_buddy_free(mm, block); + cond_resched(); + } + INIT_LIST_HEAD(objects); +} + +/* + * Allocate power-of-two block. The order value here translates to: + * + * 0 = 2^0 * mm->chunk_size + * 1 = 2^1 * mm->chunk_size + * 2 = 2^2 * mm->chunk_size + * ... 
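A minimal usage sketch of the allocator interfaces declared in this patch, assuming kernel context and illustrative sizes; the function name is invented and any caller-side locking is omitted:

static int buddy_usage_sketch(void)
{
	struct i915_buddy_mm mm;
	struct i915_buddy_block *block;
	int err;

	/* a single 256M root, managed in 4K chunks (illustrative sizes) */
	err = i915_buddy_init(&mm, SZ_256M, SZ_4K);
	if (err)
		return err;

	/* order 4 == 2^4 * 4K == 64K */
	block = i915_buddy_alloc(&mm, 4);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto out_fini;
	}

	/* i915_buddy_block_size(&mm, block) == SZ_64K here */
	i915_buddy_free(&mm, block);

out_fini:
	i915_buddy_fini(&mm);
	return err;
}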
+ */ +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) +{ + struct i915_buddy_block *block = NULL; + unsigned int i; + int err; + + for (i = order; i <= mm->max_order; ++i) { + block = list_first_entry_or_null(&mm->free_list[i], + struct i915_buddy_block, + link); + if (block) + break; + } + + if (!block) + return ERR_PTR(-ENOSPC); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + + while (i != order) { + err = split_block(mm, block); + if (unlikely(err)) + goto out_free; + + /* Go low */ + block = block->left; + i--; + } + + mark_allocated(block); + kmemleak_update_trace(block); + return block; + +out_free: + if (i != order) + __i915_buddy_free(mm, block); + return ERR_PTR(err); +} + +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +/* + * Allocate range. Note that it's safe to chain together multiple alloc_ranges + * with the same blocks list. + * + * Intended for pre-allocating portions of the address space, for example to + * reserve a block for the initial framebuffer or similar, hence the expectation + * here is that i915_buddy_alloc() is still the main vehicle for + * allocations, so if that's not the case then the drm_mm range allocator is + * probably a much better fit, and so you should probably go use that instead. + */ +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *buddy; + LIST_HEAD(allocated); + LIST_HEAD(dfs); + u64 end; + int err; + int i; + + if (size < mm->chunk_size) + return -EINVAL; + + if (!IS_ALIGNED(size | start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(start, size, mm->size)) + return -EINVAL; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + end = start + size - 1; + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(&dfs, + struct i915_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + block_start = i915_buddy_block_offset(block); + block_end = block_start + i915_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (i915_buddy_block_is_allocated(block)) { + err = -ENOSPC; + goto err_free; + } + + if (contains(start, end, block_start, block_end)) { + if (!i915_buddy_block_is_free(block)) { + err = -ENOSPC; + goto err_free; + } + + mark_allocated(block); + list_add_tail(&block->link, &allocated); + continue; + } + + if (!i915_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, &dfs); + list_add(&block->left->tmp_link, &dfs); + } while (1); + + list_splice_tail(&allocated, blocks); + return 0; + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. 
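Range reservation, as described for i915_buddy_alloc_range() above, might look like this in a caller; the offsets are illustrative and mm is an initialised allocator as in the previous sketch:

	LIST_HEAD(reserved);
	int err;

	/* carve out [16M, 24M) of the managed space, e.g. for a pre-set framebuffer */
	err = i915_buddy_alloc_range(&mm, &reserved, SZ_16M, SZ_8M);
	if (!err)
		/* hand the blocks back once the reservation is no longer needed */
		i915_buddy_free_list(&mm, &reserved);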
+ */ + buddy = get_buddy(block); + if (buddy && + (i915_buddy_block_is_free(block) && + i915_buddy_block_is_free(buddy))) + __i915_buddy_free(mm, block); + +err_free: + i915_buddy_free_list(mm, &allocated); + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_buddy.c" +#endif + +void i915_buddy_module_exit(void) +{ + kmem_cache_destroy(slab_blocks); +} + +int __init i915_buddy_module_init(void) +{ + slab_blocks = KMEM_CACHE(i915_buddy_block, 0); + if (!slab_blocks) + return -ENOMEM; + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h new file mode 100644 index 000000000000..3940d632f208 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_BUDDY_H__ +#define __I915_BUDDY_H__ + +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/slab.h> + +struct i915_buddy_block { +#define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define I915_BUDDY_ALLOCATED (1 << 10) +#define I915_BUDDY_FREE (2 << 10) +#define I915_BUDDY_SPLIT (3 << 10) +/* Free to be used, if needed in the future */ +#define I915_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6) +#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) + u64 header; + + struct i915_buddy_block *left; + struct i915_buddy_block *right; + struct i915_buddy_block *parent; + + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through i915_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * i915_buddy_free* ownership is given back to the mm. + */ + struct list_head link; + struct list_head tmp_link; +}; + +/* Order-zero must be at least PAGE_SIZE */ +#define I915_BUDDY_MAX_ORDER (63 - PAGE_SHIFT) + +/* + * Binary Buddy System. + * + * Locking should be handled by the user, a simple mutex around + * i915_buddy_alloc* and i915_buddy_free* should suffice. + */ +struct i915_buddy_mm { + /* Maintain a free list for each order. */ + struct list_head *free_list; + + /* + * Maintain explicit binary tree(s) to track the allocation of the + * address space. This gives us a simple way of finding a buddy block + * and performing the potentially recursive merge step when freeing a + * block. Nodes are either allocated or free, in which case they will + * also exist on the respective free list. + */ + struct i915_buddy_block **roots; + + /* + * Anything from here is public, and remains static for the lifetime of + * the mm. Everything above is considered do-not-touch. 
+ */ + unsigned int n_roots; + unsigned int max_order; + + /* Must be at least PAGE_SIZE */ + u64 chunk_size; + u64 size; +}; + +static inline u64 +i915_buddy_block_offset(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +i915_buddy_block_order(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_ORDER; +} + +static inline unsigned int +i915_buddy_block_state(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_STATE; +} + +static inline bool +i915_buddy_block_is_allocated(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_ALLOCATED; +} + +static inline bool +i915_buddy_block_is_free(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_FREE; +} + +static inline bool +i915_buddy_block_is_split(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_SPLIT; +} + +static inline u64 +i915_buddy_block_size(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + return mm->chunk_size << i915_buddy_block_order(block); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size); + +void i915_buddy_fini(struct i915_buddy_mm *mm); + +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order); + +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size); + +void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); + +void i915_buddy_module_exit(void); +int i915_buddy_module_init(void); + +#endif diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index a3b4d99d64b9..e0403ce9ce69 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1468,42 +1468,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, * space. Parsing should be faster in some cases this way. 
*/ batch_end = cmd + batch_length / sizeof(*batch_end); - while (*cmd != MI_BATCH_BUFFER_END) { - u32 length = 1; - - if (*cmd != MI_NOOP) { /* MI_NOOP == 0 */ - desc = find_cmd(engine, *cmd, desc, &default_desc); - if (!desc) { - DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); - ret = -EINVAL; - break; - } + do { + u32 length; - if (desc->flags & CMD_DESC_FIXED) - length = desc->length.fixed; - else - length = (*cmd & desc->length.mask) + LENGTH_BIAS; + if (*cmd == MI_BATCH_BUFFER_END) + break; - if ((batch_end - cmd) < length) { - DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", - *cmd, - length, - batch_end - cmd); - ret = -EINVAL; - break; - } + desc = find_cmd(engine, *cmd, desc, &default_desc); + if (!desc) { + DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); + ret = -EINVAL; + break; + } - if (!check_cmd(engine, desc, cmd, length)) { - ret = -EACCES; - break; - } + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else + length = (*cmd & desc->length.mask) + LENGTH_BIAS; - if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { - ret = check_bbstart(cmd, offset, length, batch_length, - batch_addr, shadow_addr, - jump_whitelist); - break; - } + if ((batch_end - cmd) < length) { + DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", + *cmd, + length, + batch_end - cmd); + ret = -EINVAL; + break; + } + + if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; + break; + } + + if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { + ret = check_bbstart(cmd, offset, length, batch_length, + batch_addr, shadow_addr, + jump_whitelist); + break; } if (!IS_ERR_OR_NULL(jump_whitelist)) @@ -1516,7 +1517,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = -EINVAL; break; } - } + } while (1); if (trampoline) { /* diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cc745751ac53..44969f5dde50 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -538,20 +538,20 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(dev_priv) >= 11 ? 
GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -636,7 +636,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) intel_uncore_read16(uncore, C0DRB3_BW)); seq_printf(m, "C1DRB3 = 0x%04x\n", intel_uncore_read16(uncore, C1DRB3_BW)); - } else if (INTEL_GEN(dev_priv) >= 6) { + } else if (GRAPHICS_VER(dev_priv) >= 6) { seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n", intel_uncore_read(uncore, MAD_DIMM_C0)); seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 4e2b077692cb..20424275d41e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -6,9 +6,21 @@ #include <linux/kernel.h> #include "i915_debugfs_params.h" +#include "gt/intel_gt.h" +#include "gt/uc/intel_guc.h" #include "i915_drv.h" #include "i915_params.h" +#define MATCH_DEBUGFS_NODE_NAME(_file, _name) \ + (strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0) + +#define GET_I915(i915, name, ptr) \ + do { \ + struct i915_params *params; \ + params = container_of(((void *)(ptr)), typeof(*params), name); \ + (i915) = container_of(params, typeof(*(i915)), params); \ + } while (0) + /* int param */ static int i915_param_int_show(struct seq_file *m, void *data) { @@ -24,6 +36,16 @@ static int i915_param_int_open(struct inode *inode, struct file *file) return single_open(file, i915_param_int_show, inode->i_private); } +static int notify_guc(struct drm_i915_private *i915) +{ + int ret = 0; + + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + ret = intel_guc_global_policies_update(&i915->gt.uc.guc); + + return ret; +} + static ssize_t i915_param_int_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) @@ -81,8 +103,10 @@ static ssize_t i915_param_uint_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { + struct drm_i915_private *i915; struct seq_file *m = file->private_data; unsigned int *value = m->private; + unsigned int old = *value; int ret; ret = kstrtouint_from_user(ubuf, len, 0, value); @@ -95,6 +119,14 @@ static ssize_t i915_param_uint_write(struct file *file, *value = b; } + if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) { + GET_I915(i915, reset, value); + + ret = notify_guc(i915); + if (ret) + *value = old; + } + return ret ?: len; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 73de45472f60..59fb4c710c8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -271,10 +271,11 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) bool pre = false; pre |= IS_HSW_EARLY_SDV(dev_priv); - pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); - pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); - pre |= IS_KBL_GT_STEP(dev_priv, 0, STEP_A0); - pre |= IS_GLK_REVID(dev_priv, 0, GLK_REVID_A2); + pre |= IS_SKYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x6; + pre |= IS_BROXTON(dev_priv) && INTEL_REVID(dev_priv) < 0xA; + pre |= IS_KABYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x1; + pre |= IS_GEMINILAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x3; + pre |= IS_ICELAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x7; if (pre) { drm_err(&dev_priv->drm, "This is a pre-production stepping. 
" @@ -561,7 +562,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_perf; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "inteldrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, dev_priv->drm.driver); if (ret) goto err_ggtt; @@ -619,7 +620,9 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_opregion_setup(dev_priv); - intel_pcode_init(dev_priv); + ret = intel_pcode_init(dev_priv); + if (ret) + goto err_msi; /* * Fill the dram structure to get the system dram info. This will be @@ -1230,6 +1233,10 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + ret = intel_pcode_init(dev_priv); + if (ret) + return ret; + sanitize_gpu(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b30397b04529..005b1cec7007 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -202,6 +202,68 @@ struct drm_i915_file_private { struct rcu_head rcu; }; + /** @proto_context_lock: Guards all struct i915_gem_proto_context + * operations + * + * This not only guards @proto_context_xa, but is always held + * whenever we manipulate any struct i915_gem_proto_context, + * including finalizing it on first actual use of the GEM context. + * + * See i915_gem_proto_context. + */ + struct mutex proto_context_lock; + + /** @proto_context_xa: xarray of struct i915_gem_proto_context + * + * Historically, the context uAPI allowed for two methods of + * setting context parameters: SET_CONTEXT_PARAM and + * CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called + * at any time while the later happens as part of + * GEM_CONTEXT_CREATE. Everything settable via one was settable + * via the other. While some params are fairly simple and setting + * them on a live context is harmless such as the context priority, + * others are far trickier such as the VM or the set of engines. + * In order to swap out the VM, for instance, we have to delay + * until all current in-flight work is complete, swap in the new + * VM, and then continue. This leads to a plethora of potential + * race conditions we'd really rather avoid. + * + * We have since disallowed setting these more complex parameters + * on active contexts. This works by delaying the creation of the + * actual context until after the client is done configuring it + * with SET_CONTEXT_PARAM. From the perspective of the client, it + * has the same u32 context ID the whole time. From the + * perspective of i915, however, it's a struct i915_gem_proto_context + * right up until the point where we attempt to do something which + * the proto-context can't handle. Then the struct i915_gem_context + * gets created. + * + * This is accomplished via a little xarray dance. When + * GEM_CONTEXT_CREATE is called, we create a struct + * i915_gem_proto_context, reserve a slot in @context_xa but leave + * it NULL, and place the proto-context in the corresponding slot + * in @proto_context_xa. Then, in i915_gem_context_lookup(), we + * first check @context_xa. If it's there, we return the struct + * i915_gem_context and we're done. If it's not, we look in + * @proto_context_xa and, if we find it there, we create the actual + * context and kill the proto-context. 
+ * + * In order for this dance to work properly, everything which ever + * touches a struct i915_gem_proto_context is guarded by + * @proto_context_lock, including context creation. Yes, this + * means context creation now takes a giant global lock but it + * can't really be helped and that should never be on any driver's + * fast-path anyway. + */ + struct xarray proto_context_xa; + + /** @context_xa: xarray of fully created i915_gem_context + * + * Write access to this xarray is guarded by @proto_context_lock. + * Otherwise, writers may race with finalize_create_context_locked(). + * + * See @proto_context_xa. + */ struct xarray context_xa; struct xarray vm_xa; @@ -270,8 +332,10 @@ struct drm_i915_display_funcs { int (*bw_calc_min_cdclk)(struct intel_atomic_state *state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane); - int (*compute_pipe_wm)(struct intel_crtc_state *crtc_state); - int (*compute_intermediate_wm)(struct intel_crtc_state *crtc_state); + int (*compute_pipe_wm)(struct intel_atomic_state *state, + struct intel_crtc *crtc); + int (*compute_intermediate_wm)(struct intel_atomic_state *state, + struct intel_crtc *crtc); void (*initial_watermarks)(struct intel_atomic_state *state, struct intel_crtc *crtc); void (*atomic_update_watermarks)(struct intel_atomic_state *state, @@ -330,15 +394,6 @@ struct drm_i915_display_funcs { void (*read_luts)(struct intel_crtc_state *crtc_state); }; -enum i915_cache_level { - I915_CACHE_NONE = 0, - I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ - I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc - caches, eg sampler/render caches, and the - large Last-Level-Cache. LLC is coherent with - the CPU, but L3 is only visible to the GPU. */ - I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ -}; #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */ @@ -346,13 +401,14 @@ struct intel_fbc { /* This is always the inner lock when overlapping with struct_mutex and * it's the outer lock when overlapping with stolen_lock. */ struct mutex lock; - unsigned threshold; unsigned int possible_framebuffer_bits; unsigned int busy_bits; struct intel_crtc *crtc; struct drm_mm_node compressed_fb; - struct drm_mm_node *compressed_llb; + struct drm_mm_node compressed_llb; + + u8 limit; bool false_color; @@ -467,6 +523,7 @@ struct i915_drrs { #define QUIRK_PIN_SWIZZLED_PAGES (1<<5) #define QUIRK_INCREASE_T12_DELAY (1<<6) #define QUIRK_INCREASE_DDI_DISABLED_TIME (1<<7) +#define QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK (1<<8) struct intel_fbdev; struct intel_fbc_work; @@ -552,7 +609,7 @@ struct i915_gem_mm { * notifier_lock for mmu notifiers, memory may not be allocated * while holding this lock. */ - spinlock_t notifier_lock; + rwlock_t notifier_lock; #endif /* shrinker accounting, also useful for userland debugging */ @@ -576,6 +633,9 @@ i915_fence_timeout(const struct drm_i915_private *i915) #define HAS_HW_SAGV_WM(i915) (DISPLAY_VER(i915) >= 13 && !IS_DGFX(i915)) +/* Amount of PSF GV points, BSpec precisely defines this */ +#define I915_NUM_PSF_GV_POINTS 3 + struct ddi_vbt_port_info { /* Non-NULL if port present. 
*/ struct intel_bios_encoder_data *devdata; @@ -1089,12 +1149,16 @@ struct drm_i915_private { INTEL_DRAM_LPDDR5, } type; u8 num_qgv_points; + u8 num_psf_gv_points; } dram_info; struct intel_bw_info { /* for each QGV point */ unsigned int deratedbw[I915_NUM_QGV_POINTS]; + /* for each PSF GV point */ + unsigned int psf_bw[I915_NUM_PSF_GV_POINTS]; u8 num_qgv_points; + u8 num_psf_gv_points; u8 num_planes; } max_bw[6]; @@ -1134,6 +1198,8 @@ struct drm_i915_private { /* For i915gm/i945gm vblank irq workaround */ u8 vblank_enabled; + bool irq_enabled; + /* perform PHY state sanity checks? */ bool chv_phy_assert[2]; @@ -1237,26 +1303,17 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define INTEL_DEVID(dev_priv) (RUNTIME_INFO(dev_priv)->device_id) -/* - * Deprecated: this will be replaced by individual IP checks: - * GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() - */ -#define INTEL_GEN(dev_priv) GRAPHICS_VER(dev_priv) -/* - * Deprecated: use IS_GRAPHICS_VER(), IS_MEDIA_VER() and IS_DISPLAY_VER() as - * appropriate. - */ -#define IS_GEN_RANGE(dev_priv, s, e) IS_GRAPHICS_VER(dev_priv, (s), (e)) -/* - * Deprecated: use GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() as appropriate. - */ -#define IS_GEN(dev_priv, n) (GRAPHICS_VER(dev_priv) == (n)) +#define IP_VER(ver, rel) ((ver) << 8 | (rel)) #define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) +#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \ + INTEL_INFO(i915)->graphics_rel) #define IS_GRAPHICS_VER(i915, from, until) \ (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver) +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \ + INTEL_INFO(i915)->media_rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) @@ -1264,29 +1321,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) -#define REVID_FOREVER 0xff #define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) #define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) -/* - * Return true if revision is in range [since,until] inclusive. - * - * Use 0 for open-ended since, and REVID_FOREVER for open-ended until. 
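The IP_VER()/GRAPHICS_VER_FULL() helpers above allow checks against a combined version.release pair, for example (the callee is hypothetical):

	/* XeHP-class graphics IP reports as 12.50 and newer */
	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		setup_xehp_features(i915); /* hypothetical */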
- */ -#define IS_REVID(p, since, until) \ - (INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until)) - #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step) #define IS_DISPLAY_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ - INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) <= (until)) + INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) #define IS_GT_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \ - INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) <= (until)) + INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until)) static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, @@ -1385,7 +1433,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE) #define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE) #define IS_COMETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COMETLAKE) -#define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE) +#define IS_CANNONLAKE(dev_priv) 0 #define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) #define IS_JSL_EHL(dev_priv) (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE) || \ IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) @@ -1394,6 +1442,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_S) #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) +#define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) +#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) +#define IS_DG2_G10(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) +#define IS_DG2_G11(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -1445,8 +1499,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_CML_GT2(dev_priv) (IS_COMETLAKE(dev_priv) && \ INTEL_INFO(dev_priv)->gt == 2) -#define IS_CNL_WITH_PORT_F(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_CANNONLAKE, INTEL_SUBPLATFORM_PORTF) #define IS_ICL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) @@ -1456,60 +1508,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_TGL_Y(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) -#define SKL_REVID_A0 0x0 -#define SKL_REVID_B0 0x1 -#define SKL_REVID_C0 0x2 -#define SKL_REVID_D0 0x3 -#define SKL_REVID_E0 0x4 -#define SKL_REVID_F0 0x5 -#define SKL_REVID_G0 0x6 -#define SKL_REVID_H0 0x7 - -#define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until)) - -#define BXT_REVID_A0 0x0 -#define BXT_REVID_A1 0x1 -#define BXT_REVID_B0 0x3 -#define BXT_REVID_B_LAST 0x8 -#define BXT_REVID_C0 0x9 - -#define IS_BXT_REVID(dev_priv, since, until) \ - (IS_BROXTON(dev_priv) && IS_REVID(dev_priv, since, until)) +#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until)) #define IS_KBL_GT_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until)) #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) 
&& IS_DISPLAY_STEP(dev_priv, since, until)) -#define GLK_REVID_A0 0x0 -#define GLK_REVID_A1 0x1 -#define GLK_REVID_A2 0x2 -#define GLK_REVID_B0 0x3 - -#define IS_GLK_REVID(dev_priv, since, until) \ - (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) - -#define CNL_REVID_A0 0x0 -#define CNL_REVID_B0 0x1 -#define CNL_REVID_C0 0x2 - -#define IS_CNL_REVID(p, since, until) \ - (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) - -#define ICL_REVID_A0 0x0 -#define ICL_REVID_A2 0x1 -#define ICL_REVID_B0 0x3 -#define ICL_REVID_B2 0x4 -#define ICL_REVID_C0 0x5 - -#define IS_ICL_REVID(p, since, until) \ - (IS_ICELAKE(p) && IS_REVID(p, since, until)) - -#define EHL_REVID_A0 0x0 -#define EHL_REVID_B0 0x1 - -#define IS_JSL_EHL_REVID(p, since, until) \ - (IS_JSL_EHL(p) && IS_REVID(p, since, until)) +#define IS_JSL_EHL_GT_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until)) +#define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until)) #define IS_TGL_DISPLAY_STEP(__i915, since, until) \ (IS_TIGERLAKE(__i915) && \ @@ -1523,18 +1532,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ IS_GT_STEP(__i915, since, until)) -#define RKL_REVID_A0 0x0 -#define RKL_REVID_B0 0x1 -#define RKL_REVID_C0 0x4 +#define IS_RKL_DISPLAY_STEP(p, since, until) \ + (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until)) -#define IS_RKL_REVID(p, since, until) \ - (IS_ROCKETLAKE(p) && IS_REVID(p, since, until)) - -#define DG1_REVID_A0 0x0 -#define DG1_REVID_B0 0x1 - -#define IS_DG1_REVID(p, since, until) \ - (IS_DG1(p) && IS_REVID(p, since, until)) +#define IS_DG1_GT_STEP(p, since, until) \ + (IS_DG1(p) && IS_GT_STEP(p, since, until)) +#define IS_DG1_DISPLAY_STEP(p, since, until) \ + (IS_DG1(p) && IS_DISPLAY_STEP(p, since, until)) #define IS_ADLS_DISPLAY_STEP(__i915, since, until) \ (IS_ALDERLAKE_S(__i915) && \ @@ -1552,6 +1556,31 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_ALDERLAKE_P(__i915) && \ IS_GT_STEP(__i915, since, until)) +#define IS_XEHPSDV_GT_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until)) + +/* + * DG2 hardware steppings are a bit unusual. The hardware design was forked + * to create two variants (G10 and G11) which have distinct workaround sets. + * The G11 fork of the DG2 design resets the GT stepping back to "A0" for its + * first iteration, even though it's more similar to a G10 B0 stepping in terms + * of functionality and workarounds. However the display stepping does not + * reset in the same manner --- a specific stepping like "B0" has a consistent + * meaning regardless of whether it belongs to a G10 or G11 DG2. + * + * TLDR: All GT workarounds and stepping-specific logic must be applied in + * relation to a specific subplatform (G10 or G11), whereas display workarounds + * and stepping-specific logic will be applied with a general DG2-wide stepping + * number. 
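Illustrative use of the DG2 helpers defined just below, following the split described above; with the reworked IS_*_STEP() checks the "until" bound is exclusive, and the callees here are hypothetical:

	/* GT side: scoped to a fork, e.g. any A step of a G10 part */
	if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0))
		apply_g10_early_gt_wa(i915); /* hypothetical */

	/* display side: one DG2-wide stepping number, e.g. anything before B0 */
	if (IS_DG2_DISP_STEP(i915, STEP_A0, STEP_B0))
		apply_dg2_display_early_wa(i915); /* hypothetical */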
+ */ +#define IS_DG2_GT_STEP(__i915, variant, since, until) \ + (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \ + IS_GT_STEP(__i915, since, until)) + +#define IS_DG2_DISP_STEP(__i915, since, until) \ + (IS_DG2(__i915) && \ + IS_DISPLAY_STEP(__i915, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) @@ -1589,8 +1618,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LOGICAL_RING_ELSQ(dev_priv) \ (INTEL_INFO(dev_priv)->has_logical_ring_elsq) -#define HAS_MASTER_UNIT_IRQ(dev_priv) (INTEL_INFO(dev_priv)->has_master_unit_irq) - #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) #define INTEL_PPGTT(dev_priv) (INTEL_INFO(dev_priv)->ppgtt_type) @@ -1616,12 +1643,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_CANNONLAKE(dev_priv) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) #define HAS_GMBUS_IRQ(dev_priv) (GRAPHICS_VER(dev_priv) >= 4) -#define HAS_GMBUS_BURST_READ(dev_priv) (GRAPHICS_VER(dev_priv) >= 10 || \ +#define HAS_GMBUS_BURST_READ(dev_priv) (GRAPHICS_VER(dev_priv) >= 11 || \ IS_GEMINILAKE(dev_priv) || \ IS_KABYLAKE(dev_priv)) @@ -1641,6 +1666,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DP_MST(dev_priv) (INTEL_INFO(dev_priv)->display.has_dp_mst) +#define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) #define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) @@ -1662,6 +1688,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_MSLICES(dev_priv) \ + (INTEL_INFO(dev_priv)->has_mslices) + #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) @@ -1751,9 +1780,6 @@ void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); -struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, - u16 type, u16 instance); - static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { /* @@ -1850,24 +1876,18 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags); -static inline struct i915_gem_context * -__i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id) -{ - return xa_load(&file_priv->context_xa, id); -} - -static inline struct i915_gem_context * -i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) +static inline struct i915_address_space * +i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { - struct i915_gem_context *ctx; + struct i915_address_space *vm; rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file_priv, id); - if (ctx && !kref_get_unless_zero(&ctx->ref)) - ctx = NULL; + vm = xa_load(&file_priv->vm_xa, id); + if (vm 
&& !kref_get_unless_zero(&vm->ref)) + vm = NULL; rcu_read_unlock(); - return ctx; + return vm; } /* i915_gem_evict.c */ @@ -1934,8 +1954,8 @@ int remap_io_sg(struct vm_area_struct *vma, static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { - if (GRAPHICS_VER(i915) >= 10) - return CNL_HWS_CSB_WRITE_INDEX; + if (GRAPHICS_VER(i915) >= 11) + return ICL_HWS_CSB_WRITE_INDEX; else return I915_HWS_CSB_WRITE_INDEX; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 589388dec48a..590efc8b0265 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -469,12 +469,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, if (ret != -ENODEV) goto out; - ret = -ENODEV; - if (obj->ops->pread) - ret = obj->ops->pread(obj, args); - if (ret != -ENODEV) - goto out; - ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); @@ -1005,8 +999,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, } } - if (obj->mm.madv != __I915_MADV_PURGED) + if (obj->mm.madv != __I915_MADV_PURGED) { obj->mm.madv = args->madv; + if (obj->ops->adjust_lru) + obj->ops->adjust_lru(obj); + } if (i915_gem_object_has_pages(obj)) { unsigned long flags; @@ -1204,58 +1201,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) return ret; } -void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) -{ - ww_acquire_init(&ww->ctx, &reservation_ww_class); - INIT_LIST_HEAD(&ww->obj_list); - ww->intr = intr; - ww->contended = NULL; -} - -static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) -{ - struct drm_i915_gem_object *obj; - - while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { - list_del(&obj->obj_link); - i915_gem_object_unlock(obj); - } -} - -void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) -{ - list_del(&obj->obj_link); - i915_gem_object_unlock(obj); -} - -void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) -{ - i915_gem_ww_ctx_unlock_all(ww); - WARN_ON(ww->contended); - ww_acquire_fini(&ww->ctx); -} - -int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) -{ - int ret = 0; - - if (WARN_ON(!ww->contended)) - return -EINVAL; - - i915_gem_ww_ctx_unlock_all(ww); - if (ww->intr) - ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); - else - dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); - - if (!ret) - list_add_tail(&ww->contended->obj_link, &ww->obj_list); - - ww->contended = NULL; - - return ret; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gem_device.c" #include "selftests/i915_gem.c" diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 440c35f1abc9..d0752e5553db 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -123,16 +123,4 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } -struct i915_gem_ww_ctx { - struct ww_acquire_ctx ctx; - struct list_head obj_list; - bool intr; - struct drm_i915_gem_object *contended; -}; - -void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); -void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); -int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); -void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); - #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 
4d2d59a9942b..2b73ddb11c66 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -27,6 +27,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c b/drivers/gpu/drm/i915/i915_gem_ww.c new file mode 100644 index 000000000000..3f6ff139478e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_ww.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ +#include <linux/dma-resv.h> +#include "i915_gem_ww.h" +#include "gem/i915_gem_object.h" + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + ww_acquire_init(&ww->ctx, &reservation_ww_class); + INIT_LIST_HEAD(&ww->obj_list); + ww->intr = intr; + ww->contended = NULL; +} + +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) +{ + struct drm_i915_gem_object *obj; + + while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + } +} + +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) +{ + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); +} + +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) +{ + i915_gem_ww_ctx_unlock_all(ww); + WARN_ON(ww->contended); + ww_acquire_fini(&ww->ctx); +} + +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) +{ + int ret = 0; + + if (WARN_ON(!ww->contended)) + return -EINVAL; + + i915_gem_ww_ctx_unlock_all(ww); + if (ww->intr) + ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); + else + dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); + + if (!ret) + list_add_tail(&ww->contended->obj_link, &ww->obj_list); + else + i915_gem_object_put(ww->contended); + + ww->contended = NULL; + + return ret; +} diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h new file mode 100644 index 000000000000..f6b1a796667b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ +#ifndef __I915_GEM_WW_H__ +#define __I915_GEM_WW_H__ + +#include <drm/drm_drv.h> + +struct i915_gem_ww_ctx { + struct ww_acquire_ctx ctx; + struct list_head obj_list; + struct drm_i915_gem_object *contended; + unsigned short intr; + unsigned short loop; +}; + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); + +/* Internal functions used by the inlines! Don't use. 
*/ +static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err) +{ + ww->loop = 0; + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(ww); + if (!err) + ww->loop = 1; + } + + if (!ww->loop) + i915_gem_ww_ctx_fini(ww); + + return err; +} + +static inline void +__i915_gem_ww_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + i915_gem_ww_ctx_init(ww, intr); + ww->loop = 1; +} + +#define for_i915_gem_ww(_ww, _err, _intr) \ + for (__i915_gem_ww_init(_ww, _intr); (_ww)->loop; \ + _err = __i915_gem_ww_fini(_ww, _err)) + +#endif diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 24e18219eb50..77490cb5ff9c 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -15,7 +15,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, struct pci_dev *pdev = to_pci_dev(dev->dev); const struct sseu_dev_info *sseu = &i915->gt.info.sseu; drm_i915_getparam_t *param = data; - int value; + int value = 0; switch (param->param) { case I915_PARAM_IRQ_ACTIVE: @@ -134,6 +134,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: + case I915_PARAM_HAS_USERPTR_PROBE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -150,7 +151,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = sseu->subslice_mask[0]; + /* Only copy bits from the first slice */ + memcpy(&value, sseu->subslice_mask, + min(sseu->ss_stride, (u8)sizeof(value))); if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c deleted file mode 100644 index 3acb0b6be284..000000000000 --- a/drivers/gpu/drm/i915/i915_globals.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include <linux/slab.h> -#include <linux/workqueue.h> - -#include "i915_active.h" -#include "gem/i915_gem_context.h" -#include "gem/i915_gem_object.h" -#include "i915_globals.h" -#include "i915_request.h" -#include "i915_scheduler.h" -#include "i915_vma.h" - -static LIST_HEAD(globals); - -static atomic_t active; -static atomic_t epoch; -static struct park_work { - struct delayed_work work; - struct rcu_head rcu; - unsigned long flags; -#define PENDING 0 - int epoch; -} park; - -static void i915_globals_shrink(void) -{ - struct i915_global *global; - - /* - * kmem_cache_shrink() discards empty slabs and reorders partially - * filled slabs to prioritise allocating from the mostly full slabs, - * with the aim of reducing fragmentation. 
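The for_i915_gem_ww() macro defined just above wraps that same retry loop: the body assigns err, the fini helper backs off and re-arms ww->loop on -EDEADLK, and finishes the context otherwise. A minimal caller sketch (do_work_locked() is again a hypothetical placeholder):

/* Sketch: the same loop expressed with for_i915_gem_ww(). */
static int use_object_locked_ww(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;	/* -EDEADLK backs off and retries, others fall out */

		err = do_work_locked(obj);	/* hypothetical payload */
	}

	return err;	/* all ww locks dropped, context finished */
}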
- */ - list_for_each_entry(global, &globals, link) - global->shrink(); -} - -static void __i915_globals_grace(struct rcu_head *rcu) -{ - /* Ratelimit parking as shrinking is quite slow */ - schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); -} - -static void __i915_globals_queue_rcu(void) -{ - park.epoch = atomic_inc_return(&epoch); - if (!atomic_read(&active)) { - init_rcu_head(&park.rcu); - call_rcu(&park.rcu, __i915_globals_grace); - } -} - -static void __i915_globals_park(struct work_struct *work) -{ - destroy_rcu_head(&park.rcu); - - /* Confirm nothing woke up in the last grace period */ - if (park.epoch != atomic_read(&epoch)) { - __i915_globals_queue_rcu(); - return; - } - - clear_bit(PENDING, &park.flags); - i915_globals_shrink(); -} - -void __init i915_global_register(struct i915_global *global) -{ - GEM_BUG_ON(!global->shrink); - GEM_BUG_ON(!global->exit); - - list_add_tail(&global->link, &globals); -} - -static void __i915_globals_cleanup(void) -{ - struct i915_global *global, *next; - - list_for_each_entry_safe_reverse(global, next, &globals, link) - global->exit(); -} - -static __initconst int (* const initfn[])(void) = { - i915_global_active_init, - i915_global_context_init, - i915_global_gem_context_init, - i915_global_objects_init, - i915_global_request_init, - i915_global_scheduler_init, - i915_global_vma_init, -}; - -int __init i915_globals_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(initfn); i++) { - int err; - - err = initfn[i](); - if (err) { - __i915_globals_cleanup(); - return err; - } - } - - INIT_DELAYED_WORK(&park.work, __i915_globals_park); - return 0; -} - -void i915_globals_park(void) -{ - /* - * Defer shrinking the global slab caches (and other work) until - * after a RCU grace period has completed with no activity. This - * is to try and reduce the latency impact on the consumers caused - * by us shrinking the caches the same time as they are trying to - * allocate, with the assumption being that if we idle long enough - * for an RCU grace period to elapse since the last use, it is likely - * to be longer until we need the caches again. 
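The parking code being deleted here rate-limited kmem_cache_shrink() by first waiting out an RCU grace period (call_rcu) and then deferring the actual work to a delayed work item. A stripped-down sketch of that deferral pattern, with illustrative names; the removed code additionally gated re-queuing behind a PENDING bit and an epoch counter, which this sketch omits:

/* Sketch: run slow cleanup only after an RCU grace period, from a workqueue. */
#include <linux/rcupdate.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

static struct {
	struct delayed_work work;
	struct rcu_head rcu;
} park;

static void park_work_fn(struct work_struct *work)
{
	/* shrink caches, etc. */
}

static void park_grace_fn(struct rcu_head *rcu)
{
	/* grace period elapsed: schedule the slow cleanup, rate-limited */
	schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ));
}

static void queue_park(void)
{
	INIT_DELAYED_WORK(&park.work, park_work_fn);
	call_rcu(&park.rcu, park_grace_fn);
}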
- */ - if (!atomic_dec_and_test(&active)) - return; - - /* Queue cleanup after the next RCU grace period has freed slabs */ - if (!test_and_set_bit(PENDING, &park.flags)) - __i915_globals_queue_rcu(); -} - -void i915_globals_unpark(void) -{ - atomic_inc(&epoch); - atomic_inc(&active); -} - -static void __i915_globals_flush(void) -{ - atomic_inc(&active); /* skip shrinking */ - - rcu_barrier(); /* wait for the work to be queued */ - flush_delayed_work(&park.work); - - atomic_dec(&active); -} - -void i915_globals_exit(void) -{ - GEM_BUG_ON(atomic_read(&active)); - - __i915_globals_flush(); - __i915_globals_cleanup(); - - /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ - rcu_barrier(); -} diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h deleted file mode 100644 index 2d199f411a4a..000000000000 --- a/drivers/gpu/drm/i915/i915_globals.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef _I915_GLOBALS_H_ -#define _I915_GLOBALS_H_ - -#include <linux/types.h> - -typedef void (*i915_global_func_t)(void); - -struct i915_global { - struct list_head link; - - i915_global_func_t shrink; - i915_global_func_t exit; -}; - -void i915_global_register(struct i915_global *global); - -int i915_globals_init(void); -void i915_globals_park(void); -void i915_globals_unpark(void); -void i915_globals_exit(void); - -/* constructors */ -int i915_global_active_init(void); -int i915_global_context_init(void); -int i915_global_gem_context_init(void); -int i915_global_objects_init(void); -int i915_global_request_init(void); -int i915_global_scheduler_init(void); -int i915_global_vma_init(void); - -#endif /* _I915_GLOBALS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 966664610c8c..9cf6ac575de1 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1048,7 +1048,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (i915_gem_object_is_lmem(vma->obj)) { + } else if (__i915_gem_object_is_lmem(vma->obj)) { struct intel_memory_region *mem = vma->obj->mm.region; dma_addr_t dma; @@ -1438,20 +1438,37 @@ capture_engine(struct intel_engine_cs *engine, { struct intel_engine_capture_vma *capture = NULL; struct intel_engine_coredump *ee; - struct i915_request *rq; + struct intel_context *ce; + struct i915_request *rq = NULL; unsigned long flags; ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); if (!ee) return NULL; - spin_lock_irqsave(&engine->active.lock, flags); - rq = intel_engine_find_active_request(engine); + ce = intel_engine_get_hung_context(engine); + if (ce) { + intel_engine_clear_hung_context(engine); + rq = intel_context_find_active_request(ce); + if (!rq || !i915_request_started(rq)) + goto no_request_capture; + } else { + /* + * Getting here with GuC enabled means it is a forced error capture + * with no actual hang. So, no need to attempt the execlist search. 
+ */ + if (!intel_uc_uses_guc_submission(&engine->gt->uc)) { + spin_lock_irqsave(&engine->sched_engine->lock, flags); + rq = intel_engine_execlist_find_hung_request(engine); + spin_unlock_irqrestore(&engine->sched_engine->lock, + flags); + } + } if (rq) capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); - spin_unlock_irqrestore(&engine->active.lock, flags); if (!capture) { +no_request_capture: kfree(ee); return NULL; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c3816f5c6900..9bc4f4a8e12e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -207,7 +207,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv))) return; - if (HAS_PCH_DG1(dev_priv)) + if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) hpd->pch_hpd = hpd_sde_dg1; else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) hpd->pch_hpd = hpd_icp; @@ -2297,11 +2297,10 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; - if (IS_CNL_WITH_PORT_F(dev_priv) || DISPLAY_VER(dev_priv) == 11) - mask |= CNL_AUX_CHANNEL_F; - - if (DISPLAY_VER(dev_priv) == 11) + if (DISPLAY_VER(dev_priv) == 11) { + mask |= ICL_AUX_CHANNEL_F; mask |= ICL_AUX_CHANNEL_E; + } return mask; } @@ -2698,11 +2697,9 @@ gen11_display_irq_handler(struct drm_i915_private *i915) enable_rpm_wakeref_asserts(&i915->runtime_pm); } -static __always_inline irqreturn_t -__gen11_irq_handler(struct drm_i915_private * const i915, - u32 (*intr_disable)(void __iomem * const regs), - void (*intr_enable)(void __iomem * const regs)) +static irqreturn_t gen11_irq_handler(int irq, void *arg) { + struct drm_i915_private *i915 = arg; void __iomem * const regs = i915->uncore.regs; struct intel_gt *gt = &i915->gt; u32 master_ctl; @@ -2711,9 +2708,9 @@ __gen11_irq_handler(struct drm_i915_private * const i915, if (!intel_irqs_enabled(i915)) return IRQ_NONE; - master_ctl = intr_disable(regs); + master_ctl = gen11_master_intr_disable(regs); if (!master_ctl) { - intr_enable(regs); + gen11_master_intr_enable(regs); return IRQ_NONE; } @@ -2726,7 +2723,7 @@ __gen11_irq_handler(struct drm_i915_private * const i915, gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); - intr_enable(regs); + gen11_master_intr_enable(regs); gen11_gu_misc_irq_handler(gt, gu_misc_iir); @@ -2735,51 +2732,69 @@ __gen11_irq_handler(struct drm_i915_private * const i915, return IRQ_HANDLED; } -static irqreturn_t gen11_irq_handler(int irq, void *arg) -{ - return __gen11_irq_handler(arg, - gen11_master_intr_disable, - gen11_master_intr_enable); -} - -static u32 dg1_master_intr_disable_and_ack(void __iomem * const regs) +static inline u32 dg1_master_intr_disable(void __iomem * const regs) { u32 val; /* First disable interrupts */ - raw_reg_write(regs, DG1_MSTR_UNIT_INTR, 0); + raw_reg_write(regs, DG1_MSTR_TILE_INTR, 0); /* Get the indication levels and ack the master unit */ - val = raw_reg_read(regs, DG1_MSTR_UNIT_INTR); + val = raw_reg_read(regs, DG1_MSTR_TILE_INTR); if (unlikely(!val)) return 0; - raw_reg_write(regs, DG1_MSTR_UNIT_INTR, val); - - /* - * Now with master disabled, get a sample of level indications - * for this interrupt and ack them right away - we keep GEN11_MASTER_IRQ - * out as this bit doesn't exist anymore for DG1 - */ - val = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ) & ~GEN11_MASTER_IRQ; - if (unlikely(!val)) - return 0; - - raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, val); + raw_reg_write(regs, DG1_MSTR_TILE_INTR, val); 
return val; } static inline void dg1_master_intr_enable(void __iomem * const regs) { - raw_reg_write(regs, DG1_MSTR_UNIT_INTR, DG1_MSTR_IRQ); + raw_reg_write(regs, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); } static irqreturn_t dg1_irq_handler(int irq, void *arg) { - return __gen11_irq_handler(arg, - dg1_master_intr_disable_and_ack, - dg1_master_intr_enable); + struct drm_i915_private * const i915 = arg; + struct intel_gt *gt = &i915->gt; + void __iomem * const regs = i915->uncore.regs; + u32 master_tile_ctl, master_ctl; + u32 gu_misc_iir; + + if (!intel_irqs_enabled(i915)) + return IRQ_NONE; + + master_tile_ctl = dg1_master_intr_disable(regs); + if (!master_tile_ctl) { + dg1_master_intr_enable(regs); + return IRQ_NONE; + } + + /* FIXME: we only support tile 0 for now. */ + if (master_tile_ctl & DG1_MSTR_TILE(0)) { + master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl); + } else { + DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl); + dg1_master_intr_enable(regs); + return IRQ_NONE; + } + + gen11_gt_irq_handler(gt, master_ctl); + + if (master_ctl & GEN11_DISPLAY_IRQ) + gen11_display_irq_handler(i915); + + gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + + dg1_master_intr_enable(regs); + + gen11_gu_misc_irq_handler(gt, gu_misc_iir); + + pmu_irq_stats(i915, IRQ_HANDLED); + + return IRQ_HANDLED; } /* Called from drm generic code, passed 'crtc' which @@ -2880,14 +2895,14 @@ static bool gen11_dsi_configure_te(struct intel_crtc *intel_crtc, return true; } -int bdw_enable_vblank(struct drm_crtc *crtc) +int bdw_enable_vblank(struct drm_crtc *_crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; unsigned long irqflags; - if (gen11_dsi_configure_te(intel_crtc, true)) + if (gen11_dsi_configure_te(crtc, true)) return 0; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -2898,7 +2913,7 @@ int bdw_enable_vblank(struct drm_crtc *crtc) * PSR is active as no frames are generated, so check only for PSR. 
*/ if (HAS_PSR(dev_priv)) - drm_crtc_vblank_restore(crtc); + drm_crtc_vblank_restore(&crtc->base); return 0; } @@ -2952,14 +2967,14 @@ void ilk_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -void bdw_disable_vblank(struct drm_crtc *crtc) +void bdw_disable_vblank(struct drm_crtc *_crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; unsigned long irqflags; - if (gen11_dsi_configure_te(intel_crtc, false)) + if (gen11_dsi_configure_te(crtc, false)) return; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3146,10 +3161,20 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - if (HAS_MASTER_UNIT_IRQ(dev_priv)) - dg1_master_intr_disable_and_ack(dev_priv->uncore.regs); - else - gen11_master_intr_disable(dev_priv->uncore.regs); + gen11_master_intr_disable(dev_priv->uncore.regs); + + gen11_gt_irq_reset(&dev_priv->gt); + gen11_display_irq_reset(dev_priv); + + GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); + GEN3_IRQ_RESET(uncore, GEN8_PCU_); +} + +static void dg1_irq_reset(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + + dg1_master_intr_disable(dev_priv->uncore.regs); gen11_gt_irq_reset(&dev_priv->gt); gen11_display_irq_reset(dev_priv); @@ -3841,13 +3866,28 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); - if (HAS_MASTER_UNIT_IRQ(dev_priv)) { - dg1_master_intr_enable(uncore->regs); - intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_UNIT_INTR); - } else { - gen11_master_intr_enable(uncore->regs); - intel_uncore_posting_read(&dev_priv->uncore, GEN11_GFX_MSTR_IRQ); + gen11_master_intr_enable(uncore->regs); + intel_uncore_posting_read(&dev_priv->uncore, GEN11_GFX_MSTR_IRQ); +} + +static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + u32 gu_misc_masked = GEN11_GU_MISC_GSE; + + gen11_gt_irq_postinstall(&dev_priv->gt); + + GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); + + if (HAS_DISPLAY(dev_priv)) { + icp_irq_postinstall(dev_priv); + gen8_de_irq_postinstall(dev_priv); + intel_uncore_write(&dev_priv->uncore, GEN11_DISPLAY_INT_CTL, + GEN11_DISPLAY_IRQ_ENABLE); } + + dg1_master_intr_enable(dev_priv->uncore.regs); + intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR); } static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) @@ -4386,9 +4426,9 @@ static irq_handler_t intel_irq_handler(struct drm_i915_private *dev_priv) else return i8xx_irq_handler; } else { - if (HAS_MASTER_UNIT_IRQ(dev_priv)) + if (GRAPHICS_VER_FULL(dev_priv) >= IP_VER(12, 10)) return dg1_irq_handler; - if (GRAPHICS_VER(dev_priv) >= 11) + else if (GRAPHICS_VER(dev_priv) >= 11) return gen11_irq_handler; else if (GRAPHICS_VER(dev_priv) >= 8) return gen8_irq_handler; @@ -4411,7 +4451,9 @@ static void intel_irq_reset(struct drm_i915_private *dev_priv) else i8xx_irq_reset(dev_priv); } else { - if (GRAPHICS_VER(dev_priv) >= 11) + if (GRAPHICS_VER_FULL(dev_priv) >= IP_VER(12, 10)) + dg1_irq_reset(dev_priv); + else if (GRAPHICS_VER(dev_priv) >= 11) gen11_irq_reset(dev_priv); else if (GRAPHICS_VER(dev_priv) >= 8) 
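The handler/reset/postinstall selection above now keys off GRAPHICS_VER_FULL() compared against IP_VER(12, 10) instead of a HAS_MASTER_UNIT_IRQ flag, so DG1 (graphics 12.10) and anything newer picks the dg1_* paths while plain gen12 stays on the gen11 ones. A standalone illustration of why the ordered comparison works; the packing below, (ver) << 8 | (rel), is an assumption made for this sketch (the real IP_VER() lives in intel_device_info.h):

#include <stdio.h>

#define IP_VER(ver, rel)	((ver) << 8 | (rel))

int main(void)
{
	int dg1 = IP_VER(12, 10);	/* graphics_ver 12, graphics_rel 10 */
	int tgl = IP_VER(12, 0);
	int icl = IP_VER(11, 0);

	printf("dg1 takes dg1_irq_handler: %d\n", dg1 >= IP_VER(12, 10));	/* 1 */
	printf("tgl takes gen11_irq_handler: %d\n", tgl >= IP_VER(12, 10));	/* 0 */
	printf("icl takes gen11_irq_handler: %d\n", icl >= IP_VER(12, 10));	/* 0 */
	return 0;
}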
gen8_irq_reset(dev_priv); @@ -4434,7 +4476,9 @@ static void intel_irq_postinstall(struct drm_i915_private *dev_priv) else i8xx_irq_postinstall(dev_priv); } else { - if (GRAPHICS_VER(dev_priv) >= 11) + if (GRAPHICS_VER_FULL(dev_priv) >= IP_VER(12, 10)) + dg1_irq_postinstall(dev_priv); + else if (GRAPHICS_VER(dev_priv) >= 11) gen11_irq_postinstall(dev_priv); else if (GRAPHICS_VER(dev_priv) >= 8) gen8_irq_postinstall(dev_priv); @@ -4466,14 +4510,14 @@ int intel_irq_install(struct drm_i915_private *dev_priv) */ dev_priv->runtime_pm.irqs_enabled = true; - dev_priv->drm.irq_enabled = true; + dev_priv->irq_enabled = true; intel_irq_reset(dev_priv); ret = request_irq(irq, intel_irq_handler(dev_priv), IRQF_SHARED, DRIVER_NAME, dev_priv); if (ret < 0) { - dev_priv->drm.irq_enabled = false; + dev_priv->irq_enabled = false; return ret; } @@ -4499,10 +4543,10 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) * intel_modeset_driver_remove() calling us out of sequence. * Would be nice if it didn't do that... */ - if (!dev_priv->drm.irq_enabled) + if (!dev_priv->irq_enabled) return; - dev_priv->drm.irq_enabled = false; + dev_priv->irq_enabled = false; intel_irq_reset(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c new file mode 100644 index 000000000000..d8b4482c69d0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_module.c @@ -0,0 +1,124 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2021 Intel Corporation + */ + +#include <linux/console.h> + +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_object.h" +#include "i915_active.h" +#include "i915_buddy.h" +#include "i915_params.h" +#include "i915_pci.h" +#include "i915_perf.h" +#include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_selftest.h" +#include "i915_vma.h" + +static int i915_check_nomodeset(void) +{ + bool use_kms = true; + + /* + * Enable KMS by default, unless explicitly overriden by + * either the i915.modeset prarameter or by the + * vga_text_mode_force boot option. + */ + + if (i915_modparams.modeset == 0) + use_kms = false; + + if (vgacon_text_force() && i915_modparams.modeset == -1) + use_kms = false; + + if (!use_kms) { + /* Silently fail loading to not upset userspace. 
*/ + DRM_DEBUG_DRIVER("KMS disabled.\n"); + return 1; + } + + return 0; +} + +static const struct { + int (*init)(void); + void (*exit)(void); +} init_funcs[] = { + { .init = i915_check_nomodeset }, + { .init = i915_active_module_init, + .exit = i915_active_module_exit }, + { .init = i915_buddy_module_init, + .exit = i915_buddy_module_exit }, + { .init = i915_context_module_init, + .exit = i915_context_module_exit }, + { .init = i915_gem_context_module_init, + .exit = i915_gem_context_module_exit }, + { .init = i915_objects_module_init, + .exit = i915_objects_module_exit }, + { .init = i915_request_module_init, + .exit = i915_request_module_exit }, + { .init = i915_scheduler_module_init, + .exit = i915_scheduler_module_exit }, + { .init = i915_vma_module_init, + .exit = i915_vma_module_exit }, + { .init = i915_mock_selftests }, + { .init = i915_pmu_init, + .exit = i915_pmu_exit }, + { .init = i915_register_pci_driver, + .exit = i915_unregister_pci_driver }, + { .init = i915_perf_sysctl_register, + .exit = i915_perf_sysctl_unregister }, +}; +static int init_progress; + +static int __init i915_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err < 0) { + while (i--) { + if (init_funcs[i].exit) + init_funcs[i].exit(); + } + return err; + } else if (err > 0) { + /* + * Early-exit success is reserved for things which + * don't have an exit() function because we have no + * idea how far they got or how to partially tear + * them down. + */ + WARN_ON(init_funcs[i].exit); + break; + } + } + + init_progress = i; + + return 0; +} + +static void __exit i915_exit(void) +{ + int i; + + for (i = init_progress - 1; i >= 0; i--) { + GEM_BUG_ON(i >= ARRAY_SIZE(init_funcs)); + if (init_funcs[i].exit) + init_funcs[i].exit(); + } +} + +module_init(i915_init); +module_exit(i915_exit); + +MODULE_AUTHOR("Tungsten Graphics, Inc."); +MODULE_AUTHOR("Intel Corporation"); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2880ec57c97d..1bbd09ad5287 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -22,18 +22,13 @@ * */ -#include <linux/console.h> #include <linux/vga_switcheroo.h> #include <drm/drm_drv.h> #include <drm/i915_pciids.h> -#include "display/intel_fbdev.h" - #include "i915_drv.h" -#include "i915_perf.h" -#include "i915_globals.h" -#include "i915_selftest.h" +#include "i915_pci.h" #define PLATFORM(x) .platform = (x) #define GEN(x) \ @@ -787,27 +782,13 @@ static const struct intel_device_info cml_gt2_info = { .gt = 2, }; -#define GEN10_FEATURES \ - GEN9_FEATURES, \ - GEN(10), \ - .dbuf.size = 1024 - 4, /* 4 blocks for bypass path allocation */ \ - .display.has_dsc = 1, \ - .has_coherent_ggtt = false, \ - GLK_COLORS - -static const struct intel_device_info cnl_info = { - GEN10_FEATURES, - PLATFORM(INTEL_CANNONLAKE), - .gt = 2, -}; - #define GEN11_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K | \ I915_GTT_PAGE_SIZE_64K | \ I915_GTT_PAGE_SIZE_2M #define GEN11_FEATURES \ - GEN10_FEATURES, \ + GEN9_FEATURES, \ GEN11_DEFAULT_PAGE_SIZES, \ .abox_mask = BIT(0), \ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ @@ -830,10 +811,12 @@ static const struct intel_device_info cnl_info = { [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ GEN(11), \ + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }, \ .dbuf.size = 2048, \ .dbuf.slice_mask = BIT(DBUF_S1) | 
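The init_funcs[] table above drives module init: hooks run in order, a negative return rolls back the hooks that already succeeded, a positive return stops early with nothing to undo, and init_progress records how far exit must unwind. A standalone demo of the same pattern with dummy hooks (init_b deliberately fails so the rollback path is exercised):

#include <stdio.h>

static int init_a(void) { puts("init a"); return 0; }
static void exit_a(void) { puts("exit a"); }
static int init_b(void) { puts("init b"); return -1; }	/* simulated failure */
static void exit_b(void) { puts("exit b"); }

static const struct {
	int (*init)(void);
	void (*exit)(void);
} init_funcs[] = {
	{ init_a, exit_a },
	{ init_b, exit_b },
};
static int init_progress;

static int demo_init(void)
{
	int i, err;

	for (i = 0; i < (int)(sizeof(init_funcs) / sizeof(init_funcs[0])); i++) {
		err = init_funcs[i].init();
		if (err < 0) {
			while (i--)			/* roll back what succeeded */
				if (init_funcs[i].exit)
					init_funcs[i].exit();
			return err;
		} else if (err > 0) {
			break;	/* early success: later hooks never ran */
		}
	}
	init_progress = i;
	return 0;
}

static void demo_exit(void)
{
	int i;

	for (i = init_progress - 1; i >= 0; i--)
		if (init_funcs[i].exit)
			init_funcs[i].exit();
}

int main(void)
{
	if (!demo_init())	/* prints: init a, init b, exit a */
		demo_exit();
	return 0;
}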
BIT(DBUF_S2), \ - .has_logical_ring_elsq = 1, \ - .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 } + .display.has_dsc = 1, \ + .has_coherent_ggtt = false, \ + .has_logical_ring_elsq = 1 static const struct intel_device_info icl_info = { GEN11_FEATURES, @@ -845,7 +828,6 @@ static const struct intel_device_info icl_info = { static const struct intel_device_info ehl_info = { GEN11_FEATURES, PLATFORM(INTEL_ELKHARTLAKE), - .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0), .ppgtt_size = 36, }; @@ -853,7 +835,6 @@ static const struct intel_device_info ehl_info = { static const struct intel_device_info jsl_info = { GEN11_FEATURES, PLATFORM(INTEL_JASPERLAKE), - .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0), .ppgtt_size = 36, }; @@ -909,7 +890,6 @@ static const struct intel_device_info rkl_info = { #define DGFX_FEATURES \ .memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \ - .has_master_unit_irq = 1, \ .has_llc = 0, \ .has_snoop = 1, \ .is_dgfx = 1 @@ -917,6 +897,7 @@ static const struct intel_device_info rkl_info = { static const struct intel_device_info dg1_info __maybe_unused = { GEN12_FEATURES, DGFX_FEATURES, + .graphics_rel = 10, PLATFORM(INTEL_DG1), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, @@ -936,26 +917,60 @@ static const struct intel_device_info adl_s_info = { .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), - .dma_mask_size = 46, + .dma_mask_size = 39, }; +#define XE_LPD_CURSOR_OFFSETS \ + .cursor_offsets = { \ + [PIPE_A] = CURSOR_A_OFFSET, \ + [PIPE_B] = IVB_CURSOR_B_OFFSET, \ + [PIPE_C] = IVB_CURSOR_C_OFFSET, \ + [PIPE_D] = TGL_CURSOR_D_OFFSET, \ + } + #define XE_LPD_FEATURES \ - .display.ver = 13, \ - .display.has_psr_hw_tracking = 0, \ - .abox_mask = GENMASK(1, 0), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ - BIT(TRANSCODER_C) | BIT(TRANSCODER_D), \ - .dbuf.size = 4096, \ - .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | BIT(DBUF_S4) + .abox_mask = GENMASK(1, 0), \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 0 }, \ + .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + BIT(TRANSCODER_C) | BIT(TRANSCODER_D), \ + .dbuf.size = 4096, \ + .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ + BIT(DBUF_S4), \ + .display.has_ddi = 1, \ + .display.has_dmc = 1, \ + .display.has_dp_mst = 1, \ + .display.has_dsb = 1, \ + .display.has_dsc = 1, \ + .display.has_fbc = 1, \ + .display.has_fpga_dbg = 1, \ + .display.has_hdcp = 1, \ + .display.has_hotplug = 1, \ + .display.has_ipc = 1, \ + .display.has_psr = 1, \ + .display.ver = 13, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ + .pipe_offsets = { \ + [TRANSCODER_A] = PIPE_A_OFFSET, \ + [TRANSCODER_B] = PIPE_B_OFFSET, \ + [TRANSCODER_C] = PIPE_C_OFFSET, \ + [TRANSCODER_D] = PIPE_D_OFFSET, \ + }, \ + .trans_offsets = { \ + [TRANSCODER_A] = TRANSCODER_A_OFFSET, \ + [TRANSCODER_B] = TRANSCODER_B_OFFSET, \ + [TRANSCODER_C] = TRANSCODER_C_OFFSET, \ + [TRANSCODER_D] = TRANSCODER_D_OFFSET, \ + }, \ + XE_LPD_CURSOR_OFFSETS static const struct intel_device_info adl_p_info = { GEN12_FEATURES, XE_LPD_FEATURES, PLATFORM(INTEL_ALDERLAKE_P), - .has_cdclk_crawl = 1, .require_force_probe = 1, + .display.has_cdclk_crawl = 1, 
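The FEATURES macro stacking in these device-info tables relies on a C99 designated-initializer rule: when several initializers name the same member, the last one wins, so a platform entry can pull in GEN12_FEATURES or DGFX_FEATURES and then override individual fields. A standalone demo of that rule (GCC and Clang warn about it under -Woverride-init, which is why such tables are typically built with that warning disabled):

#include <stdio.h>

struct info {
	int ver;
	int has_llc;
	int is_dgfx;
};

#define BASE_FEATURES	.ver = 12, .has_llc = 1, .is_dgfx = 0
#define DGFX_FEATURES	.has_llc = 0, .is_dgfx = 1

static const struct info dg1_like = {
	BASE_FEATURES,
	DGFX_FEATURES,		/* later initializers override .has_llc/.is_dgfx */
};

int main(void)
{
	printf("ver=%d has_llc=%d is_dgfx=%d\n",
	       dg1_like.ver, dg1_like.has_llc, dg1_like.is_dgfx);	/* 12 0 1 */
	return 0;
}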
.display.has_modular_fia = 1, + .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), .ppgtt_size = 48, @@ -963,6 +978,67 @@ static const struct intel_device_info adl_p_info = { }; #undef GEN + +#define XE_HP_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + +#define XE_HP_FEATURES \ + .graphics_ver = 12, \ + .graphics_rel = 50, \ + XE_HP_PAGE_SIZES, \ + .dma_mask_size = 46, \ + .has_64bit_reloc = 1, \ + .has_global_mocs = 1, \ + .has_gt_uc = 1, \ + .has_llc = 1, \ + .has_logical_ring_contexts = 1, \ + .has_logical_ring_elsq = 1, \ + .has_mslices = 1, \ + .has_rc6 = 1, \ + .has_reset_engine = 1, \ + .has_rps = 1, \ + .has_runtime_pm = 1, \ + .ppgtt_size = 48, \ + .ppgtt_type = INTEL_PPGTT_FULL + +#define XE_HPM_FEATURES \ + .media_ver = 12, \ + .media_rel = 50 + +__maybe_unused +static const struct intel_device_info xehpsdv_info = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + PLATFORM(INTEL_XEHPSDV), + .display = { }, + .pipe_mask = 0, + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | + BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | + BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) | + BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7), + .require_force_probe = 1, +}; + +__maybe_unused +static const struct intel_device_info dg2_info = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + XE_LPD_FEATURES, + DGFX_FEATURES, + .graphics_rel = 55, + .media_rel = 55, + PLATFORM(INTEL_DG2), + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | + BIT(VECS0) | BIT(VECS1) | + BIT(VCS0) | BIT(VCS2), + .require_force_probe = 1, +}; + #undef PLATFORM /* @@ -1032,7 +1108,6 @@ static const struct pci_device_id pciidlist[] = { INTEL_CML_GT2_IDS(&cml_gt2_info), INTEL_CML_U_GT1_IDS(&cml_gt1_info), INTEL_CML_U_GT2_IDS(&cml_gt2_info), - INTEL_CNL_IDS(&cnl_info), INTEL_ICL_11_IDS(&icl_info), INTEL_EHL_IDS(&ehl_info), INTEL_JSL_IDS(&jsl_info), @@ -1159,66 +1234,12 @@ static struct pci_driver i915_pci_driver = { .driver.pm = &i915_pm_ops, }; -static int __init i915_init(void) +int i915_register_pci_driver(void) { - bool use_kms = true; - int err; - - err = i915_globals_init(); - if (err) - return err; - - err = i915_mock_selftests(); - if (err) - return err > 0 ? 0 : err; - - /* - * Enable KMS by default, unless explicitly overriden by - * either the i915.modeset prarameter or by the - * vga_text_mode_force boot option. - */ - - if (i915_modparams.modeset == 0) - use_kms = false; - - if (vgacon_text_force() && i915_modparams.modeset == -1) - use_kms = false; - - if (!use_kms) { - /* Silently fail loading to not upset userspace. 
*/ - DRM_DEBUG_DRIVER("KMS disabled.\n"); - return 0; - } - - i915_pmu_init(); - - err = pci_register_driver(&i915_pci_driver); - if (err) { - i915_pmu_exit(); - i915_globals_exit(); - return err; - } - - i915_perf_sysctl_register(); - return 0; + return pci_register_driver(&i915_pci_driver); } -static void __exit i915_exit(void) +void i915_unregister_pci_driver(void) { - if (!i915_pci_driver.driver.owner) - return; - - i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); - i915_globals_exit(); - i915_pmu_exit(); } - -module_init(i915_init); -module_exit(i915_exit); - -MODULE_AUTHOR("Tungsten Graphics, Inc."); -MODULE_AUTHOR("Intel Corporation"); - -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/i915/i915_pci.h b/drivers/gpu/drm/i915/i915_pci.h new file mode 100644 index 000000000000..b386f319f52e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pci.h @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2021 Intel Corporation + */ + +int i915_register_pci_driver(void); +void i915_unregister_pci_driver(void); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 9f94914958c3..2f01b8c0284c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1256,7 +1256,6 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: - case 10: if (intel_engine_uses_guc(ce->engine)) { /* * When using GuC, the context descriptor we write in @@ -1284,17 +1283,26 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) break; case 11: - case 12: { - stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - /* - * Pick an unused context id - * 0 - BITS_PER_LONG are used by other contexts - * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context - */ - stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + case 12: + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) { + stream->specific_ctx_id_mask = + ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = + (XEHP_MAX_CONTEXT_HW_ID - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + } else { + stream->specific_ctx_id_mask = + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + /* + * Pick an unused context id + * 0 - BITS_PER_LONG are used by other contexts + * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context + */ + stream->specific_ctx_id = + (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + } break; - } default: MISSING_CASE(GRAPHICS_VER(ce->engine->i915)); @@ -2580,7 +2588,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } -static void gen10_disable_metric_set(struct i915_perf_stream *stream) +static void gen11_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -3414,10 +3422,10 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_file_private *file_priv = file->driver_priv; specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); - if (!specific_ctx) { + if (IS_ERR(specific_ctx)) { DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", ctx_handle); - ret = -ENOENT; + ret = PTR_ERR(specific_ctx); goto err; } } @@ -3887,7 +3895,7 @@ static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) REG_IN_RANGE(addr, 
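The perf hunk above builds specific_ctx_id_mask for the upper dword of the context descriptor as ((1U << WIDTH) - 1) << (SHIFT - 32): a WIDTH-bit field that starts at bit SHIFT of the 64-bit descriptor lands at bit (SHIFT - 32) of the upper 32 bits. A standalone check of that arithmetic using the XEHP_SW_CTX_ID_* values added later in this patch (shift 39, width 16):

#include <stdio.h>

#define XEHP_SW_CTX_ID_SHIFT	39
#define XEHP_SW_CTX_ID_WIDTH	16

int main(void)
{
	unsigned int mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
			    (XEHP_SW_CTX_ID_SHIFT - 32);

	printf("specific_ctx_id_mask = 0x%08x\n", mask);	/* 0x007fff80 */
	return 0;
}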
RPM_CONFIG0, NOA_CONFIG(8)); } -static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) +static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen8_is_valid_mux_addr(perf, addr) || REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || @@ -4310,7 +4318,6 @@ static void oa_init_supported_formats(struct i915_perf *perf) case INTEL_GEMINILAKE: case INTEL_COFFEELAKE: case INTEL_COMETLAKE: - case INTEL_CANNONLAKE: case INTEL_ICELAKE: case INTEL_ELKHARTLAKE: case INTEL_JASPERLAKE: @@ -4395,27 +4402,23 @@ void i915_perf_init(struct drm_i915_private *i915) perf->gen8_valid_ctx_bit = BIT(16); } - } else if (IS_GRAPHICS_VER(i915, 10, 11)) { + } else if (GRAPHICS_VER(i915) == 11) { perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = - gen10_is_valid_mux_addr; + gen11_is_valid_mux_addr; perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; perf->ops.oa_enable = gen8_oa_enable; perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; - perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.disable_metric_set = gen11_disable_metric_set; perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (GRAPHICS_VER(i915) == 10) { - perf->ctx_oactxctrl_offset = 0x128; - perf->ctx_flexeu0_offset = 0x3de; - } else { - perf->ctx_oactxctrl_offset = 0x124; - perf->ctx_flexeu0_offset = 0x78e; - } + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; + perf->gen8_valid_ctx_bit = BIT(16); } else if (GRAPHICS_VER(i915) == 12) { perf->ops.is_valid_b_counter_reg = @@ -4483,9 +4486,10 @@ static int destroy_config(int id, void *p, void *data) return 0; } -void i915_perf_sysctl_register(void) +int i915_perf_sysctl_register(void) { sysctl_header = register_sysctl_table(dev_root); + return 0; } void i915_perf_sysctl_unregister(void) diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 882fdd0a7680..1d1329e5af3a 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,7 +23,7 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); -void i915_perf_sysctl_register(void); +int i915_perf_sysctl_register(void); void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 34d37d46a126..0b488d49694c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -407,7 +407,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(rps, rps->cur_freq), + intel_rps_get_requested_frequency(rps), period_ns / 1000); } @@ -1088,7 +1088,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; -void i915_pmu_init(void) +int i915_pmu_init(void) { int ret; @@ -1101,6 +1101,8 @@ void i915_pmu_init(void) ret); else cpuhp_slot = ret; + + return 0; } void i915_pmu_exit(void) diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 60f9595f902c..449057648f39 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -147,14 +147,14 @@ struct i915_pmu { }; #ifdef CONFIG_PERF_EVENTS -void 
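The i915_perf_open_ioctl_locked() change above reflects i915_gem_context_lookup() now returning an ERR_PTR() instead of NULL, so the caller tests IS_ERR() and propagates PTR_ERR() rather than hard-coding -ENOENT. A standalone sketch of the error-pointer convention; the helpers below are simplified userspace stand-ins for <linux/err.h>:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *lookup(int exists)
{
	static int object;

	return exists ? (void *)&object : ERR_PTR(-ENOENT);
}

int main(void)
{
	void *ctx = lookup(0);

	if (IS_ERR(ctx))
		printf("lookup failed: %ld\n", PTR_ERR(ctx));	/* -2 (-ENOENT) */
	return 0;
}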
i915_pmu_init(void); +int i915_pmu_init(void); void i915_pmu_exit(void); void i915_pmu_register(struct drm_i915_private *i915); void i915_pmu_unregister(struct drm_i915_private *i915); void i915_pmu_gt_parked(struct drm_i915_private *i915); void i915_pmu_gt_unparked(struct drm_i915_private *i915); #else -static inline void i915_pmu_init(void) {} +static inline int i915_pmu_init(void) { return 0; } static inline void i915_pmu_exit(void) {} static inline void i915_pmu_register(struct drm_i915_private *i915) {} static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 476bb3b9ad11..664970f2bc62 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -395,10 +395,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GRDOM_MEDIA2 (1 << 6) #define GEN11_GRDOM_MEDIA3 (1 << 7) #define GEN11_GRDOM_MEDIA4 (1 << 8) +#define GEN11_GRDOM_MEDIA5 (1 << 9) +#define GEN11_GRDOM_MEDIA6 (1 << 10) +#define GEN11_GRDOM_MEDIA7 (1 << 11) +#define GEN11_GRDOM_MEDIA8 (1 << 12) #define GEN11_GRDOM_VECS (1 << 13) #define GEN11_GRDOM_VECS2 (1 << 14) +#define GEN11_GRDOM_VECS3 (1 << 15) +#define GEN11_GRDOM_VECS4 (1 << 16) #define GEN11_GRDOM_SFC0 (1 << 17) #define GEN11_GRDOM_SFC1 (1 << 18) +#define GEN11_GRDOM_SFC2 (1 << 19) +#define GEN11_GRDOM_SFC3 (1 << 20) #define GEN11_VCS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << ((instance) >> 1)) #define GEN11_VECS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << (instance)) @@ -1877,7 +1885,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BXT_PORT_CL1CM_DW30(phy) _BXT_PHY((phy), _PORT_CL1CM_DW30_BC) /* - * CNL/ICL Port/COMBO-PHY Registers + * ICL Port/COMBO-PHY Registers */ #define _ICL_COMBOPHY_A 0x162000 #define _ICL_COMBOPHY_B 0x6C000 @@ -1891,11 +1899,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) _RKL_COMBOPHY_D, \ _ADL_COMBOPHY_E) -/* CNL/ICL Port CL_DW registers */ +/* ICL Port CL_DW registers */ #define _ICL_PORT_CL_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ 4 * (dw)) -#define CNL_PORT_CL1CM_DW5 _MMIO(0x162014) #define ICL_PORT_CL_DW5(phy) _MMIO(_ICL_PORT_CL_DW(5, phy)) #define CL_POWER_DOWN_ENABLE (1 << 4) #define SUS_CLOCK_CONFIG (3 << 0) @@ -1920,19 +1927,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ICL_PORT_CL_DW12(phy) _MMIO(_ICL_PORT_CL_DW(12, phy)) #define ICL_LANE_ENABLE_AUX (1 << 0) -/* CNL/ICL Port COMP_DW registers */ +/* ICL Port COMP_DW registers */ #define _ICL_PORT_COMP 0x100 #define _ICL_PORT_COMP_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_COMP + 4 * (dw)) -#define CNL_PORT_COMP_DW0 _MMIO(0x162100) #define ICL_PORT_COMP_DW0(phy) _MMIO(_ICL_PORT_COMP_DW(0, phy)) #define COMP_INIT (1 << 31) -#define CNL_PORT_COMP_DW1 _MMIO(0x162104) #define ICL_PORT_COMP_DW1(phy) _MMIO(_ICL_PORT_COMP_DW(1, phy)) -#define CNL_PORT_COMP_DW3 _MMIO(0x16210c) #define ICL_PORT_COMP_DW3(phy) _MMIO(_ICL_PORT_COMP_DW(3, phy)) #define PROCESS_INFO_DOT_0 (0 << 26) #define PROCESS_INFO_DOT_1 (1 << 26) @@ -1948,38 +1952,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ICL_PORT_COMP_DW8(phy) _MMIO(_ICL_PORT_COMP_DW(8, phy)) #define IREFGEN (1 << 24) -#define CNL_PORT_COMP_DW9 _MMIO(0x162124) #define ICL_PORT_COMP_DW9(phy) _MMIO(_ICL_PORT_COMP_DW(9, phy)) -#define CNL_PORT_COMP_DW10 _MMIO(0x162128) #define ICL_PORT_COMP_DW10(phy) _MMIO(_ICL_PORT_COMP_DW(10, phy)) -/* CNL/ICL Port PCS registers */ -#define _CNL_PORT_PCS_DW1_GRP_AE 0x162304 -#define 
_CNL_PORT_PCS_DW1_GRP_B 0x162384 -#define _CNL_PORT_PCS_DW1_GRP_C 0x162B04 -#define _CNL_PORT_PCS_DW1_GRP_D 0x162B84 -#define _CNL_PORT_PCS_DW1_GRP_F 0x162A04 -#define _CNL_PORT_PCS_DW1_LN0_AE 0x162404 -#define _CNL_PORT_PCS_DW1_LN0_B 0x162604 -#define _CNL_PORT_PCS_DW1_LN0_C 0x162C04 -#define _CNL_PORT_PCS_DW1_LN0_D 0x162E04 -#define _CNL_PORT_PCS_DW1_LN0_F 0x162804 -#define CNL_PORT_PCS_DW1_GRP(phy) _MMIO(_PICK(phy, \ - _CNL_PORT_PCS_DW1_GRP_AE, \ - _CNL_PORT_PCS_DW1_GRP_B, \ - _CNL_PORT_PCS_DW1_GRP_C, \ - _CNL_PORT_PCS_DW1_GRP_D, \ - _CNL_PORT_PCS_DW1_GRP_AE, \ - _CNL_PORT_PCS_DW1_GRP_F)) -#define CNL_PORT_PCS_DW1_LN0(phy) _MMIO(_PICK(phy, \ - _CNL_PORT_PCS_DW1_LN0_AE, \ - _CNL_PORT_PCS_DW1_LN0_B, \ - _CNL_PORT_PCS_DW1_LN0_C, \ - _CNL_PORT_PCS_DW1_LN0_D, \ - _CNL_PORT_PCS_DW1_LN0_AE, \ - _CNL_PORT_PCS_DW1_LN0_F)) - +/* ICL Port PCS registers */ #define _ICL_PORT_PCS_AUX 0x300 #define _ICL_PORT_PCS_GRP 0x600 #define _ICL_PORT_PCS_LN(ln) (0x800 + (ln) * 0x100) @@ -1998,34 +1975,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define LATENCY_OPTIM_MASK (0x3 << 2) #define LATENCY_OPTIM_VAL(x) ((x) << 2) -/* CNL/ICL Port TX registers */ -#define _CNL_PORT_TX_AE_GRP_OFFSET 0x162340 -#define _CNL_PORT_TX_B_GRP_OFFSET 0x1623C0 -#define _CNL_PORT_TX_C_GRP_OFFSET 0x162B40 -#define _CNL_PORT_TX_D_GRP_OFFSET 0x162BC0 -#define _CNL_PORT_TX_F_GRP_OFFSET 0x162A40 -#define _CNL_PORT_TX_AE_LN0_OFFSET 0x162440 -#define _CNL_PORT_TX_B_LN0_OFFSET 0x162640 -#define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 -#define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 -#define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 -#define _CNL_PORT_TX_DW_GRP(dw, port) (_PICK((port), \ - _CNL_PORT_TX_AE_GRP_OFFSET, \ - _CNL_PORT_TX_B_GRP_OFFSET, \ - _CNL_PORT_TX_B_GRP_OFFSET, \ - _CNL_PORT_TX_D_GRP_OFFSET, \ - _CNL_PORT_TX_AE_GRP_OFFSET, \ - _CNL_PORT_TX_F_GRP_OFFSET) + \ - 4 * (dw)) -#define _CNL_PORT_TX_DW_LN0(dw, port) (_PICK((port), \ - _CNL_PORT_TX_AE_LN0_OFFSET, \ - _CNL_PORT_TX_B_LN0_OFFSET, \ - _CNL_PORT_TX_B_LN0_OFFSET, \ - _CNL_PORT_TX_D_LN0_OFFSET, \ - _CNL_PORT_TX_AE_LN0_OFFSET, \ - _CNL_PORT_TX_F_LN0_OFFSET) + \ - 4 * (dw)) - +/* ICL Port TX registers */ #define _ICL_PORT_TX_AUX 0x380 #define _ICL_PORT_TX_GRP 0x680 #define _ICL_PORT_TX_LN(ln) (0x880 + (ln) * 0x100) @@ -2037,8 +1987,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _ICL_PORT_TX_DW_LN(dw, ln, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_TX_LN(ln) + 4 * (dw)) -#define CNL_PORT_TX_DW2_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(2, port)) -#define CNL_PORT_TX_DW2_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(2, port)) #define ICL_PORT_TX_DW2_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(2, phy)) #define ICL_PORT_TX_DW2_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(2, phy)) #define ICL_PORT_TX_DW2_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(2, 0, phy)) @@ -2051,13 +1999,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RCOMP_SCALAR(x) ((x) << 0) #define RCOMP_SCALAR_MASK (0xFF << 0) -#define _CNL_PORT_TX_DW4_LN0_AE 0x162450 -#define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(4, (port))) -#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port))) -#define CNL_PORT_TX_DW4_LN(ln, port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ - ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ - _CNL_PORT_TX_DW4_LN0_AE))) #define ICL_PORT_TX_DW4_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(4, phy)) #define ICL_PORT_TX_DW4_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(4, phy)) #define ICL_PORT_TX_DW4_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(4, 0, phy)) @@ -2070,8 +2011,6 @@ static 
inline bool i915_mmio_reg_valid(i915_reg_t reg) #define CURSOR_COEFF(x) ((x) << 0) #define CURSOR_COEFF_MASK (0x3F << 0) -#define CNL_PORT_TX_DW5_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(5, port)) -#define CNL_PORT_TX_DW5_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(5, port)) #define ICL_PORT_TX_DW5_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(5, phy)) #define ICL_PORT_TX_DW5_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(5, phy)) #define ICL_PORT_TX_DW5_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(5, 0, phy)) @@ -2083,8 +2022,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) #define ICL_PORT_TX_DW7_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(7, phy)) #define ICL_PORT_TX_DW7_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(7, phy)) #define ICL_PORT_TX_DW7_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, phy)) @@ -2278,6 +2215,68 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define MG_DP_MODE_CFG_DP_X1_MODE (1 << 6) +/* + * DG2 SNPS PHY registers (TC1 = PHY_E) + */ +#define _SNPS_PHY_A_BASE 0x168000 +#define _SNPS_PHY_B_BASE 0x169000 +#define _SNPS_PHY(phy) _PHY(phy, \ + _SNPS_PHY_A_BASE, \ + _SNPS_PHY_B_BASE) +#define _SNPS2(phy, reg) (_SNPS_PHY(phy) - \ + _SNPS_PHY_A_BASE + (reg)) +#define _MMIO_SNPS(phy, reg) _MMIO(_SNPS2(phy, reg)) +#define _MMIO_SNPS_LN(ln, phy, reg) _MMIO(_SNPS2(phy, \ + (reg) + (ln) * 0x10)) + +#define SNPS_PHY_MPLLB_CP(phy) _MMIO_SNPS(phy, 0x168000) +#define SNPS_PHY_MPLLB_CP_INT REG_GENMASK(31, 25) +#define SNPS_PHY_MPLLB_CP_INT_GS REG_GENMASK(23, 17) +#define SNPS_PHY_MPLLB_CP_PROP REG_GENMASK(15, 9) +#define SNPS_PHY_MPLLB_CP_PROP_GS REG_GENMASK(7, 1) + +#define SNPS_PHY_MPLLB_DIV(phy) _MMIO_SNPS(phy, 0x168004) +#define SNPS_PHY_MPLLB_FORCE_EN REG_BIT(31) +#define SNPS_PHY_MPLLB_DIV5_CLK_EN REG_BIT(29) +#define SNPS_PHY_MPLLB_V2I REG_GENMASK(27, 26) +#define SNPS_PHY_MPLLB_FREQ_VCO REG_GENMASK(25, 24) +#define SNPS_PHY_MPLLB_PMIX_EN REG_BIT(10) +#define SNPS_PHY_MPLLB_TX_CLK_DIV REG_GENMASK(7, 5) + +#define SNPS_PHY_MPLLB_FRACN1(phy) _MMIO_SNPS(phy, 0x168008) +#define SNPS_PHY_MPLLB_FRACN_EN REG_BIT(31) +#define SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN REG_BIT(30) +#define SNPS_PHY_MPLLB_FRACN_DEN REG_GENMASK(15, 0) + +#define SNPS_PHY_MPLLB_FRACN2(phy) _MMIO_SNPS(phy, 0x16800C) +#define SNPS_PHY_MPLLB_FRACN_REM REG_GENMASK(31, 16) +#define SNPS_PHY_MPLLB_FRACN_QUOT REG_GENMASK(15, 0) + +#define SNPS_PHY_MPLLB_SSCEN(phy) _MMIO_SNPS(phy, 0x168014) +#define SNPS_PHY_MPLLB_SSC_EN REG_BIT(31) +#define SNPS_PHY_MPLLB_SSC_UP_SPREAD REG_BIT(30) +#define SNPS_PHY_MPLLB_SSC_PEAK REG_GENMASK(29, 10) + +#define SNPS_PHY_MPLLB_SSCSTEP(phy) _MMIO_SNPS(phy, 0x168018) +#define SNPS_PHY_MPLLB_SSC_STEPSIZE REG_GENMASK(31, 11) + +#define SNPS_PHY_MPLLB_DIV2(phy) _MMIO_SNPS(phy, 0x16801C) +#define SNPS_PHY_MPLLB_HDMI_PIXEL_CLK_DIV REG_GENMASK(19, 18) +#define SNPS_PHY_MPLLB_HDMI_DIV REG_GENMASK(17, 15) +#define SNPS_PHY_MPLLB_REF_CLK_DIV REG_GENMASK(14, 12) +#define SNPS_PHY_MPLLB_MULTIPLIER REG_GENMASK(11, 0) + +#define SNPS_PHY_REF_CONTROL(phy) _MMIO_SNPS(phy, 0x168188) +#define SNPS_PHY_REF_CONTROL_REF_RANGE REG_GENMASK(31, 27) + +#define SNPS_PHY_TX_REQ(phy) _MMIO_SNPS(phy, 0x168200) +#define SNPS_PHY_TX_REQ_LN_DIS_PWR_STATE_PSR REG_GENMASK(31, 30) + +#define SNPS_PHY_TX_EQ(ln, phy) _MMIO_SNPS_LN(ln, phy, 0x168300) +#define SNPS_PHY_TX_EQ_MAIN REG_GENMASK(23, 18) +#define 
SNPS_PHY_TX_EQ_POST REG_GENMASK(15, 10) +#define SNPS_PHY_TX_EQ_PRE REG_GENMASK(7, 2) + /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. */ @@ -2516,9 +2515,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_BSD2_RING_BASE 0x1c4000 #define GEN11_BSD3_RING_BASE 0x1d0000 #define GEN11_BSD4_RING_BASE 0x1d4000 +#define XEHP_BSD5_RING_BASE 0x1e0000 +#define XEHP_BSD6_RING_BASE 0x1e4000 +#define XEHP_BSD7_RING_BASE 0x1f0000 +#define XEHP_BSD8_RING_BASE 0x1f4000 #define VEBOX_RING_BASE 0x1a000 #define GEN11_VEBOX_RING_BASE 0x1c8000 #define GEN11_VEBOX2_RING_BASE 0x1d8000 +#define XEHP_VEBOX3_RING_BASE 0x1e8000 +#define XEHP_VEBOX4_RING_BASE 0x1f8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base) + 0x30) #define RING_HEAD(base) _MMIO((base) + 0x34) @@ -2572,7 +2577,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) #define ARB_MODE_SWIZZLE_BDW (1 << 1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100 * (engine)->hw_id) + +#define _RING_FAULT_REG_RCS 0x4094 +#define _RING_FAULT_REG_VCS 0x4194 +#define _RING_FAULT_REG_BCS 0x4294 +#define _RING_FAULT_REG_VECS 0x4394 +#define RING_FAULT_REG(engine) _MMIO(_PICK((engine)->class, \ + _RING_FAULT_REG_RCS, \ + _RING_FAULT_REG_VCS, \ + _RING_FAULT_REG_VECS, \ + _RING_FAULT_REG_BCS)) #define GEN8_RING_FAULT_REG _MMIO(0x4094) #define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) @@ -2672,6 +2686,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) #define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26) #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) @@ -3099,6 +3115,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN10_MIRROR_FUSE3 _MMIO(0x9118) #define GEN10_L3BANK_PAIR_COUNT 4 #define GEN10_L3BANK_MASK 0x0F +/* on Xe_HP the same fuses indicates mslices instead of L3 banks */ +#define GEN12_MAX_MSLICES 4 +#define GEN12_MEML3_EN_MASK 0x0F #define GEN8_EU_DISABLE0 _MMIO(0x9134) #define GEN8_EU_DIS0_S0_MASK 0xffffff @@ -3133,6 +3152,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define XEHP_EU_ENABLE _MMIO(0x9134) +#define XEHP_EU_ENA_MASK 0xFF + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -4086,6 +4108,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BXT_GT_PERF_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070) #define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994) #define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP0_CAP_MASK REG_GENMASK(7, 0) +#define RP1_CAP_MASK REG_GENMASK(15, 8) +#define RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) @@ -4142,6 +4167,7 @@ enum { FAULT_AND_CONTINUE /* Unsupported */ }; +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) #define GEN8_CTX_VALID (1 << 0) #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) #define GEN8_CTX_FORCE_RESTORE (1 << 2) @@ -4158,6 +4184,11 @@ enum { #define GEN11_ENGINE_INSTANCE_SHIFT 48 #define 
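RING_FAULT_REG() above switches from hw_id arithmetic to _PICK() indexed by engine class: a compound-literal array of offsets selected by class number. A standalone illustration; the _PICK() body is a simplified copy of the i915_reg.h helper, and the class numbering in the enum reflects the driver's internal intel_engine_class ordering as I understand it (render 0, video 1, video-enhance 2, copy 3), so treat it as illustrative:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

#define _PICK(__index, ...)	(((const u32 []){ __VA_ARGS__ })[__index])

#define _RING_FAULT_REG_RCS	0x4094
#define _RING_FAULT_REG_VCS	0x4194
#define _RING_FAULT_REG_BCS	0x4294
#define _RING_FAULT_REG_VECS	0x4394

enum engine_class {
	RENDER_CLASS,
	VIDEO_DECODE_CLASS,
	VIDEO_ENHANCEMENT_CLASS,
	COPY_ENGINE_CLASS,
};

static u32 ring_fault_reg(enum engine_class class)
{
	/* array order matches the macro above: class 2 -> VECS, class 3 -> BCS */
	return _PICK(class, _RING_FAULT_REG_RCS, _RING_FAULT_REG_VCS,
		     _RING_FAULT_REG_VECS, _RING_FAULT_REG_BCS);
}

int main(void)
{
	printf("VECS fault reg: 0x%04x\n",
	       ring_fault_reg(VIDEO_ENHANCEMENT_CLASS));	/* 0x4394 */
	return 0;
}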
GEN11_ENGINE_INSTANCE_WIDTH 6 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 +#define XEHP_SW_COUNTER_SHIFT 58 +#define XEHP_SW_COUNTER_WIDTH 6 + #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 @@ -4586,23 +4617,26 @@ enum { #define _PSR2_CTL_EDP 0x6f900 #define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) #define EDP_PSR2_ENABLE (1 << 31) -#define EDP_SU_TRACK_ENABLE (1 << 30) +#define EDP_SU_TRACK_ENABLE (1 << 30) /* up to adl-p */ #define TGL_EDP_PSR2_BLOCK_COUNT_NUM_2 (0 << 28) #define TGL_EDP_PSR2_BLOCK_COUNT_NUM_3 (1 << 28) #define EDP_Y_COORDINATE_ENABLE REG_BIT(25) /* display 10, 11 and 12 */ +#define EDP_PSR2_SU_SDP_SCANLINE REG_BIT(25) /* display 13+ */ #define EDP_MAX_SU_DISABLE_TIME(t) ((t) << 20) #define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f << 20) #define EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES 8 #define EDP_PSR2_IO_BUFFER_WAKE(lines) ((EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES - (lines)) << 13) #define EDP_PSR2_IO_BUFFER_WAKE_MASK (3 << 13) #define TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES 5 -#define TGL_EDP_PSR2_IO_BUFFER_WAKE(lines) (((lines) - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES) << 13) +#define TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT 13 +#define TGL_EDP_PSR2_IO_BUFFER_WAKE(lines) (((lines) - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES) << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT) #define TGL_EDP_PSR2_IO_BUFFER_WAKE_MASK (7 << 13) #define EDP_PSR2_FAST_WAKE_MAX_LINES 8 #define EDP_PSR2_FAST_WAKE(lines) ((EDP_PSR2_FAST_WAKE_MAX_LINES - (lines)) << 11) #define EDP_PSR2_FAST_WAKE_MASK (3 << 11) #define TGL_EDP_PSR2_FAST_WAKE_MIN_LINES 5 -#define TGL_EDP_PSR2_FAST_WAKE(lines) (((lines) - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES) << 10) +#define TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT 10 +#define TGL_EDP_PSR2_FAST_WAKE(lines) (((lines) - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES) << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT) #define TGL_EDP_PSR2_FAST_WAKE_MASK (7 << 10) #define EDP_PSR2_TP2_TIME_500us (0 << 8) #define EDP_PSR2_TP2_TIME_100us (1 << 8) @@ -4652,17 +4686,23 @@ enum { #define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) #define PSR2_SU_STATUS_FRAMES 8 -#define _PSR2_MAN_TRK_CTL_A 0x60910 -#define _PSR2_MAN_TRK_CTL_EDP 0x6f910 -#define PSR2_MAN_TRK_CTL(tran) _MMIO_TRANS2(tran, _PSR2_MAN_TRK_CTL_A) -#define PSR2_MAN_TRK_CTL_ENABLE REG_BIT(31) -#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(30, 21) -#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) +#define _PSR2_MAN_TRK_CTL_A 0x60910 +#define _PSR2_MAN_TRK_CTL_EDP 0x6f910 +#define PSR2_MAN_TRK_CTL(tran) _MMIO_TRANS2(tran, _PSR2_MAN_TRK_CTL_A) +#define PSR2_MAN_TRK_CTL_ENABLE REG_BIT(31) +#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(30, 21) +#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) #define PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(20, 11) #define PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) -#define PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(3) -#define PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(2) -#define PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(1) +#define PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(3) +#define PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(2) +#define PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(1) +#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(28, 16) +#define 
ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) +#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(12, 0) +#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) +#define ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(14) +#define ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(13) /* Icelake DSC Rate Control Range Parameter Registers */ #define DSCA_RC_RANGE_PARAMETERS_0 _MMIO(0x6B240) @@ -7717,11 +7757,11 @@ enum { #define SKL_PS_ECC_STAT(pipe, id) _MMIO_PIPE(pipe, \ _ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A), \ _ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B)) -#define CNL_PS_COEF_INDEX_SET(pipe, id, set) _MMIO_PIPE(pipe, \ +#define GLK_PS_COEF_INDEX_SET(pipe, id, set) _MMIO_PIPE(pipe, \ _ID(id, _PS_COEF_SET0_INDEX_1A, _PS_COEF_SET0_INDEX_2A) + (set) * 8, \ _ID(id, _PS_COEF_SET0_INDEX_1B, _PS_COEF_SET0_INDEX_2B) + (set) * 8) -#define CNL_PS_COEF_DATA_SET(pipe, id, set) _MMIO_PIPE(pipe, \ +#define GLK_PS_COEF_DATA_SET(pipe, id, set) _MMIO_PIPE(pipe, \ _ID(id, _PS_COEF_SET0_DATA_1A, _PS_COEF_SET0_DATA_2A) + (set) * 8, \ _ID(id, _PS_COEF_SET0_DATA_1B, _PS_COEF_SET0_DATA_2B) + (set) * 8) /* legacy palette */ @@ -7757,7 +7797,7 @@ enum { #define GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED (3 << 0) /* icl + */ /* DMC */ -#define DMC_PROGRAM(i) _MMIO(0x80000 + (i) * 4) +#define DMC_PROGRAM(addr, i) _MMIO((addr) + (i) * 4) #define DMC_SSP_BASE_ADDR_GEN9 0x00002FC0 #define DMC_HTP_ADDR_SKL 0x00500034 #define DMC_SSP_BASE _MMIO(0x8F074) @@ -7936,7 +7976,7 @@ enum { #define DSI1_NON_TE (1 << 31) #define DSI0_NON_TE (1 << 30) #define ICL_AUX_CHANNEL_E (1 << 29) -#define CNL_AUX_CHANNEL_F (1 << 28) +#define ICL_AUX_CHANNEL_F (1 << 28) #define GEN9_AUX_CHANNEL_D (1 << 27) #define GEN9_AUX_CHANNEL_C (1 << 26) #define GEN9_AUX_CHANNEL_B (1 << 25) @@ -7988,9 +8028,9 @@ enum { #define GEN11_GT_DW1_IRQ (1 << 1) #define GEN11_GT_DW0_IRQ (1 << 0) -#define DG1_MSTR_UNIT_INTR _MMIO(0x190008) +#define DG1_MSTR_TILE_INTR _MMIO(0x190008) #define DG1_MSTR_IRQ REG_BIT(31) -#define DG1_MSTR_UNIT(u) REG_BIT(u) +#define DG1_MSTR_TILE(t) REG_BIT(t) #define GEN11_DISPLAY_INT_CTL _MMIO(0x44200) #define GEN11_DISPLAY_IRQ_ENABLE (1 << 31) @@ -8073,7 +8113,10 @@ enum { #define GEN11_BCS_RSVD_INTR_MASK _MMIO(0x1900a0) #define GEN11_VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) #define GEN11_VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) +#define GEN12_VCS4_VCS5_INTR_MASK _MMIO(0x1900b0) +#define GEN12_VCS6_VCS7_INTR_MASK _MMIO(0x1900b4) #define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) +#define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4) #define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) #define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) @@ -8113,6 +8156,7 @@ enum { # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 _MMIO(0x42080) +#define IGNORE_KVMR_PIPE_A REG_BIT(23) #define KBL_ARB_FILL_SPARE_22 REG_BIT(22) #define DIS_RAM_BYPASS_PSR2_MAN_TRACK (1 << 16) #define SKL_DE_COMPRESSED_HASH_MODE (1 << 15) @@ -8125,7 +8169,6 @@ enum { #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) #define CHICKEN_MISC_2 _MMIO(0x42084) -#define CNL_COMP_PWR_DOWN (1 << 23) #define KBL_ARB_FILL_SPARE_14 REG_BIT(14) #define KBL_ARB_FILL_SPARE_13 REG_BIT(13) #define GLK_CL2_PWR_DOWN (1 << 12) @@ -8163,15 +8206,16 @@ enum { [TRANSCODER_B] = _CHICKEN_TRANS_B, \ [TRANSCODER_C] = _CHICKEN_TRANS_C, \ [TRANSCODER_D] = _CHICKEN_TRANS_D)) -#define 
HSW_FRAME_START_DELAY_MASK (3 << 27) -#define HSW_FRAME_START_DELAY(x) ((x) << 27) /* 0-3 */ -#define VSC_DATA_SEL_SOFTWARE_CONTROL (1 << 25) /* GLK and CNL+ */ -#define DDI_TRAINING_OVERRIDE_ENABLE (1 << 19) -#define DDI_TRAINING_OVERRIDE_VALUE (1 << 18) -#define DDIE_TRAINING_OVERRIDE_ENABLE (1 << 17) /* CHICKEN_TRANS_A only */ -#define DDIE_TRAINING_OVERRIDE_VALUE (1 << 16) /* CHICKEN_TRANS_A only */ -#define PSR2_ADD_VERTICAL_LINE_COUNT (1 << 15) -#define PSR2_VSC_ENABLE_PROG_HEADER (1 << 12) +#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27) +#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x) +#define VSC_DATA_SEL_SOFTWARE_CONTROL REG_BIT(25) /* GLK */ +#define FECSTALL_DIS_DPTSTREAM_DPTTG REG_BIT(23) +#define DDI_TRAINING_OVERRIDE_ENABLE REG_BIT(19) +#define DDI_TRAINING_OVERRIDE_VALUE REG_BIT(18) +#define DDIE_TRAINING_OVERRIDE_ENABLE REG_BIT(17) /* CHICKEN_TRANS_A only */ +#define DDIE_TRAINING_OVERRIDE_VALUE REG_BIT(16) /* CHICKEN_TRANS_A only */ +#define PSR2_ADD_VERTICAL_LINE_COUNT REG_BIT(15) +#define PSR2_VSC_ENABLE_PROG_HEADER REG_BIT(12) #define DISP_ARB_CTL _MMIO(0x45000) #define DISP_FBC_MEMORY_WAKE (1 << 31) @@ -8229,9 +8273,8 @@ enum { #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) #define SKL_SELECT_ALTERNATE_DC_EXIT (1 << 30) -#define CNL_DELAY_PMRSP (1 << 22) +#define ICL_DELAY_PMRSP (1 << 22) #define MASK_WAKEMEM (1 << 13) -#define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define GEN11_CHICKEN_DCPR_2 _MMIO(0x46434) #define DCPR_MASK_MAXLATENCY_MEMUP_CLR REG_BIT(27) @@ -8252,10 +8295,9 @@ enum { #define SKL_DFSM_PIPE_B_DISABLE (1 << 21) #define SKL_DFSM_PIPE_C_DISABLE (1 << 28) #define TGL_DFSM_PIPE_D_DISABLE (1 << 22) -#define CNL_DFSM_DISPLAY_DSC_DISABLE (1 << 7) +#define GLK_DFSM_DISPLAY_DSC_DISABLE (1 << 7) #define SKL_DSSM _MMIO(0x51004) -#define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) #define ICL_DSSM_CDCLK_PLL_REFCLK_MASK (7 << 29) #define ICL_DSSM_CDCLK_PLL_REFCLK_24MHz (0 << 29) #define ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz (1 << 29) @@ -8354,7 +8396,6 @@ enum { /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) -#define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) #define ICL_HDC_MODE _MMIO(0xE5F4) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1 << 15) #define HDC_FENCE_DEST_SLM_DISABLE (1 << 14) @@ -8388,7 +8429,8 @@ enum { #define _PIPEC_CHICKEN 0x72038 #define PIPE_CHICKEN(pipe) _MMIO_PIPE(pipe, _PIPEA_CHICKEN,\ _PIPEB_CHICKEN) -#define UNDERRUN_RECOVERY_DISABLE REG_BIT(30) +#define UNDERRUN_RECOVERY_DISABLE_ADLP REG_BIT(30) +#define UNDERRUN_RECOVERY_ENABLE_DG2 REG_BIT(30) #define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU (1 << 15) #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) @@ -9200,6 +9242,8 @@ enum { #define GEN9_FREQUENCY(x) ((x) << 23) #define GEN6_OFFSET(x) ((x) << 19) #define GEN6_AGGRESSIVE_TURBO (0 << 15) +#define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23 + #define GEN6_RC_VIDEO_FREQ _MMIO(0xA00C) #define GEN6_RC_CONTROL _MMIO(0xA090) #define GEN6_RC_CTL_RC6pp_ENABLE (1 << 16) @@ -9368,9 +9412,13 @@ enum { #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) +#define ADL_PCODE_MEM_SS_READ_PSF_GV_INFO ((0) | (0x2 << 8)) #define ICL_PCODE_SAGV_DE_MEM_SS_CONFIG 0xe #define ICL_PCODE_POINTS_RESTRICTED 0x0 -#define ICL_PCODE_POINTS_RESTRICTED_MASK 0x1 +#define ICL_PCODE_POINTS_RESTRICTED_MASK 0xf +#define ADLS_PSF_PT_SHIFT 8 +#define ADLS_QGV_PT_MASK REG_GENMASK(7, 0) +#define ADLS_PSF_PT_MASK REG_GENMASK(10, 8) #define 
GEN6_PCODE_READ_D_COMP 0x10 #define GEN6_PCODE_WRITE_D_COMP 0x11 #define ICL_PCODE_EXIT_TCCOLD 0x12 @@ -9530,7 +9578,6 @@ enum { #define HSW_SAMPLE_C_PERFORMANCE (1 << 9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5) -#define CNL_FAST_ANISO_L1_BANKING_FIX (1 << 4) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) @@ -9711,15 +9758,12 @@ enum { /* HSW/BDW power well */ #define HSW_PW_CTL_IDX_GLOBAL 15 -/* SKL/BXT/GLK/CNL power wells */ +/* SKL/BXT/GLK power wells */ #define SKL_PW_CTL_IDX_PW_2 15 #define SKL_PW_CTL_IDX_PW_1 14 -#define CNL_PW_CTL_IDX_AUX_F 12 -#define CNL_PW_CTL_IDX_AUX_D 11 #define GLK_PW_CTL_IDX_AUX_C 10 #define GLK_PW_CTL_IDX_AUX_B 9 #define GLK_PW_CTL_IDX_AUX_A 8 -#define CNL_PW_CTL_IDX_DDI_F 6 #define SKL_PW_CTL_IDX_DDI_D 4 #define SKL_PW_CTL_IDX_DDI_C 3 #define SKL_PW_CTL_IDX_DDI_B 2 @@ -9818,19 +9862,6 @@ enum skl_power_gate { ((pw_idx) - ICL_PW_CTL_IDX_PW_1 + SKL_PG1) #define SKL_FUSE_PG_DIST_STATUS(pg) (1 << (27 - (pg))) -#define _CNL_AUX_REG_IDX(pw_idx) ((pw_idx) - GLK_PW_CTL_IDX_AUX_B) -#define _CNL_AUX_ANAOVRD1_B 0x162250 -#define _CNL_AUX_ANAOVRD1_C 0x162210 -#define _CNL_AUX_ANAOVRD1_D 0x1622D0 -#define _CNL_AUX_ANAOVRD1_F 0x162A90 -#define CNL_AUX_ANAOVRD1(pw_idx) _MMIO(_PICK(_CNL_AUX_REG_IDX(pw_idx), \ - _CNL_AUX_ANAOVRD1_B, \ - _CNL_AUX_ANAOVRD1_C, \ - _CNL_AUX_ANAOVRD1_D, \ - _CNL_AUX_ANAOVRD1_F)) -#define CNL_AUX_ANAOVRD1_ENABLE (1 << 16) -#define CNL_AUX_ANAOVRD1_LDO_BYPASS (1 << 23) - #define _ICL_AUX_REG_IDX(pw_idx) ((pw_idx) - ICL_PW_CTL_IDX_AUX_A) #define _ICL_AUX_ANAOVRD1_A 0x162398 #define _ICL_AUX_ANAOVRD1_B 0x6C398 @@ -10130,11 +10161,11 @@ enum skl_power_gate { #define TRANS_DDI_BPC_10 (1 << 20) #define TRANS_DDI_BPC_6 (2 << 20) #define TRANS_DDI_BPC_12 (3 << 20) -#define TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK REG_GENMASK(19, 18) /* bdw-cnl */ +#define TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK REG_GENMASK(19, 18) #define TRANS_DDI_PORT_SYNC_MASTER_SELECT(x) REG_FIELD_PREP(TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK, (x)) #define TRANS_DDI_PVSYNC (1 << 17) #define TRANS_DDI_PHSYNC (1 << 16) -#define TRANS_DDI_PORT_SYNC_ENABLE REG_BIT(15) /* bdw-cnl */ +#define TRANS_DDI_PORT_SYNC_ENABLE REG_BIT(15) #define TRANS_DDI_EDP_INPUT_MASK (7 << 12) #define TRANS_DDI_EDP_INPUT_A_ON (0 << 12) #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) @@ -10167,6 +10198,9 @@ enum skl_power_gate { #define PORT_SYNC_MODE_MASTER_SELECT_MASK REG_GENMASK(2, 0) #define PORT_SYNC_MODE_MASTER_SELECT(x) REG_FIELD_PREP(PORT_SYNC_MODE_MASTER_SELECT_MASK, (x)) +#define TRANS_CMTG_CHICKEN _MMIO(0x6fa90) +#define DISABLE_DPT_CLK_GATING REG_BIT(1) + /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 #define _DP_TP_CTL_B 0x64140 @@ -10371,6 +10405,14 @@ enum skl_power_gate { #define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC) /* See DP_MSA_MISC_* for the bit definitions */ +#define _TRANS_A_SET_CONTEXT_LATENCY 0x6007C +#define _TRANS_B_SET_CONTEXT_LATENCY 0x6107C +#define _TRANS_C_SET_CONTEXT_LATENCY 0x6207C +#define _TRANS_D_SET_CONTEXT_LATENCY 0x6307C +#define TRANS_SET_CONTEXT_LATENCY(tran) _MMIO_TRANS2(tran, _TRANS_A_SET_CONTEXT_LATENCY) +#define TRANS_SET_CONTEXT_LATENCY_MASK REG_GENMASK(15, 0) +#define TRANS_SET_CONTEXT_LATENCY_VALUE(x) REG_FIELD_PREP(TRANS_SET_CONTEXT_LATENCY_MASK, (x)) + /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) #define LCPLL_PLL_DISABLE (1 << 31) @@ -10482,17 +10524,6 @@ enum skl_power_gate { #define DPLL_CFGCR1(id) _MMIO_PIPE((id) - 
SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR1) #define DPLL_CFGCR2(id) _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2) -/* - * CNL Clocks - */ -#define DPCLKA_CFGCR0 _MMIO(0x6C200) -#define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ - (port) + 10)) -#define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ - (port) * 2) -#define DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port) (3 << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) -#define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) - /* ICL Clocks */ #define ICL_DPCLKA_CFGCR0 _MMIO(0x164280) #define ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) (1 << _PICK(phy, 10, 11, 24, 4, 5)) @@ -10548,7 +10579,7 @@ enum skl_power_gate { ADLS_DPCLKA_DDIJ_SEL_MASK, \ ADLS_DPCLKA_DDIK_SEL_MASK) -/* CNL PLL */ +/* ICL PLL */ #define DPLL0_ENABLE 0x46010 #define DPLL1_ENABLE 0x46014 #define _ADLS_DPLL2_ENABLE 0x46018 @@ -10557,9 +10588,14 @@ enum skl_power_gate { #define PLL_LOCK (1 << 30) #define PLL_POWER_ENABLE (1 << 27) #define PLL_POWER_STATE (1 << 26) -#define CNL_DPLL_ENABLE(pll) _MMIO_PLL3(pll, DPLL0_ENABLE, DPLL1_ENABLE, \ +#define ICL_DPLL_ENABLE(pll) _MMIO_PLL3(pll, DPLL0_ENABLE, DPLL1_ENABLE, \ _ADLS_DPLL2_ENABLE, _ADLS_DPLL3_ENABLE) +#define _DG2_PLL3_ENABLE 0x4601C + +#define DG2_PLL_ENABLE(pll) _MMIO_PLL3(pll, DPLL0_ENABLE, DPLL1_ENABLE, \ + _ADLS_DPLL2_ENABLE, _DG2_PLL3_ENABLE) + #define TBT_PLL_ENABLE _MMIO(0x46020) #define _MG_PLL1_ENABLE 0x46030 @@ -10725,60 +10761,52 @@ enum skl_power_gate { _MG_PLL_TDC_COLDST_BIAS_PORT1, \ _MG_PLL_TDC_COLDST_BIAS_PORT2) -#define _CNL_DPLL0_CFGCR0 0x6C000 -#define _CNL_DPLL1_CFGCR0 0x6C080 -#define DPLL_CFGCR0_HDMI_MODE (1 << 30) -#define DPLL_CFGCR0_SSC_ENABLE (1 << 29) -#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) -#define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) -#define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) -#define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) -#define DPLL_CFGCR0_LINK_RATE_810 (2 << 25) -#define DPLL_CFGCR0_LINK_RATE_1620 (3 << 25) -#define DPLL_CFGCR0_LINK_RATE_1080 (4 << 25) -#define DPLL_CFGCR0_LINK_RATE_2160 (5 << 25) -#define DPLL_CFGCR0_LINK_RATE_3240 (6 << 25) -#define DPLL_CFGCR0_LINK_RATE_4050 (7 << 25) -#define DPLL_CFGCR0_DCO_FRACTION_MASK (0x7fff << 10) -#define DPLL_CFGCR0_DCO_FRACTION_SHIFT (10) -#define DPLL_CFGCR0_DCO_FRACTION(x) ((x) << 10) -#define DPLL_CFGCR0_DCO_INTEGER_MASK (0x3ff) -#define CNL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR0, _CNL_DPLL1_CFGCR0) - -#define _CNL_DPLL0_CFGCR1 0x6C004 -#define _CNL_DPLL1_CFGCR1 0x6C084 -#define DPLL_CFGCR1_QDIV_RATIO_MASK (0xff << 10) -#define DPLL_CFGCR1_QDIV_RATIO_SHIFT (10) -#define DPLL_CFGCR1_QDIV_RATIO(x) ((x) << 10) -#define DPLL_CFGCR1_QDIV_MODE_SHIFT (9) -#define DPLL_CFGCR1_QDIV_MODE(x) ((x) << 9) -#define DPLL_CFGCR1_KDIV_MASK (7 << 6) -#define DPLL_CFGCR1_KDIV_SHIFT (6) -#define DPLL_CFGCR1_KDIV(x) ((x) << 6) -#define DPLL_CFGCR1_KDIV_1 (1 << 6) -#define DPLL_CFGCR1_KDIV_2 (2 << 6) -#define DPLL_CFGCR1_KDIV_3 (4 << 6) -#define DPLL_CFGCR1_PDIV_MASK (0xf << 2) -#define DPLL_CFGCR1_PDIV_SHIFT (2) -#define DPLL_CFGCR1_PDIV(x) ((x) << 2) -#define DPLL_CFGCR1_PDIV_2 (1 << 2) -#define DPLL_CFGCR1_PDIV_3 (2 << 2) -#define DPLL_CFGCR1_PDIV_5 (4 << 2) -#define DPLL_CFGCR1_PDIV_7 (8 << 2) -#define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) -#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) -#define TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL (0 << 0) -#define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) - #define _ICL_DPLL0_CFGCR0 0x164000 #define 
_ICL_DPLL1_CFGCR0 0x164080 #define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ _ICL_DPLL1_CFGCR0) +#define DPLL_CFGCR0_HDMI_MODE (1 << 30) +#define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) +#define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) +#define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) +#define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) +#define DPLL_CFGCR0_LINK_RATE_810 (2 << 25) +#define DPLL_CFGCR0_LINK_RATE_1620 (3 << 25) +#define DPLL_CFGCR0_LINK_RATE_1080 (4 << 25) +#define DPLL_CFGCR0_LINK_RATE_2160 (5 << 25) +#define DPLL_CFGCR0_LINK_RATE_3240 (6 << 25) +#define DPLL_CFGCR0_LINK_RATE_4050 (7 << 25) +#define DPLL_CFGCR0_DCO_FRACTION_MASK (0x7fff << 10) +#define DPLL_CFGCR0_DCO_FRACTION_SHIFT (10) +#define DPLL_CFGCR0_DCO_FRACTION(x) ((x) << 10) +#define DPLL_CFGCR0_DCO_INTEGER_MASK (0x3ff) #define _ICL_DPLL0_CFGCR1 0x164004 #define _ICL_DPLL1_CFGCR1 0x164084 #define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ _ICL_DPLL1_CFGCR1) +#define DPLL_CFGCR1_QDIV_RATIO_MASK (0xff << 10) +#define DPLL_CFGCR1_QDIV_RATIO_SHIFT (10) +#define DPLL_CFGCR1_QDIV_RATIO(x) ((x) << 10) +#define DPLL_CFGCR1_QDIV_MODE_SHIFT (9) +#define DPLL_CFGCR1_QDIV_MODE(x) ((x) << 9) +#define DPLL_CFGCR1_KDIV_MASK (7 << 6) +#define DPLL_CFGCR1_KDIV_SHIFT (6) +#define DPLL_CFGCR1_KDIV(x) ((x) << 6) +#define DPLL_CFGCR1_KDIV_1 (1 << 6) +#define DPLL_CFGCR1_KDIV_2 (2 << 6) +#define DPLL_CFGCR1_KDIV_3 (4 << 6) +#define DPLL_CFGCR1_PDIV_MASK (0xf << 2) +#define DPLL_CFGCR1_PDIV_SHIFT (2) +#define DPLL_CFGCR1_PDIV(x) ((x) << 2) +#define DPLL_CFGCR1_PDIV_2 (1 << 2) +#define DPLL_CFGCR1_PDIV_3 (2 << 2) +#define DPLL_CFGCR1_PDIV_5 (4 << 2) +#define DPLL_CFGCR1_PDIV_7 (8 << 2) +#define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) +#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) +#define TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL (0 << 0) #define _TGL_DPLL0_CFGCR0 0x164284 #define _TGL_DPLL1_CFGCR0 0x16428C @@ -10998,8 +11026,8 @@ enum skl_power_gate { #define BXT_DE_PLL_LOCK (1 << 30) #define BXT_DE_PLL_FREQ_REQ (1 << 23) #define BXT_DE_PLL_FREQ_REQ_ACK (1 << 22) -#define CNL_CDCLK_PLL_RATIO(x) (x) -#define CNL_CDCLK_PLL_RATIO_MASK 0xff +#define ICL_CDCLK_PLL_RATIO(x) (x) +#define ICL_CDCLK_PLL_RATIO_MASK 0xff /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) @@ -11054,6 +11082,7 @@ enum skl_power_gate { #define SKL_MEMORY_FREQ_MULTIPLIER_HZ 266666666 #define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04) #define SKL_REQ_DATA_MASK (0xF << 0) +#define DG1_GEAR_TYPE REG_BIT(16) #define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000) #define SKL_DRAM_DDR_TYPE_MASK (0x3 << 0) @@ -11076,18 +11105,29 @@ enum skl_power_gate { #define SKL_DRAM_RANK_1 (0x0 << 10) #define SKL_DRAM_RANK_2 (0x1 << 10) #define SKL_DRAM_RANK_MASK (0x1 << 10) -#define CNL_DRAM_SIZE_MASK 0x7F -#define CNL_DRAM_WIDTH_MASK (0x3 << 7) -#define CNL_DRAM_WIDTH_SHIFT 7 -#define CNL_DRAM_WIDTH_X8 (0x0 << 7) -#define CNL_DRAM_WIDTH_X16 (0x1 << 7) -#define CNL_DRAM_WIDTH_X32 (0x2 << 7) -#define CNL_DRAM_RANK_MASK (0x3 << 9) -#define CNL_DRAM_RANK_SHIFT 9 -#define CNL_DRAM_RANK_1 (0x0 << 9) -#define CNL_DRAM_RANK_2 (0x1 << 9) -#define CNL_DRAM_RANK_3 (0x2 << 9) -#define CNL_DRAM_RANK_4 (0x3 << 9) +#define ICL_DRAM_SIZE_MASK 0x7F +#define ICL_DRAM_WIDTH_MASK (0x3 << 7) +#define ICL_DRAM_WIDTH_SHIFT 7 +#define ICL_DRAM_WIDTH_X8 (0x0 << 7) +#define ICL_DRAM_WIDTH_X16 (0x1 << 7) +#define ICL_DRAM_WIDTH_X32 (0x2 << 7) +#define ICL_DRAM_RANK_MASK (0x3 << 9) +#define 
ICL_DRAM_RANK_SHIFT 9 +#define ICL_DRAM_RANK_1 (0x0 << 9) +#define ICL_DRAM_RANK_2 (0x1 << 9) +#define ICL_DRAM_RANK_3 (0x2 << 9) +#define ICL_DRAM_RANK_4 (0x3 << 9) + +#define SA_PERF_STATUS_0_0_0_MCHBAR_PC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5918) +#define DG1_QCLK_RATIO_MASK REG_GENMASK(9, 2) +#define DG1_QCLK_REFERENCE REG_BIT(10) + +#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000) +#define DG1_DRAM_T_RDPRE_MASK REG_GENMASK(16, 11) +#define DG1_DRAM_T_RP_MASK REG_GENMASK(6, 0) +#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4004) +#define DG1_DRAM_T_RCD_MASK REG_GENMASK(15, 9) +#define DG1_DRAM_T_RAS_MASK REG_GENMASK(8, 1) /* * Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register, @@ -12290,6 +12330,7 @@ enum skl_power_gate { /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +#define GEN9_LNCFCMOCS_REG_COUNT 32 #define __GEN9_RCS0_MOCS0 0xc800 #define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) @@ -12337,6 +12378,7 @@ enum skl_power_gate { _ICL_PHY_MISC_B) #define ICL_PHY_MISC_MUX_DDID (1 << 28) #define ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN (1 << 23) +#define DG2_PHY_DP_TX_ACK_MASK REG_GENMASK(23, 20) /* Icelake Display Stream Compression Registers */ #define DSCA_PICTURE_PARAMETER_SET_0 _MMIO(0x6B200) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 37aef1308573..ce446716d092 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -42,22 +42,17 @@ #include "i915_active.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_trace.h" #include "intel_pm.h" struct execute_cb { struct irq_work work; struct i915_sw_fence *fence; - void (*hook)(struct i915_request *rq, struct dma_fence *signal); struct i915_request *signal; }; -static struct i915_global_request { - struct i915_global base; - struct kmem_cache *slab_requests; - struct kmem_cache *slab_execute_cbs; -} global; +static struct kmem_cache *slab_requests; +static struct kmem_cache *slab_execute_cbs; static const char *i915_fence_get_driver_name(struct dma_fence *fence) { @@ -108,13 +103,16 @@ static signed long i915_fence_wait(struct dma_fence *fence, struct kmem_cache *i915_request_slab_cache(void) { - return global.slab_requests; + return slab_requests; } static void i915_fence_release(struct dma_fence *fence) { struct i915_request *rq = to_request(fence); + GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. @@ -126,41 +124,19 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->semaphore); /* - * Keep one request on each engine for reserved use under mempressure - * - * We do not hold a reference to the engine here and so have to be - * very careful in what rq->engine we poke. The virtual engine is - * referenced via the rq->context and we released that ref during - * i915_request_retire(), ergo we must not dereference a virtual - * engine here. Not that we would want to, as the only consumer of - * the reserved engine->request_pool is the power management parking, - * which must-not-fail, and that is only run on the physical engines. 
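/*
 * The reserved engine->request_pool mentioned above is a single-entry
 * cache claimed with a bare cmpxchg(): the release path offers the
 * object into an empty slot instead of freeing it, and the allocation
 * slow path takes it back with xchg().  A minimal sketch of the
 * pattern, with hypothetical types (not the driver's code):
 */
#include <linux/atomic.h>
#include <linux/slab.h>

struct obj { int payload; };

/* Release: park at most one object per owner instead of freeing it. */
static void obj_release(struct obj **slot, struct obj *o)
{
	/* cmpxchg() returns the old value, i.e. NULL iff the slot was empty. */
	if (!cmpxchg(slot, NULL, o))
		return;		/* parked for reuse under memory pressure */

	kfree(o);
}

/* Allocation slow path: try the parked object before the allocator. */
static struct obj *obj_get(struct obj **slot, gfp_t gfp)
{
	struct obj *o = xchg(slot, NULL);

	return o ? o : kmalloc(sizeof(*o), gfp);
}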
- * - * Since the request must have been executed to be have completed, - * we know that it will have been processed by the HW and will - * not be unsubmitted again, so rq->engine and rq->execution_mask - * at this point is stable. rq->execution_mask will be a single - * bit if the last and _only_ engine it could execution on was a - * physical engine, if it's multiple bits then it started on and - * could still be on a virtual engine. Thus if the mask is not a - * power-of-two we assume that rq->engine may still be a virtual - * engine and so a dangling invalid pointer that we cannot dereference - * - * For example, consider the flow of a bonded request through a virtual - * engine. The request is created with a wide engine mask (all engines - * that we might execute on). On processing the bond, the request mask - * is reduced to one or more engines. If the request is subsequently - * bound to a single engine, it will then be constrained to only - * execute on that engine and never returned to the virtual engine - * after timeslicing away, see __unwind_incomplete_requests(). Thus we - * know that if the rq->execution_mask is a single bit, rq->engine - * can be a physical engine with the exact corresponding mask. + * Keep one request on each engine for reserved use under mempressure, + * do not use with virtual engines as this really is only needed for + * kernel contexts. */ - if (is_power_of_2(rq->execution_mask) && - !cmpxchg(&rq->engine->request_pool, NULL, rq)) + if (!intel_engine_is_virtual(rq->engine) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) { + intel_context_put(rq->context); return; + } + + intel_context_put(rq->context); - kmem_cache_free(global.slab_requests, rq); + kmem_cache_free(slab_requests, rq); } const struct dma_fence_ops i915_fence_ops = { @@ -177,18 +153,7 @@ static void irq_execute_cb(struct irq_work *wrk) struct execute_cb *cb = container_of(wrk, typeof(*cb), work); i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); -} - -static void irq_execute_cb_hook(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - cb->hook(container_of(cb->fence, struct i915_request, submit), - &cb->signal->fence); - i915_request_put(cb->signal); - - irq_execute_cb(wrk); + kmem_cache_free(slab_execute_cbs, cb); } static __always_inline void @@ -216,7 +181,7 @@ static bool irq_work_imm(struct irq_work *wrk) return false; } -static void __notify_execute_cb_imm(struct i915_request *rq) +void i915_request_notify_execute_cb_imm(struct i915_request *rq) { __notify_execute_cb(rq, irq_work_imm); } @@ -272,11 +237,11 @@ i915_request_active_engine(struct i915_request *rq, * check that we have acquired the lock on the final engine. 
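/*
 * The "take the lock, then confirm it is still the right lock" ploy
 * described above is a general idiom for chasing a pointer that only
 * stabilises once the lock it points at is held.  A sketch under the
 * assumption of a movable owner pointer (hypothetical types):
 */
#include <linux/spinlock.h>

struct owner { spinlock_t lock; };
struct item  { struct owner *owner; /* may be re-parented until locked */ };

static struct owner *item_lock_owner(struct item *it)
{
	struct owner *locked, *o;

	locked = READ_ONCE(it->owner);
	spin_lock(&locked->lock);
	while (unlikely(locked != (o = READ_ONCE(it->owner)))) {
		/* Lost the race: drop the stale lock and chase the new owner. */
		spin_unlock(&locked->lock);
		locked = o;
		spin_lock(&locked->lock);
	}

	return locked;	/* caller releases locked->lock when done */
}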
*/ locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); + spin_lock_irq(&locked->sched_engine->lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); + spin_unlock(&locked->sched_engine->lock); locked = engine; - spin_lock(&locked->active.lock); + spin_lock(&locked->sched_engine->lock); } if (i915_request_is_active(rq)) { @@ -285,42 +250,11 @@ i915_request_active_engine(struct i915_request *rq, ret = true; } - spin_unlock_irq(&locked->active.lock); + spin_unlock_irq(&locked->sched_engine->lock); return ret; } - -static void remove_from_engine(struct i915_request *rq) -{ - struct intel_engine_cs *engine, *locked; - - /* - * Virtual engines complicate acquiring the engine timeline lock, - * as their rq->engine pointer is not stable until under that - * engine lock. The simple ploy we use is to take the lock then - * check that the rq still belongs to the newly locked engine. - */ - locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); - locked = engine; - } - list_del_init(&rq->sched.link); - - clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); - - /* Prevent further __await_execution() registering a cb, then flush */ - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - - spin_unlock_irq(&locked->active.lock); - - __notify_execute_cb_imm(rq); -} - static void __rq_init_watchdog(struct i915_request *rq) { rq->watchdog.timer.function = NULL; @@ -417,8 +351,7 @@ bool i915_request_retire(struct i915_request *rq) * after removing the breadcrumb and signaling it, so that we do not * inadvertently attach the breadcrumb to a completed request. */ - if (!list_empty(&rq->sched.link)) - remove_from_engine(rq); + rq->engine->remove_active_request(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ @@ -443,6 +376,7 @@ void i915_request_retire_upto(struct i915_request *rq) do { tmp = list_first_entry(&tl->requests, typeof(*tmp), link); + GEM_BUG_ON(!i915_request_completed(tmp)); } while (i915_request_retire(tmp) && tmp != rq); } @@ -517,19 +451,14 @@ static bool __request_in_flight(const struct i915_request *signal) static int __await_execution(struct i915_request *rq, struct i915_request *signal, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal), gfp_t gfp) { struct execute_cb *cb; - if (i915_request_is_active(signal)) { - if (hook) - hook(rq, &signal->fence); + if (i915_request_is_active(signal)) return 0; - } - cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); + cb = kmem_cache_alloc(slab_execute_cbs, gfp); if (!cb) return -ENOMEM; @@ -537,12 +466,6 @@ __await_execution(struct i915_request *rq, i915_sw_fence_await(cb->fence); init_irq_work(&cb->work, irq_execute_cb); - if (hook) { - cb->hook = hook; - cb->signal = i915_request_get(signal); - cb->work.func = irq_execute_cb_hook; - } - /* * Register the callback first, then see if the signaler is already * active. 
This ensures that if we race with the @@ -559,7 +482,7 @@ __await_execution(struct i915_request *rq, if (llist_add(&cb->work.node.llist, &signal->execute_cb)) { if (i915_request_is_active(signal) || __request_in_flight(signal)) - __notify_execute_cb_imm(signal); + i915_request_notify_execute_cb_imm(signal); } return 0; @@ -637,7 +560,7 @@ bool __i915_request_submit(struct i915_request *request) RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* * With the advent of preempt-to-busy, we frequently encounter @@ -649,7 +572,7 @@ bool __i915_request_submit(struct i915_request *request) * * We must remove the request from the caller's priority queue, * and the caller must only call us when the request is in their - * priority queue, under the active.lock. This ensures that the + * priority queue, under the sched_engine->lock. This ensures that the * request has *not* yet been retired and we can safely move * the request into the engine->active.list where it will be * dropped upon retiring. (Otherwise if resubmit a *retired* @@ -690,11 +613,15 @@ bool __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); trace_i915_request_execute(request); - engine->serial++; + if (engine->bump_serial) + engine->bump_serial(engine); + else + engine->serial++; + result = true; GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - list_move_tail(&request->sched.link, &engine->active.requests); + engine->add_active_request(request); active: clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); @@ -724,11 +651,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -742,7 +669,7 @@ void __i915_request_unsubmit(struct i915_request *request) RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* * Before we remove this breadcrumb from the signal list, we have @@ -775,23 +702,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void __cancel_request(struct i915_request *rq) -{ - struct intel_engine_cs *engine = NULL; - - i915_request_active_engine(rq, &engine); - - if (engine && intel_engine_pulse(engine)) - intel_gt_handle_error(engine->gt, engine->mask, 0, - "request cancellation by %s", - current->comm); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } void i915_request_cancel(struct i915_request *rq, int error) @@ -801,7 +716,7 @@ void i915_request_cancel(struct i915_request *rq, int error) set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); - __cancel_request(rq); + intel_context_cancel_request(rq->context, rq); } static int __i915_sw_fence_call @@ -889,7 +804,7 @@ request_alloc_slow(struct intel_timeline *tl, rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); - rq = kmem_cache_alloc(global.slab_requests, + rq = kmem_cache_alloc(slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (rq) return rq; @@ -902,7 +817,7 @@ request_alloc_slow(struct intel_timeline *tl, retire_requests(tl); out: - return kmem_cache_alloc(global.slab_requests, gfp); + return kmem_cache_alloc(slab_requests, gfp); } static void __i915_request_ctor(void *arg) @@ -963,7 +878,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) * * Do not use kmem_cache_zalloc() here! */ - rq = kmem_cache_alloc(global.slab_requests, + rq = kmem_cache_alloc(slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); @@ -973,7 +888,19 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - rq->context = ce; + /* + * Hold a reference to the intel_context over life of an i915_request. + * Without this an i915_request can exist after the context has been + * destroyed (e.g. request retired, context closed, but user space holds + * a reference to the request from an out fence). In the case of GuC + * submission + virtual engine, the engine that the request references + * is also destroyed which can trigger bad pointer dref in fence ops + * (e.g. i915_fence_get_driver_name). We could likely change these + * functions to avoid touching the engine but let's just be safe and + * hold the intel_context reference. In execlist mode the request always + * eventually points to a physical engine so this isn't an issue. + */ + rq->context = intel_context_get(ce); rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; @@ -996,6 +923,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ + rq->guc_prio = GUC_PRIO_INIT; + /* We bump the ref for the fence chain */ i915_sw_fence_reinit(&i915_request_get(rq)->submit); i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); @@ -1050,7 +979,8 @@ err_unwind: GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); err_free: - kmem_cache_free(global.slab_requests, rq); + intel_context_put(ce); + kmem_cache_free(slab_requests, rq); err_unreserve: intel_context_unpin(ce); return ERR_PTR(ret); @@ -1253,7 +1183,7 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; /* Only submit our spinner after the signaler is running! 
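/*
 * The comment added above pins the context for the whole life of the
 * request: one reference is taken at creation (and dropped again on the
 * error-unwind path), and the matching put happens only when the fence
 * is finally released.  A generic sketch of that pairing, using kref as
 * a stand-in for the context's reference count (hypothetical types):
 */
#include <linux/kref.h>
#include <linux/slab.h>

struct ctx { struct kref ref; };

struct req {
	struct ctx *ctx;	/* holds a reference for the request's lifetime */
};

static void ctx_release(struct kref *kref)
{
	kfree(container_of(kref, struct ctx, ref));
}

static struct req *req_create(struct ctx *ctx, gfp_t gfp)
{
	struct req *rq = kzalloc(sizeof(*rq), gfp);

	if (!rq)
		return NULL;

	kref_get(&ctx->ref);	/* paired with the put in req_release() */
	rq->ctx = ctx;
	return rq;
}

static void req_release(struct req *rq)
{
	kref_put(&rq->ctx->ref, ctx_release);	/* may free the context */
	kfree(rq);
}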
*/ - if (__await_execution(to, from, NULL, gfp)) + if (__await_execution(to, from, gfp)) goto await_fence; if (__emit_semaphore_wait(to, from, from->fence.seqno)) @@ -1284,16 +1214,14 @@ static int intel_timeline_sync_set_start(struct intel_timeline *tl, static int __i915_request_await_execution(struct i915_request *to, - struct i915_request *from, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct i915_request *from) { int err; GEM_BUG_ON(intel_context_is_barrier(from->context)); /* Submit both requests at the same time */ - err = __await_execution(to, from, hook, I915_FENCE_GFP); + err = __await_execution(to, from, I915_FENCE_GFP); if (err) return err; @@ -1343,7 +1271,7 @@ __i915_request_await_execution(struct i915_request *to, } /* Couple the dependency tree for PI on this exposed to->fence */ - if (to->engine->schedule) { + if (to->engine->sched_engine->schedule) { err = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_WEAK); @@ -1406,9 +1334,7 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) int i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; @@ -1439,8 +1365,7 @@ i915_request_await_execution(struct i915_request *rq, if (dma_fence_is_i915(fence)) ret = __i915_request_await_execution(rq, - to_request(fence), - hook); + to_request(fence)); else ret = i915_request_await_external(rq, fence); if (ret < 0) @@ -1466,7 +1391,7 @@ await_request_submit(struct i915_request *to, struct i915_request *from) &from->submit, I915_FENCE_GFP); else - return __i915_request_await_execution(to, from, NULL); + return __i915_request_await_execution(to, from); } static int @@ -1482,7 +1407,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; } - if (to->engine->schedule) { + if (to->engine->sched_engine->schedule) { ret = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_EXTERNAL); @@ -1490,7 +1415,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) + if (!intel_engine_uses_guc(to->engine) && + is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) ret = await_request_submit(to, from); else ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); @@ -1649,6 +1575,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); if (prev && !__i915_request_is_complete(prev)) { + bool uses_guc = intel_engine_uses_guc(rq->engine); + /* * The requests are supposed to be kept in order. 
However, * we need to be wary in case the timeline->last_request @@ -1659,7 +1587,9 @@ __i915_request_add_to_timeline(struct i915_request *rq) i915_seqno_passed(prev->fence.seqno, rq->fence.seqno)); - if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) + if ((!uses_guc && + is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) || + (uses_guc && prev->context == rq->context)) i915_sw_fence_await_sw_fence(&rq->submit, &prev->submit, &rq->submitq); @@ -1667,7 +1597,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) __i915_sw_fence_await_dma_fence(&rq->submit, &prev->fence, &rq->dmaq); - if (rq->engine->schedule) + if (rq->engine->sched_engine->schedule) __i915_sched_node_add_dependency(&rq->sched, &prev->sched, &rq->dep, @@ -1739,8 +1669,8 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - if (attr && rq->engine->schedule) - rq->engine->schedule(rq, attr); + if (attr && rq->engine->sched_engine->schedule) + rq->engine->sched_engine->schedule(rq, attr); local_bh_disable(); __i915_request_queue_bh(rq); @@ -2100,31 +2030,61 @@ void i915_request_show(struct drm_printer *m, name); } -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_request.c" -#include "selftests/i915_request.c" -#endif +static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq) +{ + u32 ring = ENGINE_READ(engine, RING_START); + + return ring == i915_ggtt_offset(rq->ring->vma); +} -static void i915_global_request_shrink(void) +static bool match_ring(struct i915_request *rq) { - kmem_cache_shrink(global.slab_execute_cbs); - kmem_cache_shrink(global.slab_requests); + struct intel_engine_cs *engine; + bool found; + int i; + + if (!intel_engine_is_virtual(rq->engine)) + return engine_match_ring(rq->engine, rq); + + found = false; + i = 0; + while ((engine = intel_engine_get_sibling(rq->engine, i++))) { + found = engine_match_ring(engine, rq); + if (found) + break; + } + + return found; } -static void i915_global_request_exit(void) +enum i915_request_state i915_test_request_state(struct i915_request *rq) { - kmem_cache_destroy(global.slab_execute_cbs); - kmem_cache_destroy(global.slab_requests); + if (i915_request_completed(rq)) + return I915_REQUEST_COMPLETE; + + if (!i915_request_started(rq)) + return I915_REQUEST_PENDING; + + if (match_ring(rq)) + return I915_REQUEST_ACTIVE; + + return I915_REQUEST_QUEUED; } -static struct i915_global_request global = { { - .shrink = i915_global_request_shrink, - .exit = i915_global_request_exit, -} }; +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_request.c" +#endif + +void i915_request_module_exit(void) +{ + kmem_cache_destroy(slab_execute_cbs); + kmem_cache_destroy(slab_requests); +} -int __init i915_global_request_init(void) +int __init i915_request_module_init(void) { - global.slab_requests = + slab_requests = kmem_cache_create("i915_request", sizeof(struct i915_request), __alignof__(struct i915_request), @@ -2132,20 +2092,19 @@ int __init i915_global_request_init(void) SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU, __i915_request_ctor); - if (!global.slab_requests) + if (!slab_requests) return -ENOMEM; - global.slab_execute_cbs = KMEM_CACHE(execute_cb, + slab_execute_cbs = KMEM_CACHE(execute_cb, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); - if (!global.slab_execute_cbs) + if (!slab_execute_cbs) goto err_requests; - 
i915_global_register(&global.base); return 0; err_requests: - kmem_cache_destroy(global.slab_requests); + kmem_cache_destroy(slab_requests); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 270f6cd37650..1bc1349ba3c2 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -285,6 +285,23 @@ struct i915_request { struct hrtimer timer; } watchdog; + /* + * Requests may need to be stalled when using GuC submission waiting for + * certain GuC operations to complete. If that is the case, stalled + * requests are added to a per context list of stalled requests. The + * below list_head is the link in that list. + */ + struct list_head guc_fence_link; + + /** + * Priority level while the request is inflight. Differs from i915 + * scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + */ +#define GUC_PRIO_INIT 0xff +#define GUC_PRIO_FINI 0xfe + u8 guc_prio; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; @@ -352,9 +369,7 @@ int i915_request_await_object(struct i915_request *to, int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); int i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)); + struct dma_fence *fence); void i915_request_add(struct i915_request *rq); @@ -613,7 +628,7 @@ i915_request_active_timeline(const struct i915_request *rq) * this submission. */ return rcu_dereference_protected(rq->timeline, - lockdep_is_held(&rq->engine->active.lock)); + lockdep_is_held(&rq->engine->sched_engine->lock)); } static inline u32 @@ -641,4 +656,19 @@ bool i915_request_active_engine(struct i915_request *rq, struct intel_engine_cs **active); +void i915_request_notify_execute_cb_imm(struct i915_request *rq); + +enum i915_request_state { + I915_REQUEST_UNKNOWN = 0, + I915_REQUEST_COMPLETE, + I915_REQUEST_PENDING, + I915_REQUEST_QUEUED, + I915_REQUEST_ACTIVE, +}; + +enum i915_request_state i915_test_request_state(struct i915_request *rq); + +void i915_request_module_exit(void); +int i915_request_module_init(void); + #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 69e9e6c3135e..4a6712dca838 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -6,6 +6,9 @@ #include "i915_scatterlist.h" +#include "i915_buddy.h" +#include "i915_ttm_buddy_manager.h" + #include <drm/drm_mm.h> #include <linux/slab.h> @@ -104,6 +107,83 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, return st; } +/** + * i915_sg_from_buddy_resource - Create an sg_table from a struct + * i915_buddy_block list + * @res: The struct i915_ttm_buddy_resource. + * @region_start: An offset to add to the dma addresses of the sg list. + * + * Create a struct sg_table, initializing it from struct i915_buddy_block list, + * taking a maximum segment length into account, splitting into segments + * if necessary. + * + * Return: A pointer to a kmalloced struct sg_table on success, negative + * error code cast to an error pointer on failure. 
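/*
 * "Negative error code cast to an error pointer" means callers must
 * test the result with IS_ERR() rather than for NULL.  A hypothetical
 * caller sketch (includes abbreviated; assumes the i915/TTM headers for
 * the i915_sg_from_buddy_resource() and struct ttm_resource prototypes):
 */
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int consume_sg(struct ttm_resource *res, u64 region_start)
{
	struct sg_table *st = i915_sg_from_buddy_resource(res, region_start);

	if (IS_ERR(st))
		return PTR_ERR(st);	/* e.g. -ENOMEM */

	/* ... map / use the table ... */

	sg_free_table(st);	/* the table and its struct are both kmalloc'ed */
	kfree(st);
	return 0;
}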
+ */ +struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, + u64 region_start) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + const u64 size = res->num_pages << PAGE_SHIFT; + const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct i915_buddy_mm *mm = bman_res->mm; + struct list_head *blocks = &bman_res->blocks; + struct i915_buddy_block *block; + struct scatterlist *sg; + struct sg_table *st; + resource_size_t prev_end; + + GEM_BUG_ON(list_empty(blocks)); + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + st->nents = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, i915_buddy_block_size(mm, block)); + offset = i915_buddy_block_offset(block); + + while (block_size) { + u64 len; + + if (offset != prev_end || sg->length >= max_segment) { + if (st->nents) + sg = __sg_next(sg); + + sg_dma_address(sg) = region_start + offset; + sg_dma_len(sg) = 0; + sg->length = 0; + st->nents++; + } + + len = min(block_size, max_segment - sg->length); + sg->length += len; + sg_dma_len(sg) += len; + + offset += len; + block_size -= len; + + prev_end = offset; + } + } + + sg_mark_end(sg); + i915_sg_trim(st); + + return st; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #endif diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 5acca45ea981..b8bd5925b03f 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -14,6 +14,7 @@ #include "i915_gem.h" struct drm_mm_node; +struct ttm_resource; /* * Optimised SGL iterator for GEM objects @@ -145,4 +146,8 @@ bool i915_sg_trim(struct sg_table *orig_st); struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, u64 region_start); + +struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, + u64 region_start); + #endif diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index efa638c3acc7..762127dd56c5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -7,15 +7,11 @@ #include <linux/mutex.h> #include "i915_drv.h" -#include "i915_globals.h" #include "i915_request.h" #include "i915_scheduler.h" -static struct i915_global_scheduler { - struct i915_global base; - struct kmem_cache *slab_dependencies; - struct kmem_cache *slab_priorities; -} global; +static struct kmem_cache *slab_dependencies; +static struct kmem_cache *slab_priorities; static DEFINE_SPINLOCK(schedule_lock); @@ -40,7 +36,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists) +static void assert_priolists(struct i915_sched_engine * const sched_engine) { struct rb_node *rb; long last_prio; @@ -48,11 +44,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - GEM_BUG_ON(rb_first_cached(&execlists->queue) != - rb_first(&execlists->queue.rb_root)); + GEM_BUG_ON(rb_first_cached(&sched_engine->queue) != + rb_first(&sched_engine->queue.rb_root)); last_prio = INT_MAX; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = 
rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); GEM_BUG_ON(p->priority > last_prio); @@ -61,23 +57,22 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) } struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) +i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio) { - struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; - lockdep_assert_held(&engine->active.lock); - assert_priolists(execlists); + lockdep_assert_held(&sched_engine->lock); + assert_priolists(sched_engine); - if (unlikely(execlists->no_priolist)) + if (unlikely(sched_engine->no_priolist)) prio = I915_PRIORITY_NORMAL; find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - parent = &execlists->queue.rb_root.rb_node; + parent = &sched_engine->queue.rb_root.rb_node; while (*parent) { rb = *parent; p = to_priolist(rb); @@ -92,9 +87,9 @@ find_priolist: } if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; + p = &sched_engine->default_priolist; } else { - p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); + p = kmem_cache_alloc(slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ if (unlikely(!p)) { prio = I915_PRIORITY_NORMAL; /* recurses just once */ @@ -107,7 +102,7 @@ find_priolist: * requests, so if userspace lied about their * dependencies that reordering may be visible. */ - execlists->no_priolist = true; + sched_engine->no_priolist = true; goto find_priolist; } } @@ -116,27 +111,27 @@ find_priolist: INIT_LIST_HEAD(&p->requests); rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); + rb_insert_color_cached(&p->node, &sched_engine->queue, first); return &p->requests; } void __i915_priolist_free(struct i915_priolist *p) { - kmem_cache_free(global.slab_priorities, p); + kmem_cache_free(slab_priorities, p); } struct sched_cache { struct list_head *priolist; }; -static struct intel_engine_cs * -sched_lock_engine(const struct i915_sched_node *node, - struct intel_engine_cs *locked, +static struct i915_sched_engine * +lock_sched_engine(struct i915_sched_node *node, + struct i915_sched_engine *locked, struct sched_cache *cache) { const struct i915_request *rq = node_to_request(node); - struct intel_engine_cs *engine; + struct i915_sched_engine *sched_engine; GEM_BUG_ON(!locked); @@ -146,81 +141,22 @@ sched_lock_engine(const struct i915_sched_node *node, * engine lock. The simple ploy we use is to take the lock then * check that the rq still belongs to the newly locked engine. 
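/*
 * The priolist lookup earlier in this hunk is a find-or-insert walk on
 * an rb_root_cached, with the leftmost node kept cached for O(1)
 * dequeue.  A stripped-down sketch of the pattern with a plain integer
 * key (smallest key leftmost here):
 */
#include <linux/rbtree.h>
#include <linux/slab.h>

struct knode {
	struct rb_node rb;
	int key;
};

static struct knode *find_or_insert(struct rb_root_cached *root, int key,
				    gfp_t gfp)
{
	struct rb_node **link = &root->rb_root.rb_node, *parent = NULL;
	bool leftmost = true;
	struct knode *n;

	while (*link) {
		parent = *link;
		n = rb_entry(parent, struct knode, rb);
		if (key == n->key)
			return n;		/* found an existing node */
		if (key < n->key) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	n = kmalloc(sizeof(*n), gfp);
	if (!n)
		return NULL;

	n->key = key;
	rb_link_node(&n->rb, parent, link);
	rb_insert_color_cached(&n->rb, root, leftmost);
	return n;
}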
*/ - while (locked != (engine = READ_ONCE(rq->engine))) { - spin_unlock(&locked->active.lock); + while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) { + spin_unlock(&locked->lock); memset(cache, 0, sizeof(*cache)); - spin_lock(&engine->active.lock); - locked = engine; + spin_lock(&sched_engine->lock); + locked = sched_engine; } - GEM_BUG_ON(locked != engine); + GEM_BUG_ON(locked != sched_engine); return locked; } -static inline int rq_prio(const struct i915_request *rq) -{ - return rq->sched.attr.priority; -} - -static inline bool need_preempt(int prio, int active) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - */ - return prio >= max(I915_PRIORITY_NORMAL, active); -} - -static void kick_submission(struct intel_engine_cs *engine, - const struct i915_request *rq, - int prio) -{ - const struct i915_request *inflight; - - /* - * We only need to kick the tasklet once for the high priority - * new context we add into the queue. - */ - if (prio <= engine->execlists.queue_priority_hint) - return; - - rcu_read_lock(); - - /* Nothing currently active? We're overdue for a submission! */ - inflight = execlists_active(&engine->execlists); - if (!inflight) - goto unlock; - - /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. - */ - if (inflight->context == rq->context) - goto unlock; - - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); - - engine->execlists.queue_priority_hint = prio; - if (need_preempt(prio, rq_prio(inflight))) - tasklet_hi_schedule(&engine->execlists.tasklet); - -unlock: - rcu_read_unlock(); -} - static void __i915_schedule(struct i915_sched_node *node, const struct i915_sched_attr *attr) { const int prio = max(attr->priority, node->attr.priority); - struct intel_engine_cs *engine; + struct i915_sched_engine *sched_engine; struct i915_dependency *dep, *p; struct i915_dependency stack; struct sched_cache cache; @@ -295,23 +231,31 @@ static void __i915_schedule(struct i915_sched_node *node, } memset(&cache, 0, sizeof(cache)); - engine = node_to_request(node)->engine; - spin_lock(&engine->active.lock); + sched_engine = node_to_request(node)->engine->sched_engine; + spin_lock(&sched_engine->lock); /* Fifo and depth-first replacement ensure our deps execute before us */ - engine = sched_lock_engine(node, engine, &cache); + sched_engine = lock_sched_engine(node, sched_engine, &cache); list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_request *from = container_of(dep->signaler, + struct i915_request, + sched); INIT_LIST_HEAD(&dep->dfs_link); node = dep->signaler; - engine = sched_lock_engine(node, engine, &cache); - lockdep_assert_held(&engine->active.lock); + sched_engine = lock_sched_engine(node, sched_engine, &cache); + lockdep_assert_held(&sched_engine->lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) continue; - GEM_BUG_ON(node_to_request(node)->engine != engine); + GEM_BUG_ON(node_to_request(node)->engine->sched_engine != + sched_engine); + + /* Must be called before changing the nodes priority */ + if 
(sched_engine->bump_inflight_request_prio) + sched_engine->bump_inflight_request_prio(from, prio); WRITE_ONCE(node->attr.priority, prio); @@ -329,16 +273,17 @@ static void __i915_schedule(struct i915_sched_node *node, if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = - i915_sched_lookup_priolist(engine, + i915_sched_lookup_priolist(sched_engine, prio); list_move_tail(&node->link, cache.priolist); } /* Defer (tasklet) submission until after all of our updates. */ - kick_submission(engine, node_to_request(node), prio); + if (sched_engine->kick_backend) + sched_engine->kick_backend(node_to_request(node), prio); } - spin_unlock(&engine->active.lock); + spin_unlock(&sched_engine->lock); } void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) @@ -371,13 +316,13 @@ void i915_sched_node_reinit(struct i915_sched_node *node) static struct i915_dependency * i915_dependency_alloc(void) { - return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); + return kmem_cache_alloc(slab_dependencies, GFP_KERNEL); } static void i915_dependency_free(struct i915_dependency *dep) { - kmem_cache_free(global.slab_dependencies, dep); + kmem_cache_free(slab_dependencies, dep); } bool __i915_sched_node_add_dependency(struct i915_sched_node *node, @@ -489,39 +434,78 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } -static void i915_global_scheduler_shrink(void) +static void default_destroy(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + + tasklet_kill(&sched_engine->tasklet); /* flush the callback */ + kfree(sched_engine); +} + +static bool default_disabled(struct i915_sched_engine *sched_engine) { - kmem_cache_shrink(global.slab_dependencies); - kmem_cache_shrink(global.slab_priorities); + return false; } -static void i915_global_scheduler_exit(void) +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass) { - kmem_cache_destroy(global.slab_dependencies); - kmem_cache_destroy(global.slab_priorities); + struct i915_sched_engine *sched_engine; + + sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL); + if (!sched_engine) + return NULL; + + kref_init(&sched_engine->ref); + + sched_engine->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->destroy = default_destroy; + sched_engine->disabled = default_disabled; + + INIT_LIST_HEAD(&sched_engine->requests); + INIT_LIST_HEAD(&sched_engine->hold); + + spin_lock_init(&sched_engine->lock); + lockdep_set_subclass(&sched_engine->lock, subclass); + + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. 
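/*
 * default_destroy() above also encodes an ordering rule worth noting:
 * an object embedding a tasklet must tasklet_kill() (which waits for a
 * callback that is already running) before the memory is freed.  A
 * minimal sketch with a hypothetical wrapper:
 */
#include <linux/interrupt.h>
#include <linux/slab.h>

struct engine_wrap {
	struct tasklet_struct tasklet;
	/* ... submission state reachable from the tasklet callback ... */
};

static void engine_wrap_free(struct engine_wrap *e)
{
	/*
	 * After tasklet_kill() the callback can neither be running nor be
	 * rescheduled, so freeing the embedding object is safe.
	 */
	tasklet_kill(&e->tasklet);
	kfree(e);
}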
+ */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + local_irq_disable(); + lock_map_acquire(&sched_engine->lock.dep_map); + lock_map_release(&sched_engine->lock.dep_map); + local_irq_enable(); +#endif + + return sched_engine; } -static struct i915_global_scheduler global = { { - .shrink = i915_global_scheduler_shrink, - .exit = i915_global_scheduler_exit, -} }; +void i915_scheduler_module_exit(void) +{ + kmem_cache_destroy(slab_dependencies); + kmem_cache_destroy(slab_priorities); +} -int __init i915_global_scheduler_init(void) +int __init i915_scheduler_module_init(void) { - global.slab_dependencies = KMEM_CACHE(i915_dependency, + slab_dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU); - if (!global.slab_dependencies) + if (!slab_dependencies) return -ENOMEM; - global.slab_priorities = KMEM_CACHE(i915_priolist, 0); - if (!global.slab_priorities) + slab_priorities = KMEM_CACHE(i915_priolist, 0); + if (!slab_priorities) goto err_priorities; - i915_global_register(&global.base); return 0; err_priorities: - kmem_cache_destroy(global.slab_priorities); + kmem_cache_destroy(slab_priorities); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 858a0938f47a..0b9b86af6c7f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -39,7 +39,7 @@ void i915_schedule(struct i915_request *request, const struct i915_sched_attr *attr); struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); +i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio); void __i915_priolist_free(struct i915_priolist *p); static inline void i915_priolist_free(struct i915_priolist *p) @@ -48,9 +48,61 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass); + +static inline struct i915_sched_engine * +i915_sched_engine_get(struct i915_sched_engine *sched_engine) +{ + kref_get(&sched_engine->ref); + return sched_engine; +} + +static inline void +i915_sched_engine_put(struct i915_sched_engine *sched_engine) +{ + kref_put(&sched_engine->ref, sched_engine->destroy); +} + +static inline bool +i915_sched_engine_is_empty(struct i915_sched_engine *sched_engine) +{ + return RB_EMPTY_ROOT(&sched_engine->queue.rb_root); +} + +static inline void +i915_sched_engine_reset_on_empty(struct i915_sched_engine *sched_engine) +{ + if (i915_sched_engine_is_empty(sched_engine)) + sched_engine->no_priolist = false; +} + +static inline void +i915_sched_engine_active_lock_bh(struct i915_sched_engine *sched_engine) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&sched_engine->tasklet); +} + +static inline void +i915_sched_engine_active_unlock_bh(struct i915_sched_engine *sched_engine) +{ + tasklet_unlock(&sched_engine->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, int indent); +static inline bool +i915_sched_engine_disabled(struct i915_sched_engine *sched_engine) +{ + return sched_engine->disabled(sched_engine); +} + +void i915_scheduler_module_exit(void); +int i915_scheduler_module_init(void); + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 343ed44d5ed4..b0a1b58c7893 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -91,4 +91,115 @@ struct i915_dependency { &(rq__)->sched.signalers_list, \ signal_link) +/** + * struct i915_sched_engine - scheduler engine + * + * A schedule engine represents a submission queue with different priority + * bands. It contains all the common state (relative to the backend) to queue, + * track, and submit a request. + * + * This object at the moment is quite i915 specific but will transition into a + * container for the drm_gpu_scheduler plus a few other variables once the i915 + * is integrated with the DRM scheduler. + */ +struct i915_sched_engine { + /** + * @ref: reference count of schedule engine object + */ + struct kref ref; + + /** + * @lock: protects requests in priority lists, requests, hold and + * tasklet while running + */ + spinlock_t lock; + + /** + * @requests: list of requests inflight on this schedule engine + */ + struct list_head requests; + + /** + * @hold: list of ready requests, but on hold + */ + struct list_head hold; + + /** + * @tasklet: softirq tasklet for submission + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @private_data: private data of the submission backend + */ + void *private_data; + + /** + * @destroy: destroy schedule engine / cleanup in backend + */ + void (*destroy)(struct kref *kref); + + /** + * @disabled: check if backend has disabled submission + */ + bool (*disabled)(struct i915_sched_engine *sched_engine); + + /** + * @kick_backend: kick backend after a request's priority has changed + */ + void (*kick_backend)(const struct i915_request *rq, + int prio); + + /** + * @bump_inflight_request_prio: update priority of an inflight request + */ + void (*bump_inflight_request_prio)(struct i915_request *rq, + int prio); + + /** + * @retire_inflight_request_prio: indicate request is retired to + * priority tracking + */ + void (*retire_inflight_request_prio)(struct i915_request *rq); + + /** + * @schedule: adjust priority of request + * + * Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. 
Note the request itself may + * not be ready to run! + */ + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); +}; + #endif /* _I915_SCHEDULER_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c index a3a81bb8f2c3..5b33ef23d54c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -16,11 +16,8 @@ static void fence_complete(struct dma_fence_work *f) static void fence_work(struct work_struct *work) { struct dma_fence_work *f = container_of(work, typeof(*f), work); - int err; - err = f->ops->work(f); - if (err) - dma_fence_set_error(&f->dma, err); + f->ops->work(f); fence_complete(f); dma_fence_put(&f->dma); diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h index 2c409f11c5c5..d56806918d13 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.h +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h @@ -17,7 +17,7 @@ struct dma_fence_work; struct dma_fence_work_ops { const char *name; - int (*work)(struct dma_fence_work *f); + void (*work)(struct dma_fence_work *f); void (*release)(struct dma_fence_work *f); }; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 873bf996ceb5..cdf0e9c6fd73 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -272,7 +272,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->cur_freq)); + return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -326,9 +326,10 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = >->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->max_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -336,7 +337,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = >->rps; ssize_t ret; u32 val; @@ -344,53 +346,26 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - mutex_lock(&rps->lock); - - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || - val > rps->max_freq || - val < rps->min_freq_softlimit) { - ret = -EINVAL; - goto unlock; - } - - if (val > rps->rp0_freq) - DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(rps, val)); - - rps->max_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* - * We still need *_set_rps to process the new max_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. 
- */ - intel_rps_set(rps, val); - -unlock: - mutex_unlock(&rps->lock); + ret = intel_rps_set_max_frequency(rps, val); return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_gt *gt = &i915->gt; + struct intel_rps *rps = >->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->min_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, struct device_attribute *attr, const char *buf, size_t count) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &i915->gt.rps; ssize_t ret; u32 val; @@ -398,31 +373,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - mutex_lock(&rps->lock); - - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || - val > rps->max_freq || - val > rps->max_freq_softlimit) { - ret = -EINVAL; - goto unlock; - } - - rps->min_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* - * We still need *_set_rps to process the new min_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. - */ - intel_rps_set(rps, val); - -unlock: - mutex_unlock(&rps->lock); + ret = intel_rps_set_min_frequency(rps, val); return ret ?: count; } @@ -448,11 +399,11 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(rps, rps->rp0_freq); + val = intel_rps_get_rp0_frequency(rps); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(rps, rps->rp1_freq); + val = intel_rps_get_rp1_frequency(rps); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(rps, rps->min_freq); + val = intel_rps_get_rpn_frequency(rps); else BUG(); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6778ad2a14a4..806ad688274b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -794,30 +794,40 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) __field(u64, ctx) + __field(u32, guc_id) __field(u16, class) __field(u16, instance) __field(u32, seqno) + __field(u32, tail) ), TP_fast_assign( __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; + __entry->guc_id = rq->context->guc_id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; + __entry->tail = rq->tail; ), - TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, guc_id=%u, ctx=%llu, seqno=%u, tail=%u", __entry->dev, __entry->class, __entry->instance, - __entry->ctx, __entry->seqno) + __entry->guc_id, __entry->ctx, __entry->seqno, + __entry->tail) ); DEFINE_EVENT(i915_request, i915_request_add, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_request, i915_request_guc_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) +); + 
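As a sketch of how a submission backend might wire up the new i915_sched_engine vfuncs documented in i915_scheduler_types.h above; the example_* names and the kzalloc-based constructor are assumptions for illustration only, not part of this series:

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include "i915_scheduler.h"
#include "i915_scheduler_types.h"

static bool example_sched_engine_disabled(struct i915_sched_engine *se)
{
	/* The backend reports whether it has stopped accepting submissions. */
	return !se->private_data;
}

static void example_sched_engine_destroy(struct kref *kref)
{
	struct i915_sched_engine *se = container_of(kref, typeof(*se), ref);

	kfree(se);
}

static struct i915_sched_engine *example_sched_engine_create(void *backend)
{
	struct i915_sched_engine *se;

	se = kzalloc(sizeof(*se), GFP_KERNEL);
	if (!se)
		return NULL;

	kref_init(&se->ref);
	spin_lock_init(&se->lock);
	INIT_LIST_HEAD(&se->requests);
	INIT_LIST_HEAD(&se->hold);
	se->queue = RB_ROOT_CACHED;
	se->queue_priority_hint = INT_MIN;
	se->private_data = backend;
	se->destroy = example_sched_engine_destroy;
	se->disabled = example_sched_engine_disabled;

	return se;
}

A caller would then check the backend state through the inline helper, i915_sched_engine_disabled(se), before queueing further requests.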
DEFINE_EVENT(i915_request, i915_request_submit, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) @@ -885,9 +895,117 @@ TRACE_EVENT(i915_request_out, __entry->ctx, __entry->seqno, __entry->completed) ); +DECLARE_EVENT_CLASS(intel_context, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce), + + TP_STRUCT__entry( + __field(u32, guc_id) + __field(int, pin_count) + __field(u32, sched_state) + __field(u32, guc_sched_state_no_lock) + __field(u8, guc_prio) + ), + + TP_fast_assign( + __entry->guc_id = ce->guc_id; + __entry->pin_count = atomic_read(&ce->pin_count); + __entry->sched_state = ce->guc_state.sched_state; + __entry->guc_sched_state_no_lock = + atomic_read(&ce->guc_sched_state_no_lock); + __entry->guc_prio = ce->guc_prio; + ), + + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", + __entry->guc_id, __entry->pin_count, + __entry->sched_state, + __entry->guc_sched_state_no_lock, + __entry->guc_prio) +); + +DEFINE_EVENT(intel_context, intel_context_set_prio, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_reset, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_ban, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_register, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_deregister, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_deregister_done, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_enable, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_disable, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_done, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_create, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_fence_release, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_free, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_steal_guc_id, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_do_pin, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_do_unpin, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void +trace_i915_request_guc_submit(struct i915_request *rq) +{ +} + +static inline void trace_i915_request_submit(struct i915_request *rq) { } @@ -906,6 +1024,81 @@ static inline void trace_i915_request_out(struct i915_request *rq) { } + +static inline void +trace_intel_context_set_prio(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_reset(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_ban(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_register(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_deregister(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_deregister_done(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_enable(struct intel_context *ce) +{ +} + +static inline void 
+trace_intel_context_sched_disable(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_done(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_create(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_fence_release(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_free(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_steal_guc_id(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_do_pin(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_do_unpin(struct intel_context *ce) +{ +} #endif #endif diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c new file mode 100644 index 000000000000..6877362f6b85 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/slab.h> + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> + +#include "i915_ttm_buddy_manager.h" + +#include "i915_buddy.h" +#include "i915_gem.h" + +struct i915_ttm_buddy_manager { + struct ttm_resource_manager manager; + struct i915_buddy_mm mm; + struct list_head reserved; + struct mutex lock; + u64 default_page_size; +}; + +static struct i915_ttm_buddy_manager * +to_buddy_manager(struct ttm_resource_manager *man) +{ + return container_of(man, struct i915_ttm_buddy_manager, manager); +} + +static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_ttm_buddy_resource *bman_res; + struct i915_buddy_mm *mm = &bman->mm; + unsigned long n_pages; + unsigned int min_order; + u64 min_page_size; + u64 size; + int err; + + GEM_BUG_ON(place->fpfn || place->lpfn); + + bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL); + if (!bman_res) + return -ENOMEM; + + ttm_resource_init(bo, place, &bman_res->base); + INIT_LIST_HEAD(&bman_res->blocks); + bman_res->mm = mm; + + GEM_BUG_ON(!bman_res->base.num_pages); + size = bman_res->base.num_pages << PAGE_SHIFT; + + min_page_size = bman->default_page_size; + if (bo->page_alignment) + min_page_size = bo->page_alignment << PAGE_SHIFT; + + GEM_BUG_ON(min_page_size < mm->chunk_size); + min_order = ilog2(min_page_size) - ilog2(mm->chunk_size); + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mm->chunk_size); + } + + if (size > mm->size) { + err = -E2BIG; + goto err_free_res; + } + + n_pages = size >> ilog2(mm->chunk_size); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mm->max_order); + GEM_BUG_ON(order < min_order); + + do { + mutex_lock(&bman->lock); + block = i915_buddy_alloc(mm, order); + mutex_unlock(&bman->lock); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) { + err = -ENOSPC; + goto err_free_blocks; + } + } while (1); + + n_pages -= BIT(order); + + list_add_tail(&block->link, &bman_res->blocks); + + if (!n_pages) + break; + } while (1); + + *res = &bman_res->base; + return 0; + +err_free_blocks: + mutex_lock(&bman->lock); + i915_buddy_free_list(mm, &bman_res->blocks); + mutex_unlock(&bman->lock); +err_free_res: + kfree(bman_res); + return err; +} + +static void i915_ttm_buddy_man_free(struct 
ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + + mutex_lock(&bman->lock); + i915_buddy_free_list(&bman->mm, &bman_res->blocks); + mutex_unlock(&bman->lock); + + kfree(bman_res); +} + +static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { + .alloc = i915_ttm_buddy_man_alloc, + .free = i915_ttm_buddy_man_free, +}; + + +/** + * i915_ttm_buddy_man_init - Setup buddy allocator based ttm manager + * @bdev: The ttm device + * @type: Memory type we want to manage + * @use_tt: Set use_tt for the manager + * @size: The size in bytes to manage + * @default_page_size: The default minimum page size in bytes for allocations, + * this must be at least as large as @chunk_size, and can be overridden by + * setting the BO page_alignment, to be larger or smaller as needed. + * @chunk_size: The minimum page size in bytes for our allocations i.e + * order-zero + * + * Note that the starting address is assumed to be zero here, since this + * simplifies keeping the property where allocated blocks having natural + * power-of-two alignment. So long as the real starting address is some large + * power-of-two, or naturally start from zero, then this should be fine. Also + * the &i915_ttm_buddy_man_reserve interface can be used to preserve alignment + * if say there is some unusable range from the start of the region. We can + * revisit this in the future and make the interface accept an actual starting + * offset and let it take care of the rest. + * + * Note that if the @size is not aligned to the @chunk_size then we perform the + * required rounding to get the usable size. The final size in pages can be + * taken from &ttm_resource_manager.size. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_buddy_man_init(struct ttm_device *bdev, + unsigned int type, bool use_tt, + u64 size, u64 default_page_size, + u64 chunk_size) +{ + struct ttm_resource_manager *man; + struct i915_ttm_buddy_manager *bman; + int err; + + bman = kzalloc(sizeof(*bman), GFP_KERNEL); + if (!bman) + return -ENOMEM; + + err = i915_buddy_init(&bman->mm, size, chunk_size); + if (err) + goto err_free_bman; + + mutex_init(&bman->lock); + INIT_LIST_HEAD(&bman->reserved); + GEM_BUG_ON(default_page_size < chunk_size); + bman->default_page_size = default_page_size; + + man = &bman->manager; + man->use_tt = use_tt; + man->func = &i915_ttm_buddy_manager_func; + ttm_resource_manager_init(man, bman->mm.size >> PAGE_SHIFT); + + ttm_resource_manager_set_used(man, true); + ttm_set_driver_manager(bdev, type, man); + + return 0; + +err_free_bman: + kfree(bman); + return err; +} + +/** + * i915_ttm_buddy_man_fini - Destroy the buddy allocator ttm manager + * @bdev: The ttm device + * @type: Memory type we want to manage + * + * Note that if we reserved anything with &i915_ttm_buddy_man_reserve, this will + * also be freed for us here. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) +{ + struct ttm_resource_manager *man = ttm_manager_type(bdev, type); + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_mm *mm = &bman->mm; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + + ttm_set_driver_manager(bdev, type, NULL); + + mutex_lock(&bman->lock); + i915_buddy_free_list(mm, &bman->reserved); + i915_buddy_fini(mm); + mutex_unlock(&bman->lock); + + ttm_resource_manager_cleanup(man); + kfree(bman); + + return 0; +} + +/** + * i915_ttm_buddy_man_reserve - Reserve address range + * @man: The buddy allocator ttm manager + * @start: The offset in bytes, where the region start is assumed to be zero + * @size: The size in bytes + * + * Note that the starting address for the region is always assumed to be zero. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, + u64 start, u64 size) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_mm *mm = &bman->mm; + int ret; + + mutex_lock(&bman->lock); + ret = i915_buddy_alloc_range(mm, &bman->reserved, start, size); + mutex_unlock(&bman->lock); + + return ret; +} + diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h new file mode 100644 index 000000000000..0722d33f3e14 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_TTM_BUDDY_MANAGER_H__ +#define __I915_TTM_BUDDY_MANAGER_H__ + +#include <linux/list.h> +#include <linux/types.h> + +#include <drm/ttm/ttm_resource.h> + +struct ttm_device; +struct ttm_resource_manager; +struct i915_buddy_mm; + +/** + * struct i915_ttm_buddy_resource + * + * @base: struct ttm_resource base class we extend + * @blocks: the list of struct i915_buddy_block for this resource/allocation + * @mm: the struct i915_buddy_mm for this resource + * + * Extends the struct ttm_resource to manage an address space allocation with + * one or more struct i915_buddy_block. + */ +struct i915_ttm_buddy_resource { + struct ttm_resource base; + struct list_head blocks; + struct i915_buddy_mm *mm; +}; + +/** + * to_ttm_buddy_resource + * + * @res: the resource to upcast + * + * Upcast the struct ttm_resource object into a struct i915_ttm_buddy_resource. 
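A minimal usage sketch of the buddy-manager entry points above, assuming an illustrative TTM device, memory type and sizes that are not taken from this series: i915_ttm_buddy_man_init() registers the manager, i915_ttm_buddy_man_reserve() carves an unusable range out of the pool (the region start is assumed to be zero), and i915_ttm_buddy_man_fini() evicts and tears it down. Allocations are split greedily into power-of-two blocks, so with a PAGE_SIZE chunk a 10-page request becomes one order-3 block plus one order-1 block, while TTM_PL_FLAG_CONTIGUOUS rounds the request up to a single order-4 block.

#include <linux/sizes.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>

#include "i915_ttm_buddy_manager.h"

/* Illustrative values only. */
#define EXAMPLE_MEM_TYPE	(TTM_PL_PRIV + 0)
#define EXAMPLE_REGION_SIZE	SZ_4G
#define EXAMPLE_RESERVED_SIZE	SZ_8M

static int example_setup_buddy_region(struct ttm_device *bdev)
{
	int err;

	err = i915_ttm_buddy_man_init(bdev, EXAMPLE_MEM_TYPE, false,
				      EXAMPLE_REGION_SIZE, PAGE_SIZE,
				      PAGE_SIZE);
	if (err)
		return err;

	/* Keep a reserved chunk at the start of the region out of the pool. */
	err = i915_ttm_buddy_man_reserve(ttm_manager_type(bdev,
							  EXAMPLE_MEM_TYPE),
					 0, EXAMPLE_RESERVED_SIZE);
	if (err)
		i915_ttm_buddy_man_fini(bdev, EXAMPLE_MEM_TYPE);

	return err;
}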
+ */ +static inline struct i915_ttm_buddy_resource * +to_ttm_buddy_resource(struct ttm_resource *res) +{ + return container_of(res, struct i915_ttm_buddy_resource, base); +} + +int i915_ttm_buddy_man_init(struct ttm_device *bdev, + unsigned type, bool use_tt, + u64 size, u64 default_page_size, u64 chunk_size); +int i915_ttm_buddy_man_fini(struct ttm_device *bdev, + unsigned int type); + +int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, + u64 start, u64 size); + +#endif diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0f227f28b280..4b7fc4647e46 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -34,24 +34,20 @@ #include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" -static struct i915_global_vma { - struct i915_global base; - struct kmem_cache *slab_vmas; -} global; +static struct kmem_cache *slab_vmas; struct i915_vma *i915_vma_alloc(void) { - return kmem_cache_zalloc(global.slab_vmas, GFP_KERNEL); + return kmem_cache_zalloc(slab_vmas, GFP_KERNEL); } void i915_vma_free(struct i915_vma *vma) { - return kmem_cache_free(global.slab_vmas, vma); + return kmem_cache_free(slab_vmas, vma); } #if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) @@ -300,14 +296,13 @@ struct i915_vma_work { unsigned int flags; }; -static int __vma_bind(struct dma_fence_work *work) +static void __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); struct i915_vma *vma = vw->vma; vma->ops->bind_vma(vw->vm, &vw->stash, vma, vw->cache_level, vw->flags); - return 0; } static void __vma_release(struct dma_fence_work *work) @@ -1415,27 +1410,16 @@ void i915_vma_make_purgeable(struct i915_vma *vma) #include "selftests/i915_vma.c" #endif -static void i915_global_vma_shrink(void) +void i915_vma_module_exit(void) { - kmem_cache_shrink(global.slab_vmas); + kmem_cache_destroy(slab_vmas); } -static void i915_global_vma_exit(void) -{ - kmem_cache_destroy(global.slab_vmas); -} - -static struct i915_global_vma global = { { - .shrink = i915_global_vma_shrink, - .exit = i915_global_vma_exit, -} }; - -int __init i915_global_vma_init(void) +int __init i915_vma_module_init(void) { - global.slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); - if (!global.slab_vmas) + slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!slab_vmas) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index eca452a9851f..ed69f66c7ab0 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -426,4 +426,7 @@ static inline int i915_vma_sync(struct i915_vma *vma) return i915_active_wait(&vma->active); } +void i915_vma_module_exit(void); +int i915_vma_module_init(void); + #endif diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index e0a10f36acc1..305facedd284 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -59,7 +59,6 @@ static const char * const platform_names[] = { PLATFORM_NAME(GEMINILAKE), PLATFORM_NAME(COFFEELAKE), PLATFORM_NAME(COMETLAKE), - PLATFORM_NAME(CANNONLAKE), PLATFORM_NAME(ICELAKE), PLATFORM_NAME(ELKHARTLAKE), PLATFORM_NAME(JASPERLAKE), @@ -68,6 +67,8 @@ static const char * const platform_names[] = { PLATFORM_NAME(DG1), PLATFORM_NAME(ALDERLAKE_S), 
PLATFORM_NAME(ALDERLAKE_P), + PLATFORM_NAME(XEHPSDV), + PLATFORM_NAME(DG2), }; #undef PLATFORM_NAME @@ -96,9 +97,17 @@ static const char *iommu_name(void) void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - drm_printf(p, "graphics_ver: %u\n", info->graphics_ver); - drm_printf(p, "media_ver: %u\n", info->media_ver); - drm_printf(p, "display_ver: %u\n", info->display.ver); + if (info->graphics_rel) + drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel); + else + drm_printf(p, "graphics version: %u\n", info->graphics_ver); + + if (info->media_rel) + drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel); + else + drm_printf(p, "media version: %u\n", info->media_ver); + + drm_printf(p, "display version: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); @@ -165,7 +174,6 @@ static const u16 subplatform_ulx_ids[] = { }; static const u16 subplatform_portf_ids[] = { - INTEL_CNL_PORT_F_IDS(0), INTEL_ICL_PORT_F_IDS(0), }; @@ -253,14 +261,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv); enum pipe pipe; - /* Wa_14011765242: adl-s A0 */ - if (IS_ADLS_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + /* Wa_14011765242: adl-s A0,A1 */ + if (IS_ADLS_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A2)) for_each_pipe(dev_priv, pipe) runtime->num_scalers[pipe] = 0; - else if (GRAPHICS_VER(dev_priv) >= 10) { + else if (DISPLAY_VER(dev_priv) >= 11) { for_each_pipe(dev_priv, pipe) runtime->num_scalers[pipe] = 2; - } else if (GRAPHICS_VER(dev_priv) == 9) { + } else if (DISPLAY_VER(dev_priv) >= 9) { runtime->num_scalers[PIPE_A] = 2; runtime->num_scalers[PIPE_B] = 2; runtime->num_scalers[PIPE_C] = 1; @@ -271,10 +279,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 13 || HAS_D12_PLANE_MINIMIZATION(dev_priv)) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 4; - else if (GRAPHICS_VER(dev_priv) >= 11) + else if (DISPLAY_VER(dev_priv) >= 11) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 6; - else if (GRAPHICS_VER(dev_priv) == 10 || IS_GEMINILAKE(dev_priv)) + else if (DISPLAY_VER(dev_priv) == 10) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 3; else if (IS_BROXTON(dev_priv)) { @@ -293,7 +301,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 2; - } else if (GRAPHICS_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) { + } else if (DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) { for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 1; } @@ -357,7 +365,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->display.has_dmc = 0; if (DISPLAY_VER(dev_priv) >= 10 && - (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) + (dfsm & GLK_DFSM_DISPLAY_DSC_DISABLE)) info->display.has_dsc = 0; } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b326aff65cd6..d328bb95c49b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -76,8 +76,6 @@ enum intel_platform { INTEL_GEMINILAKE, INTEL_COFFEELAKE, INTEL_COMETLAKE, - /* gen10 */ - INTEL_CANNONLAKE, /* gen11 */ INTEL_ICELAKE, INTEL_ELKHARTLAKE, @@ -88,6 
+86,8 @@ enum intel_platform { INTEL_DG1, INTEL_ALDERLAKE_S, INTEL_ALDERLAKE_P, + INTEL_XEHPSDV, + INTEL_DG2, INTEL_MAX_PLATFORMS }; @@ -103,9 +103,13 @@ enum intel_platform { #define INTEL_SUBPLATFORM_ULT (0) #define INTEL_SUBPLATFORM_ULX (1) -/* CNL/ICL */ +/* ICL */ #define INTEL_SUBPLATFORM_PORTF (0) +/* DG2 */ +#define INTEL_SUBPLATFORM_G10 0 +#define INTEL_SUBPLATFORM_G11 1 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, @@ -127,7 +131,7 @@ enum intel_ppgtt_type { func(has_llc); \ func(has_logical_ring_contexts); \ func(has_logical_ring_elsq); \ - func(has_master_unit_irq); \ + func(has_mslices); \ func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ @@ -141,6 +145,7 @@ enum intel_ppgtt_type { #define DEV_INFO_DISPLAY_FOR_EACH_FLAG(func) \ /* Keep in alphabetical order */ \ func(cursor_needs_physical); \ + func(has_cdclk_crawl); \ func(has_dmc); \ func(has_ddi); \ func(has_dp_mst); \ @@ -162,9 +167,10 @@ enum intel_ppgtt_type { struct intel_device_info { u8 graphics_ver; + u8 graphics_rel; u8 media_ver; + u8 media_rel; - u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ enum intel_platform platform; @@ -180,13 +186,13 @@ struct intel_device_info { u32 display_mmio_offset; + u8 gt; /* GT number, 0 if undefined */ + u8 pipe_mask; u8 cpu_transcoder_mask; u8 abox_mask; - u8 has_cdclk_crawl; /* does support CDCLK crawling */ - #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 50fdea84ba70..91866520c173 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -77,21 +77,21 @@ static int skl_get_dimm_ranks(u16 val) } /* Returns total Gb for the whole DIMM */ -static int cnl_get_dimm_size(u16 val) +static int icl_get_dimm_size(u16 val) { - return (val & CNL_DRAM_SIZE_MASK) * 8 / 2; + return (val & ICL_DRAM_SIZE_MASK) * 8 / 2; } -static int cnl_get_dimm_width(u16 val) +static int icl_get_dimm_width(u16 val) { - if (cnl_get_dimm_size(val) == 0) + if (icl_get_dimm_size(val) == 0) return 0; - switch (val & CNL_DRAM_WIDTH_MASK) { - case CNL_DRAM_WIDTH_X8: - case CNL_DRAM_WIDTH_X16: - case CNL_DRAM_WIDTH_X32: - val = (val & CNL_DRAM_WIDTH_MASK) >> CNL_DRAM_WIDTH_SHIFT; + switch (val & ICL_DRAM_WIDTH_MASK) { + case ICL_DRAM_WIDTH_X8: + case ICL_DRAM_WIDTH_X16: + case ICL_DRAM_WIDTH_X32: + val = (val & ICL_DRAM_WIDTH_MASK) >> ICL_DRAM_WIDTH_SHIFT; return 8 << val; default: MISSING_CASE(val); @@ -99,12 +99,12 @@ static int cnl_get_dimm_width(u16 val) } } -static int cnl_get_dimm_ranks(u16 val) +static int icl_get_dimm_ranks(u16 val) { - if (cnl_get_dimm_size(val) == 0) + if (icl_get_dimm_size(val) == 0) return 0; - val = (val & CNL_DRAM_RANK_MASK) >> CNL_DRAM_RANK_SHIFT; + val = (val & ICL_DRAM_RANK_MASK) >> ICL_DRAM_RANK_SHIFT; return val + 1; } @@ -121,10 +121,10 @@ skl_dram_get_dimm_info(struct drm_i915_private *i915, struct dram_dimm_info *dimm, int channel, char dimm_name, u16 val) { - if (GRAPHICS_VER(i915) >= 10) { - dimm->size = cnl_get_dimm_size(val); - dimm->width = cnl_get_dimm_width(val); - dimm->ranks = cnl_get_dimm_ranks(val); + if (GRAPHICS_VER(i915) >= 11) { + dimm->size = icl_get_dimm_size(val); + dimm->width = icl_get_dimm_width(val); + dimm->ranks = icl_get_dimm_ranks(val); } else { dimm->size = skl_get_dimm_size(val); dimm->width = skl_get_dimm_width(val); @@ -468,6 +468,7 @@ static int 
icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv) dram_info->num_channels = (val & 0xf0) >> 4; dram_info->num_qgv_points = (val & 0xf00) >> 8; + dram_info->num_psf_gv_points = (val & 0x3000) >> 12; return 0; } @@ -484,8 +485,7 @@ static int gen11_get_dram_info(struct drm_i915_private *i915) static int gen12_get_dram_info(struct drm_i915_private *i915) { - /* Always needed for GEN12+ */ - i915->dram_info.wm_lv_0_adjust_needed = true; + i915->dram_info.wm_lv_0_adjust_needed = false; return icl_pcode_read_mem_global_info(i915); } @@ -495,15 +495,15 @@ void intel_dram_detect(struct drm_i915_private *i915) struct dram_info *dram_info = &i915->dram_info; int ret; + if (GRAPHICS_VER(i915) < 9 || IS_DG2(i915) || !HAS_DISPLAY(i915)) + return; + /* * Assume level 0 watermark latency adjustment is needed until proven * otherwise, this w/a is not needed by bxt/glk. */ dram_info->wm_lv_0_adjust_needed = !IS_GEN9_LP(i915); - if (GRAPHICS_VER(i915) < 9 || !HAS_DISPLAY(i915)) - return; - if (GRAPHICS_VER(i915) >= 12) ret = gen12_get_dram_info(i915); else if (GRAPHICS_VER(i915) >= 11) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e6024eb7cca4..779eb2fa90b6 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -5,6 +5,7 @@ #include "intel_memory_region.h" #include "i915_drv.h" +#include "i915_ttm_buddy_manager.h" static const struct { u16 class; @@ -28,11 +29,6 @@ static const struct { }, }; -struct intel_region_reserve { - struct list_head link; - struct ttm_resource *res; -}; - struct intel_memory_region * intel_memory_region_lookup(struct drm_i915_private *i915, u16 class, u16 instance) @@ -64,27 +60,6 @@ intel_memory_region_by_type(struct drm_i915_private *i915, } /** - * intel_memory_region_unreserve - Unreserve all previously reserved - * ranges - * @mem: The region containing the reserved ranges. - */ -void intel_memory_region_unreserve(struct intel_memory_region *mem) -{ - struct intel_region_reserve *reserve, *next; - - if (!mem->priv_ops || !mem->priv_ops->free) - return; - - mutex_lock(&mem->mm_lock); - list_for_each_entry_safe(reserve, next, &mem->reserved, link) { - list_del(&reserve->link); - mem->priv_ops->free(mem, reserve->res); - kfree(reserve); - } - mutex_unlock(&mem->mm_lock); -} - -/** * intel_memory_region_reserve - Reserve a memory range * @mem: The region for which we want to reserve a range. * @offset: Start of the range to reserve. 
@@ -96,28 +71,11 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size) { - int ret; - struct intel_region_reserve *reserve; - - if (!mem->priv_ops || !mem->priv_ops->reserve) - return -EINVAL; - - reserve = kzalloc(sizeof(*reserve), GFP_KERNEL); - if (!reserve) - return -ENOMEM; + struct ttm_resource_manager *man = mem->region_private; - reserve->res = mem->priv_ops->reserve(mem, offset, size); - if (IS_ERR(reserve->res)) { - ret = PTR_ERR(reserve->res); - kfree(reserve); - return ret; - } - - mutex_lock(&mem->mm_lock); - list_add_tail(&reserve->link, &mem->reserved); - mutex_unlock(&mem->mm_lock); + GEM_BUG_ON(mem->is_range_manager); - return 0; + return i915_ttm_buddy_man_reserve(man, offset, size); } struct intel_memory_region * @@ -149,10 +107,6 @@ intel_memory_region_create(struct drm_i915_private *i915, mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); - INIT_LIST_HEAD(&mem->objects.purgeable); - INIT_LIST_HEAD(&mem->reserved); - - mutex_init(&mem->mm_lock); if (ops->init) { err = ops->init(mem); @@ -183,11 +137,9 @@ static void __intel_memory_region_destroy(struct kref *kref) struct intel_memory_region *mem = container_of(kref, typeof(*mem), kref); - intel_memory_region_unreserve(mem); if (mem->ops->release) mem->ops->release(mem); - mutex_destroy(&mem->mm_lock); mutex_destroy(&mem->objects.lock); kfree(mem); } @@ -221,7 +173,12 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) instance = intel_region_map[i].instance; switch (type) { case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915, type, instance); + if (IS_DGFX(i915)) + mem = i915_gem_ttm_system_setup(i915, type, + instance); + else + mem = i915_gem_shmem_setup(i915, type, + instance); break; case INTEL_MEMORY_STOLEN_LOCAL: mem = i915_gem_stolen_lmem_setup(i915, type, instance); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 1f7dac63abb7..1f2b96efa69d 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -40,8 +40,7 @@ enum intel_region_id { #define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) #define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) -#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) -#define I915_ALLOC_CONTIGUOUS BIT(1) +#define I915_ALLOC_CONTIGUOUS BIT(0) #define for_each_memory_region(mr, i915, id) \ for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ @@ -56,22 +55,14 @@ struct intel_memory_region_ops { int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags); }; -struct intel_memory_region_private_ops { - struct ttm_resource *(*reserve)(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size); - void (*free)(struct intel_memory_region *mem, - struct ttm_resource *res); -}; - struct intel_memory_region { struct drm_i915_private *i915; const struct intel_memory_region_ops *ops; - const struct intel_memory_region_private_ops *priv_ops; struct io_mapping iomap; struct resource region; @@ -79,8 +70,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct mutex mm_lock; - struct kref kref; resource_size_t io_start; @@ -94,18 +83,13 @@ struct intel_memory_region { char name[16]; bool private; /* not for userspace */ - struct list_head reserved; - dma_addr_t remap_addr; struct { struct mutex lock; /* Protects access to objects */ struct 
list_head list; - struct list_head purgeable; } objects; - size_t chunk_size; - unsigned int max_order; bool is_range_manager; void *region_private; @@ -139,9 +123,15 @@ __printf(2, 3) void intel_memory_region_set_name(struct intel_memory_region *mem, const char *fmt, ...); -void intel_memory_region_unreserve(struct intel_memory_region *mem); - int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance); +struct intel_memory_region * +i915_gem_shmem_setup(struct drm_i915_private *i915, + u16 type, u16 instance); + #endif diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 4e92ae19189e..d1d4b97b86f5 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -81,7 +81,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) case INTEL_PCH_CNP_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); drm_WARN_ON(&dev_priv->drm, - !IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv) && !IS_COMETLAKE(dev_priv)); return PCH_CNP; @@ -89,7 +88,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake LP PCH (CNP-LP)\n"); drm_WARN_ON(&dev_priv->drm, - !IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv) && !IS_COMETLAKE(dev_priv)); return PCH_CNP; @@ -171,8 +169,7 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv, id = INTEL_PCH_MCC_DEVICE_ID_TYPE; else if (IS_ICELAKE(dev_priv)) id = INTEL_PCH_ICP_DEVICE_ID_TYPE; - else if (IS_CANNONLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || + else if (IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv)) id = INTEL_PCH_CNP_DEVICE_ID_TYPE; else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) @@ -211,6 +208,9 @@ void intel_detect_pch(struct drm_i915_private *dev_priv) if (IS_DG1(dev_priv)) { dev_priv->pch_type = PCH_DG1; return; + } else if (IS_DG2(dev_priv)) { + dev_priv->pch_type = PCH_DG2; + return; } /* diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index e2f3f30c6445..7c0d83d292dc 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -30,6 +30,7 @@ enum intel_pch { /* Fake PCHs, functionality handled on the same PCI dev */ PCH_DG1 = 1024, + PCH_DG2, }; #define INTEL_PCH_DEVICE_ID_MASK 0xff80 @@ -62,6 +63,7 @@ enum intel_pch { #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) #define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) +#define HAS_PCH_DG2(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG2) #define HAS_PCH_ADP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ADP) #define HAS_PCH_DG1(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG1) #define HAS_PCH_JSP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_JSP) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 45fefa0ed160..65bc3709f54c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1370,11 +1370,11 @@ static bool g4x_compute_fbc_en(const struct g4x_wm_state *wm_state, return true; } -static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) +static int g4x_compute_pipe_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->uapi.state); + struct intel_crtc_state 
*crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; int num_active_planes = hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); @@ -1451,20 +1451,21 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) return 0; } -static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) +static int g4x_compute_intermediate_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate; const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal; - struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_crtc_state->uapi.state); - const struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(intel_state, crtc); const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal; enum plane_id plane_id; - if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { *intermediate = *optimal; intermediate->cxsr = false; @@ -1890,12 +1891,12 @@ static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); } -static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) +static int vlv_compute_pipe_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->uapi.state); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; @@ -2095,19 +2096,20 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, #undef VLV_FIFO -static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) +static int vlv_compute_intermediate_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate; const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal; - struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_crtc_state->uapi.state); - const struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(intel_state, crtc); const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal; int level; - if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { *intermediate = *optimal; intermediate->cxsr = false; @@ -2906,24 +2908,25 @@ static void intel_read_wm_latency(struct 
drm_i915_private *dev_priv, if (wm[level] == 0) { for (i = level + 1; i <= max_level; i++) wm[i] = 0; + + max_level = level - 1; + break; } } /* - * WaWmMemoryReadLatency:skl+,glk + * WaWmMemoryReadLatency * * punit doesn't take into account the read latency so we need - * to add 2us to the various latency levels we retrieve from the - * punit when level 0 response data us 0us. + * to add proper adjustement to each valid level we retrieve + * from the punit when level 0 response data is 0us. */ if (wm[0] == 0) { - wm[0] += 2; - for (level = 1; level <= max_level; level++) { - if (wm[level] == 0) - break; - wm[level] += 2; - } + u8 adjust = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2; + + for (level = 0; level <= max_level; level++) + wm[level] += adjust; } /* @@ -2934,7 +2937,6 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, */ if (dev_priv->dram_info.wm_lv_0_adjust_needed) wm[0] += 1; - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { u64 sskpd = intel_uncore_read64(uncore, MCH_SSKPD); @@ -3144,10 +3146,12 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv, } /* Compute new watermarks for the pipe */ -static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) +static int ilk_compute_pipe_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_pipe_wm *pipe_wm; struct intel_plane *plane; const struct intel_plane_state *plane_state; @@ -3220,16 +3224,16 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) * state and the new state. These can be programmed to the hardware * immediately. */ -static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(newstate->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct intel_atomic_state *intel_state = - to_intel_atomic_state(newstate->uapi.state); - const struct intel_crtc_state *oldstate = - intel_atomic_get_old_crtc_state(intel_state, intel_crtc); - const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; +static int ilk_compute_intermediate_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_pipe_wm *a = &new_crtc_state->wm.ilk.intermediate; + const struct intel_pipe_wm *b = &old_crtc_state->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(dev_priv); /* @@ -3237,9 +3241,10 @@ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) * currently active watermarks to get values that are safe both before * and after the vblank. 
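To make the reworked WaWmMemoryReadLatency handling above concrete, here is a small illustrative helper that mirrors the adjustment now applied in intel_read_wm_latency(); the function name and the example values are assumptions, not code from this series:

static void example_adjust_wm_latency(u16 wm[], int max_level,
				      bool display_ver_ge_12)
{
	int level;

	if (wm[0] != 0)
		return;

	/* +3 us on display version 12 and newer, +2 us otherwise. */
	for (level = 0; level <= max_level; level++)
		wm[level] += display_ver_ge_12 ? 3 : 2;
}

For example, raw punit latencies of {0, 4, 10, 20} us become {3, 7, 13, 23} us on a display version 12 platform, with the DRAM-dependent +1 us level 0 bump still applied on top where wm_lv_0_adjust_needed is set.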
*/ - *a = newstate->wm.ilk.optimal; - if (!newstate->hw.active || drm_atomic_crtc_needs_modeset(&newstate->uapi) || - intel_state->skip_intermediate_wm) + *a = new_crtc_state->wm.ilk.optimal; + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) || + state->skip_intermediate_wm) return 0; a->pipe_enabled |= b->pipe_enabled; @@ -3270,8 +3275,8 @@ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) - newstate->wm.need_postvbl_update = true; + if (memcmp(a, &new_crtc_state->wm.ilk.optimal, sizeof(*a)) != 0) + new_crtc_state->wm.need_postvbl_update = true; return 0; } @@ -3283,12 +3288,12 @@ static void ilk_merge_wm_level(struct drm_i915_private *dev_priv, int level, struct intel_wm_level *ret_wm) { - const struct intel_crtc *intel_crtc; + const struct intel_crtc *crtc; ret_wm->enable = true; - for_each_intel_crtc(&dev_priv->drm, intel_crtc) { - const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; + for_each_intel_crtc(&dev_priv->drm, crtc) { + const struct intel_pipe_wm *active = &crtc->wm.active.ilk; const struct intel_wm_level *wm = &active->wm[level]; if (!active->pipe_enabled) @@ -3388,7 +3393,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv, enum intel_ddb_partitioning partitioning, struct ilk_wm_values *results) { - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; int level, wm_lp; results->enable_fbc_wm = merged->fbc_wm_enabled; @@ -3433,9 +3438,9 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv, } /* LP0 register values */ - for_each_intel_crtc(&dev_priv->drm, intel_crtc) { - enum pipe pipe = intel_crtc->pipe; - const struct intel_pipe_wm *pipe_wm = &intel_crtc->wm.active.ilk; + for_each_intel_crtc(&dev_priv->drm, crtc) { + enum pipe pipe = crtc->pipe; + const struct intel_pipe_wm *pipe_wm = &crtc->wm.active.ilk; const struct intel_wm_level *r = &pipe_wm->wm[0]; if (drm_WARN_ON(&dev_priv->drm, !r->enable)) @@ -4579,6 +4584,117 @@ static const struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {} }; +static const struct dbuf_slice_conf_entry dg2_allowed_dbufs[] = { + { + .active_pipes = BIT(PIPE_A), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + }, + }, + { + .active_pipes = BIT(PIPE_B), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + }, + }, + { + .active_pipes = BIT(PIPE_C), + .dbuf_mask = { + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_C), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_B) | BIT(PIPE_C), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_D), + .dbuf_mask = { + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + 
.active_pipes = BIT(PIPE_B) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + {} +}; + static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = { { .active_pipes = BIT(PIPE_A), @@ -4754,12 +4870,19 @@ static u32 adlp_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) return compute_dbuf_slices(pipe, active_pipes, adlp_allowed_dbufs); } +static u32 dg2_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) +{ + return compute_dbuf_slices(pipe, active_pipes, dg2_allowed_dbufs); +} + static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + return dg2_compute_dbuf_slices(pipe, active_pipes); + else if (IS_ALDERLAKE_P(dev_priv)) return adlp_compute_dbuf_slices(pipe, active_pipes); else if (DISPLAY_VER(dev_priv) == 12) return tgl_compute_dbuf_slices(pipe, active_pipes); @@ -7340,30 +7463,24 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); - /* This is not an Wa. 
Enable to reduce Sampler power */ - intel_uncore_write(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, - intel_uncore_read(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); - /*Wa_14010594013:icl, ehl */ intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, - 0, CNL_DELAY_PMRSP); + 0, ICL_DELAY_PMRSP); } static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) { - /* Wa_1409120013:tgl,rkl,adl_s,dg1 */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, - ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); + /* Wa_1409120013:tgl,rkl,adl-s,dg1 */ + if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || + IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) + intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, + ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); /* Wa_1409825376:tgl (pre-prod)*/ - if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); - /* Wa_14011059788:tgl,rkl,adl_s,dg1 */ - intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, - 0, DFR_DISABLE); - /* Wa_14013723622:tgl,rkl,dg1,adl-s */ if (DISPLAY_VER(dev_priv) == 12) intel_uncore_rmw(&dev_priv->uncore, CLKREQ_POLICY, @@ -7383,7 +7500,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) gen12lp_init_clock_gating(dev_priv); /* Wa_1409836686:dg1[a0] */ - if (IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0)) + if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | DPT_GATING_DIS); } @@ -7398,43 +7515,6 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) CNP_PWM_CGE_GATING_DISABLE); } -static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) -{ - u32 val; - cnp_init_clock_gating(dev_priv); - - /* This is not an Wa. 
Enable for better image quality */ - intel_uncore_write(&dev_priv->uncore, _3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); - - /* WaEnableChickenDCPR:cnl */ - intel_uncore_write(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, - intel_uncore_read(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); - - /* - * WaFbcWakeMemOn:cnl - * Display WA #0859: cnl - */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_MEMORY_WAKE); - - val = intel_uncore_read(&dev_priv->uncore, SLICE_UNIT_LEVEL_CLKGATE); - /* ReadHitWriteOnlyDisable:cnl */ - val |= RCCUNIT_CLKGATE_DIS; - intel_uncore_write(&dev_priv->uncore, SLICE_UNIT_LEVEL_CLKGATE, val); - - /* Wa_2201832410:cnl */ - val = intel_uncore_read(&dev_priv->uncore, SUBSLICE_UNIT_LEVEL_CLKGATE); - val |= GWUNIT_CLKGATE_DIS; - intel_uncore_write(&dev_priv->uncore, SUBSLICE_UNIT_LEVEL_CLKGATE, val); - - /* WaDisableVFclkgate:cnl */ - /* WaVFUnitClockGatingDisable:cnl */ - val = intel_uncore_read(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE); - val |= VFUNIT_CLKGATE_DIS; - intel_uncore_write(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE, val); -} - static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) { cnp_init_clock_gating(dev_priv); @@ -7468,12 +7548,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) FBC_LLC_FULLY_OPEN); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_B0)) + if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_B0)) + if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); @@ -7863,8 +7943,6 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = gen12lp_init_clock_gating; else if (GRAPHICS_VER(dev_priv) == 11) dev_priv->display.init_clock_gating = icl_init_clock_gating; - else if (IS_CANNONLAKE(dev_priv)) - dev_priv->display.init_clock_gating = cnl_init_clock_gating; else if (IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; else if (IS_SKYLAKE(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 82a6727ede46..98c7339bf8ba 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -8,9 +8,11 @@ #include "i915_drv.h" #include "i915_scatterlist.h" +#include "i915_ttm_buddy_manager.h" #include "intel_region_ttm.h" +#include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */ /** * DOC: TTM support structure * @@ -20,9 +22,6 @@ * i915 GEM regions to TTM memory types and resource managers. */ -/* A Zero-initialized driver for now. We don't have a TTM backend yet. */ -static struct ttm_device_funcs i915_ttm_bo_driver; - /** * intel_region_ttm_device_init - Initialize a TTM device * @dev_priv: Pointer to an i915 device private structure. 
@@ -33,7 +32,7 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = &dev_priv->drm; - return ttm_device_init(&dev_priv->bdev, &i915_ttm_bo_driver, + return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, drm->vma_offset_manager, false, false); } @@ -52,12 +51,16 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) * driver-private types for now, reserving TTM_PL_VRAM for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ -static int intel_region_to_ttm_type(struct intel_memory_region *mem) +int intel_region_to_ttm_type(const struct intel_memory_region *mem) { int type; GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && - mem->type != INTEL_MEMORY_MOCK); + mem->type != INTEL_MEMORY_MOCK && + mem->type != INTEL_MEMORY_SYSTEM); + + if (mem->type == INTEL_MEMORY_SYSTEM) + return TTM_PL_SYSTEM; type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -65,72 +68,29 @@ static int intel_region_to_ttm_type(struct intel_memory_region *mem) return type; } -static struct ttm_resource * -intel_region_ttm_node_reserve(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size) -{ - struct ttm_resource_manager *man = mem->region_private; - struct ttm_place place = {}; - struct ttm_buffer_object mock_bo = {}; - struct ttm_resource *res; - int ret; - - /* - * Having to use a mock_bo is unfortunate but stems from some - * drivers having private managers that insist to know what the - * allocate memory is intended for, using it to send private - * data to the manager. Also recently the bo has been used to send - * alignment info to the manager. Assume that apart from the latter, - * none of the managers we use will ever access the buffer object - * members, hoping we can pass the alignment info in the - * struct ttm_place in the future. - */ - - place.fpfn = offset >> PAGE_SHIFT; - place.lpfn = place.fpfn + (size >> PAGE_SHIFT); - mock_bo.base.size = size; - ret = man->func->alloc(man, &mock_bo, &place, &res); - if (ret == -ENOSPC) - ret = -ENXIO; - - return ret ? ERR_PTR(ret) : res; -} - /** - * intel_region_ttm_node_free - Free a node allocated from a resource manager - * @mem: The region the node was allocated from. - * @node: The opaque node representing an allocation. + * intel_region_ttm_init - Initialize a memory region for TTM. + * @mem: The region to initialize. + * + * This function initializes a suitable TTM resource manager for the + * region, and if it's a LMEM region type, attaches it to the TTM + * device. MOCK regions are NOT attached to the TTM device, since we don't + * have one for the mock selftests. + * + * Return: 0 on success, negative error code on failure. 
*/ -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *res) -{ - struct ttm_resource_manager *man = mem->region_private; - - man->func->free(man, res); -} - -static const struct intel_memory_region_private_ops priv_ops = { - .reserve = intel_region_ttm_node_reserve, - .free = intel_region_ttm_node_free, -}; - int intel_region_ttm_init(struct intel_memory_region *mem) { struct ttm_device *bdev = &mem->i915->bdev; int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = ttm_range_man_init(bdev, mem_type, false, - resource_size(&mem->region) >> PAGE_SHIFT); + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(&mem->region), + mem->min_page_size, PAGE_SIZE); if (ret) return ret; - mem->chunk_size = PAGE_SIZE; - mem->max_order = - get_order(rounddown_pow_of_two(resource_size(&mem->region))); - mem->is_range_manager = true; - mem->priv_ops = &priv_ops; mem->region_private = ttm_manager_type(bdev, mem_type); return 0; @@ -148,17 +108,17 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) { int ret; - ret = ttm_range_man_fini(&mem->i915->bdev, - intel_region_to_ttm_type(mem)); + ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, + intel_region_to_ttm_type(mem)); GEM_WARN_ON(ret); mem->region_private = NULL; } /** - * intel_region_ttm_node_to_st - Convert an opaque TTM resource manager node + * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource * to an sg_table. * @mem: The memory region. - * @node: The resource manager node obtained from the TTM resource manager. + * @res: The resource manager resource obtained from the TTM resource manager. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -166,19 +126,23 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) * * Return: A malloced sg_table on success, an error pointer on failure. */ -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res) +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res) { - struct ttm_range_mgr_node *range_node = - container_of(res, typeof(*range_node), base); + if (mem->is_range_manager) { + struct ttm_range_mgr_node *range_node = + to_ttm_range_mgr_node(res); - GEM_WARN_ON(!mem->is_range_manager); - return i915_sg_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + return i915_sg_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); + } else { + return i915_sg_from_buddy_resource(res, mem->region.start); + } } +#ifdef CONFIG_DRM_I915_SELFTEST /** - * intel_region_ttm_node_alloc - Allocate memory resources from a region + * intel_region_ttm_resource_alloc - Allocate memory resources from a region * @mem: The memory region, * @size: The requested size in bytes * @flags: Allocation flags @@ -187,15 +151,15 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, * memory from standalone TTM range managers, without the TTM eviction * functionality. Don't use if you are not completely sure that's the * case. The returned opaque node can be converted to an sg_table using - * intel_region_ttm_node_to_st(), and can be freed using - * intel_region_ttm_node_free(). + * intel_region_ttm_resource_to_st(), and can be freed using + * intel_region_ttm_resource_free(). * * Return: A valid pointer on success, an error pointer on failure. 
*/ struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { struct ttm_resource_manager *man = mem->region_private; struct ttm_place place = {}; @@ -203,24 +167,26 @@ intel_region_ttm_node_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret; - /* - * We ignore the flags for now since we're using the range - * manager and contigous and min page size would be fulfilled - * by default if size is min page size aligned. - */ mock_bo.base.size = size; - - if (mem->is_range_manager) { - if (size >= SZ_1G) - mock_bo.page_alignment = SZ_1G >> PAGE_SHIFT; - else if (size >= SZ_2M) - mock_bo.page_alignment = SZ_2M >> PAGE_SHIFT; - else if (size >= SZ_64K) - mock_bo.page_alignment = SZ_64K >> PAGE_SHIFT; - } + place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC) ret = -ENXIO; return ret ? ERR_PTR(ret) : res; } + +#endif + +/** + * intel_region_ttm_resource_free - Free a resource allocated from a resource manager + * @mem: The region the resource was allocated from. + * @res: The opaque resource representing an allocation. + */ +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index 11b0574ab791..6f44075920f2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -12,6 +12,7 @@ struct drm_i915_private; struct intel_memory_region; struct ttm_resource; +struct ttm_device_funcs; int intel_region_ttm_device_init(struct drm_i915_private *dev_priv); @@ -21,14 +22,20 @@ int intel_region_ttm_init(struct intel_memory_region *mem); void intel_region_ttm_fini(struct intel_memory_region *mem); -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res); +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res); -struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res); + +int intel_region_to_ttm_type(const struct intel_memory_region *mem); -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *node); +struct ttm_device_funcs *i915_ttm_driver(void); + +#ifdef CONFIG_DRM_I915_SELFTEST +struct ttm_resource * +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +#endif #endif /* _INTEL_REGION_TTM_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index f0a82b37bd1a..e304bf44e1ff 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -556,17 +556,22 @@ out: #undef COND } -void intel_pcode_init(struct drm_i915_private *i915) +int intel_pcode_init(struct drm_i915_private *i915) { - int ret; + int ret = 0; if (!IS_DGFX(i915)) - return; + return ret; ret = skl_pcode_request(i915, DG1_PCODE_STATUS, DG1_UNCORE_GET_INIT_STATUS, DG1_UNCORE_INIT_STATUS_COMPLETE, - DG1_UNCORE_INIT_STATUS_COMPLETE, 50); + DG1_UNCORE_INIT_STATUS_COMPLETE, 180000); + + 
drm_dbg(&i915->drm, "PCODE init status %d\n", ret); + if (ret) drm_err(&i915->drm, "Pcode did not report uncore initialization completion!\n"); + + return ret; } diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h index 094c7b19c5d4..d1d14bcb8f56 100644 --- a/drivers/gpu/drm/i915/intel_sideband.h +++ b/drivers/gpu/drm/i915/intel_sideband.h @@ -138,6 +138,6 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox, int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); -void intel_pcode_init(struct drm_i915_private *i915); +int intel_pcode_init(struct drm_i915_private *i915); #endif /* _INTEL_SIDEBAND_H */ diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index ba9479a67521..6cf967631395 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -7,16 +7,32 @@ #include "intel_step.h" /* - * KBL revision ID ordering is bizarre; higher revision ID's map to lower - * steppings in some cases. So rather than test against the revision ID - * directly, let's map that into our own range of increasing ID's that we - * can test against in a regular manner. + * Some platforms have unusual ways of mapping PCI revision ID to GT/display + * steppings. E.g., in some cases a higher PCI revision may translate to a + * lower stepping of the GT and/or display IP. This file provides lookup + * tables to map the PCI revision into a standard set of stepping values that + * can be compared numerically. + * + * Also note that some revisions/steppings may have been set aside as + * placeholders but never materialized in real hardware; in those cases there + * may be jumps in the revision IDs or stepping values in the tables below. */ +/* + * Some platforms always have the same stepping value for GT and display; + * use a macro to define these to make it easier to identify the platforms + * where the two steppings can deviate. 
+ */ +#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x + +static const struct intel_step_info skl_revids[] = { + [0x6] = { COMMON_STEP(G0) }, + [0x7] = { COMMON_STEP(H0) }, + [0x9] = { COMMON_STEP(J0) }, + [0xA] = { COMMON_STEP(I1) }, +}; -/* FIXME: what about REVID_E0 */ static const struct intel_step_info kbl_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 }, @@ -26,7 +42,27 @@ static const struct intel_step_info kbl_revids[] = { [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 }, }; -static const struct intel_step_info tgl_uy_revid_step_tbl[] = { +static const struct intel_step_info bxt_revids[] = { + [0xA] = { COMMON_STEP(C0) }, + [0xB] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, + [0xD] = { COMMON_STEP(E0) }, +}; + +static const struct intel_step_info glk_revids[] = { + [3] = { COMMON_STEP(B0) }, +}; + +static const struct intel_step_info icl_revids[] = { + [7] = { COMMON_STEP(D0) }, +}; + +static const struct intel_step_info jsl_ehl_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, @@ -34,12 +70,23 @@ static const struct intel_step_info tgl_uy_revid_step_tbl[] = { }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ -static const struct intel_step_info tgl_revid_step_tbl[] = { +static const struct intel_step_info tgl_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 }, }; -static const struct intel_step_info adls_revid_step_tbl[] = { +static const struct intel_step_info rkl_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, + [4] = { COMMON_STEP(C0) }, +}; + +static const struct intel_step_info dg1_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct intel_step_info adls_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, @@ -47,13 +94,33 @@ static const struct intel_step_info adls_revid_step_tbl[] = { [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 }, }; -static const struct intel_step_info adlp_revid_step_tbl[] = { +static const struct intel_step_info adlp_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, }; +static const struct intel_step_info xehpsdv_revids[] = { + [0x0] = { .gt_step = STEP_A0 }, + [0x1] = { .gt_step = STEP_A1 }, + [0x4] = { .gt_step = STEP_B0 }, + [0x8] = { .gt_step = STEP_C0 }, +}; + +static const struct intel_step_info dg2_g10_revid_step_tbl[] = { + [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, + [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 }, + [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, + [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, +}; + +static const struct intel_step_info dg2_g11_revid_step_tbl[] = { + [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, + [0x4] 
= { .gt_step = STEP_B0, .display_step = STEP_C0 }, + [0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, +}; + void intel_step_init(struct drm_i915_private *i915) { const struct intel_step_info *revids = NULL; @@ -61,21 +128,51 @@ void intel_step_init(struct drm_i915_private *i915) int revid = INTEL_REVID(i915); struct intel_step_info step = {}; - if (IS_ALDERLAKE_P(i915)) { - revids = adlp_revid_step_tbl; - size = ARRAY_SIZE(adlp_revid_step_tbl); + if (IS_DG2_G10(i915)) { + revids = dg2_g10_revid_step_tbl; + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); + } else if (IS_DG2_G11(i915)) { + revids = dg2_g11_revid_step_tbl; + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); + } else if (IS_XEHPSDV(i915)) { + revids = xehpsdv_revids; + size = ARRAY_SIZE(xehpsdv_revids); + } else if (IS_ALDERLAKE_P(i915)) { + revids = adlp_revids; + size = ARRAY_SIZE(adlp_revids); } else if (IS_ALDERLAKE_S(i915)) { - revids = adls_revid_step_tbl; - size = ARRAY_SIZE(adls_revid_step_tbl); + revids = adls_revids; + size = ARRAY_SIZE(adls_revids); + } else if (IS_DG1(i915)) { + revids = dg1_revids; + size = ARRAY_SIZE(dg1_revids); + } else if (IS_ROCKETLAKE(i915)) { + revids = rkl_revids; + size = ARRAY_SIZE(rkl_revids); } else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) { - revids = tgl_uy_revid_step_tbl; - size = ARRAY_SIZE(tgl_uy_revid_step_tbl); + revids = tgl_uy_revids; + size = ARRAY_SIZE(tgl_uy_revids); } else if (IS_TIGERLAKE(i915)) { - revids = tgl_revid_step_tbl; - size = ARRAY_SIZE(tgl_revid_step_tbl); + revids = tgl_revids; + size = ARRAY_SIZE(tgl_revids); + } else if (IS_JSL_EHL(i915)) { + revids = jsl_ehl_revids; + size = ARRAY_SIZE(jsl_ehl_revids); + } else if (IS_ICELAKE(i915)) { + revids = icl_revids; + size = ARRAY_SIZE(icl_revids); + } else if (IS_GEMINILAKE(i915)) { + revids = glk_revids; + size = ARRAY_SIZE(glk_revids); + } else if (IS_BROXTON(i915)) { + revids = bxt_revids; + size = ARRAY_SIZE(bxt_revids); } else if (IS_KABYLAKE(i915)) { revids = kbl_revids; size = ARRAY_SIZE(kbl_revids); + } else if (IS_SKYLAKE(i915)) { + revids = skl_revids; + size = ARRAY_SIZE(skl_revids); } /* Not using the stepping scheme for the platform yet. */ @@ -114,3 +211,17 @@ void intel_step_init(struct drm_i915_private *i915) RUNTIME_INFO(i915)->step = step; } + +#define STEP_NAME_CASE(name) \ + case STEP_##name: \ + return #name; + +const char *intel_step_name(enum intel_step step) +{ + switch (step) { + STEP_NAME_LIST(STEP_NAME_CASE); + + default: + return "**"; + } +} diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index 958a8bb5d677..f6641e2a3c77 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -15,26 +15,39 @@ struct intel_step_info { u8 display_step; }; +#define STEP_ENUM_VAL(name) STEP_##name, + +#define STEP_NAME_LIST(func) \ + func(A0) \ + func(A1) \ + func(A2) \ + func(B0) \ + func(B1) \ + func(B2) \ + func(C0) \ + func(C1) \ + func(D0) \ + func(D1) \ + func(E0) \ + func(F0) \ + func(G0) \ + func(H0) \ + func(I0) \ + func(I1) \ + func(J0) + /* * Symbolic steppings that do not match the hardware. These are valid both as gt * and display steppings as symbolic names. 
*/ enum intel_step { STEP_NONE = 0, - STEP_A0, - STEP_A2, - STEP_B0, - STEP_B1, - STEP_C0, - STEP_D0, - STEP_D1, - STEP_E0, - STEP_F0, - STEP_G0, + STEP_NAME_LIST(STEP_ENUM_VAL) STEP_FUTURE, STEP_FOREVER, }; void intel_step_init(struct drm_i915_private *i915); +const char *intel_step_name(enum intel_step step); #endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1bed8f666048..6b38bc2811c1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -24,6 +24,8 @@ #include <linux/pm_runtime.h> #include <asm/iosf_mbi.h> +#include "gt/intel_lrc_reg.h" /* for shadow reg list */ + #include "i915_drv.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -68,8 +70,14 @@ static const char * const forcewake_domain_names[] = { "vdbox1", "vdbox2", "vdbox3", + "vdbox4", + "vdbox5", + "vdbox6", + "vdbox7", "vebox0", "vebox1", + "vebox2", + "vebox3", }; const char * @@ -952,30 +960,80 @@ static const i915_reg_t gen8_shadowed_regs[] = { }; static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ /* TODO: Other registers are not yet used */ }; static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + 
RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ + /* TODO: Other registers are not yet used */ +}; + +static const i915_reg_t xehp_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ + RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ + RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ + RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ + RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ + RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ + RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ + RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ + RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ /* TODO: Other registers are not yet used */ }; @@ -991,17 +1049,18 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) return 0; } -#define __is_genX_shadowed(x) \ -static bool is_gen##x##_shadowed(u32 offset) \ +#define __is_X_shadowed(x) \ +static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = gen##x##_shadowed_regs; \ - return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \ + const i915_reg_t *regs = x##_shadowed_regs; \ + return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ mmio_reg_cmp); \ } -__is_genX_shadowed(8) -__is_genX_shadowed(11) -__is_genX_shadowed(12) +__is_X_shadowed(gen8) +__is_X_shadowed(gen11) +__is_X_shadowed(gen12) +__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1065,6 +1124,15 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) +#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if 
(!is_xehp_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1249,6 +1317,172 @@ static const struct intel_forcewake_range __gen12_fw_ranges[] = { 0x1d3f00 - 0x1d3fff: VD2 */ }; +/* + * Graphics IP version 12.55 brings a slight change to the 0xd800 range, + * switching it from the GT domain to the render domain. + * + * *Must* be sorted by offset ranges! See intel_fw_table_check(). + */ +#define XEHP_FWRANGES(FW_RANGE_D800) \ + GEN_FW_RANGE(0x0, 0x1fff, 0), /* \ + 0x0 - 0xaff: reserved \ + 0xb00 - 0x1fff: always on */ \ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x2700, 0x4aff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x4b00, 0x51ff, 0), /* \ + 0x4b00 - 0x4fff: reserved \ + 0x5000 - 0x51ff: always on */ \ + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8160, 0x81ff, 0), /* \ + 0x8160 - 0x817f: reserved \ + 0x8180 - 0x81ff: always on */ \ + GEN_FW_RANGE(0x8200, 0x82ff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8500, 0x8cff, FORCEWAKE_GT), /* \ + 0x8500 - 0x87ff: gt \ + 0x8800 - 0x8c7f: reserved \ + 0x8c80 - 0x8cff: gt (DG2 only) */ \ + GEN_FW_RANGE(0x8d00, 0x8fff, FORCEWAKE_RENDER), /* \ + 0x8d00 - 0x8dff: render (DG2 only) \ + 0x8e00 - 0x8fff: reserved */ \ + GEN_FW_RANGE(0x9000, 0x94cf, FORCEWAKE_GT), /* \ + 0x9000 - 0x947f: gt \ + 0x9480 - 0x94cf: reserved */ \ + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x9560, 0x967f, 0), /* \ + 0x9560 - 0x95ff: always on \ + 0x9600 - 0x967f: reserved */ \ + GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /* \ + 0x9680 - 0x96ff: render (DG2 only) \ + 0x9700 - 0x97ff: reserved */ \ + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* \ + 0x9800 - 0xb4ff: gt \ + 0xb500 - 0xbfff: reserved \ + 0xc000 - 0xcfff: gt */ \ + GEN_FW_RANGE(0xd000, 0xd7ff, 0), \ + GEN_FW_RANGE(0xd800, 0xd87f, FW_RANGE_D800), \ + GEN_FW_RANGE(0xd880, 0xdbff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* \ + 0xdd00 - 0xddff: gt \ + 0xde00 - 0xde7f: reserved */ \ + GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* \ + 0xde80 - 0xdfff: render \ + 0xe000 - 0xe0ff: reserved \ + 0xe100 - 0xe8ff: render */ \ + GEN_FW_RANGE(0xe900, 0xffff, FORCEWAKE_GT), /* \ + 0xe900 - 0xe9ff: gt \ + 0xea00 - 0xefff: reserved \ + 0xf000 - 0xffff: gt */ \ + GEN_FW_RANGE(0x10000, 0x12fff, 0), /* \ + 0x10000 - 0x11fff: reserved \ + 0x12000 - 0x127ff: always on \ + 0x12800 - 0x12fff: reserved */ \ + GEN_FW_RANGE(0x13000, 0x131ff, FORCEWAKE_MEDIA_VDBOX0), /* DG2 only */ \ + GEN_FW_RANGE(0x13200, 0x13fff, FORCEWAKE_MEDIA_VDBOX2), /* \ + 0x13200 - 0x133ff: VD2 (DG2 only) \ + 0x13400 - 0x13fff: reserved */ \ + GEN_FW_RANGE(0x14000, 0x141ff, FORCEWAKE_MEDIA_VDBOX0), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14200, 0x143ff, FORCEWAKE_MEDIA_VDBOX2), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14400, 0x145ff, FORCEWAKE_MEDIA_VDBOX4), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14600, 0x147ff, FORCEWAKE_MEDIA_VDBOX6), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14800, 0x14fff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x15000, 0x16dff, FORCEWAKE_GT), /* \ + 0x15000 - 0x15fff: gt (DG2 only) \ + 0x16000 - 0x16dff: reserved */ \ + GEN_FW_RANGE(0x16e00, 0x1ffff, 
FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x20000, 0x21fff, FORCEWAKE_MEDIA_VDBOX0), /* \ + 0x20000 - 0x20fff: VD0 (XEHPSDV only) \ + 0x21000 - 0x21fff: reserved */ \ + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x24000, 0x2417f, 0), /* \ + 0x24000 - 0x2407f: always on \ + 0x24080 - 0x2417f: reserved */ \ + GEN_FW_RANGE(0x24180, 0x249ff, FORCEWAKE_GT), /* \ + 0x24180 - 0x241ff: gt \ + 0x24200 - 0x249ff: reserved */ \ + GEN_FW_RANGE(0x24a00, 0x251ff, FORCEWAKE_RENDER), /* \ + 0x24a00 - 0x24a7f: render \ + 0x24a80 - 0x251ff: reserved */ \ + GEN_FW_RANGE(0x25200, 0x25fff, FORCEWAKE_GT), /* \ + 0x25200 - 0x252ff: gt \ + 0x25300 - 0x25fff: reserved */ \ + GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /* \ + 0x26000 - 0x27fff: render \ + 0x28000 - 0x29fff: reserved \ + 0x2a000 - 0x2ffff: undocumented */ \ + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x40000, 0x1bffff, 0), \ + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* \ + 0x1c0000 - 0x1c2bff: VD0 \ + 0x1c2c00 - 0x1c2cff: reserved \ + 0x1c2d00 - 0x1c2dff: VD0 \ + 0x1c2e00 - 0x1c3eff: VD0 (DG2 only) \ + 0x1c3f00 - 0x1c3fff: VD0 */ \ + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), /* \ + 0x1c4000 - 0x1c6bff: VD1 \ + 0x1c6c00 - 0x1c6cff: reserved \ + 0x1c6d00 - 0x1c6dff: VD1 \ + 0x1c6e00 - 0x1c7fff: reserved */ \ + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /* \ + 0x1c8000 - 0x1ca0ff: VE0 \ + 0x1ca100 - 0x1cbfff: reserved */ \ + GEN_FW_RANGE(0x1cc000, 0x1ccfff, FORCEWAKE_MEDIA_VDBOX0), \ + GEN_FW_RANGE(0x1cd000, 0x1cdfff, FORCEWAKE_MEDIA_VDBOX2), \ + GEN_FW_RANGE(0x1ce000, 0x1cefff, FORCEWAKE_MEDIA_VDBOX4), \ + GEN_FW_RANGE(0x1cf000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX6), \ + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), /* \ + 0x1d0000 - 0x1d2bff: VD2 \ + 0x1d2c00 - 0x1d2cff: reserved \ + 0x1d2d00 - 0x1d2dff: VD2 \ + 0x1d2e00 - 0x1d3dff: VD2 (DG2 only) \ + 0x1d3e00 - 0x1d3eff: reserved \ + 0x1d3f00 - 0x1d3fff: VD2 */ \ + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), /* \ + 0x1d4000 - 0x1d6bff: VD3 \ + 0x1d6c00 - 0x1d6cff: reserved \ + 0x1d6d00 - 0x1d6dff: VD3 \ + 0x1d6e00 - 0x1d7fff: reserved */ \ + GEN_FW_RANGE(0x1d8000, 0x1dffff, FORCEWAKE_MEDIA_VEBOX1), /* \ + 0x1d8000 - 0x1da0ff: VE1 \ + 0x1da100 - 0x1dffff: reserved */ \ + GEN_FW_RANGE(0x1e0000, 0x1e3fff, FORCEWAKE_MEDIA_VDBOX4), /* \ + 0x1e0000 - 0x1e2bff: VD4 \ + 0x1e2c00 - 0x1e2cff: reserved \ + 0x1e2d00 - 0x1e2dff: VD4 \ + 0x1e2e00 - 0x1e3eff: reserved \ + 0x1e3f00 - 0x1e3fff: VD4 */ \ + GEN_FW_RANGE(0x1e4000, 0x1e7fff, FORCEWAKE_MEDIA_VDBOX5), /* \ + 0x1e4000 - 0x1e6bff: VD5 \ + 0x1e6c00 - 0x1e6cff: reserved \ + 0x1e6d00 - 0x1e6dff: VD5 \ + 0x1e6e00 - 0x1e7fff: reserved */ \ + GEN_FW_RANGE(0x1e8000, 0x1effff, FORCEWAKE_MEDIA_VEBOX2), /* \ + 0x1e8000 - 0x1ea0ff: VE2 \ + 0x1ea100 - 0x1effff: reserved */ \ + GEN_FW_RANGE(0x1f0000, 0x1f3fff, FORCEWAKE_MEDIA_VDBOX6), /* \ + 0x1f0000 - 0x1f2bff: VD6 \ + 0x1f2c00 - 0x1f2cff: reserved \ + 0x1f2d00 - 0x1f2dff: VD6 \ + 0x1f2e00 - 0x1f3eff: reserved \ + 0x1f3f00 - 0x1f3fff: VD6 */ \ + GEN_FW_RANGE(0x1f4000, 0x1f7fff, FORCEWAKE_MEDIA_VDBOX7), /* \ + 0x1f4000 - 0x1f6bff: VD7 \ + 0x1f6c00 - 0x1f6cff: reserved \ + 0x1f6d00 - 0x1f6dff: VD7 \ + 0x1f6e00 - 0x1f7fff: reserved */ \ + GEN_FW_RANGE(0x1f8000, 0x1fa0ff, FORCEWAKE_MEDIA_VEBOX3), + +static const struct intel_forcewake_range __xehp_fw_ranges[] = { + XEHP_FWRANGES(FORCEWAKE_GT) +}; + +static const struct intel_forcewake_range __dg2_fw_ranges[] = { + XEHP_FWRANGES(FORCEWAKE_RENDER) +}; + static void 
ilk_dummy_write(struct intel_uncore *uncore) { @@ -1502,6 +1736,7 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) +__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -1582,8 +1817,14 @@ static int __fw_domain_init(struct intel_uncore *uncore, BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX4 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX4)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX5 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX5)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX6 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX6)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX7 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX7)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX2)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX3)); d->mask = BIT(domain_id); @@ -1870,36 +2111,40 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) return ret; forcewake_early_sanitize(uncore, 0); - if (IS_GRAPHICS_VER(i915, 6, 7)) { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); - - if (IS_VALLEYVIEW(i915)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); - } - } else if (GRAPHICS_VER(i915) == 8) { - if (IS_CHERRYVIEW(i915)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); - } - } else if (IS_GRAPHICS_VER(i915, 9, 10)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else if (GRAPHICS_VER(i915) == 11) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); - } else { + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + } else if (GRAPHICS_VER(i915) == 11) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else if (IS_GRAPHICS_VER(i915, 9, 10)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (IS_CHERRYVIEW(i915)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (GRAPHICS_VER(i915) == 8) { + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); + } 
else if (IS_VALLEYVIEW(i915)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (IS_GRAPHICS_VER(i915, 6, 7)) { + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; @@ -1929,7 +2174,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) return -ENODEV; } - if (INTEL_GEN(i915) > 5 && !intel_vgpu_active(i915)) + if (GRAPHICS_VER(i915) > 5 && !intel_vgpu_active(i915)) uncore->flags |= UNCORE_HAS_FORCEWAKE; if (!intel_uncore_has_forcewake(uncore)) { @@ -1988,6 +2233,22 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, if (HAS_ENGINE(gt, _VCS(i))) continue; + /* + * Starting with XeHP, the power well for an even-numbered + * VDBOX is also used for shared units within the + * media slice such as SFC. So even if the engine + * itself is fused off, we still need to initialize + * the forcewake domain if any of the other engines + * in the same media slice are present. + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50) && i % 2 == 0) { + if ((i + 1 < I915_MAX_VCS) && HAS_ENGINE(gt, _VCS(i + 1))) + continue; + + if (HAS_ENGINE(gt, _VECS(i / 2))) + continue; + } + if (fw_domains & BIT(domain_id)) fw_domain_fini(uncore, domain_id); } @@ -2277,6 +2538,61 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore, return fw_domains; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice) +{ + u32 mcr_mask, mcr_ss, mcr, old_mcr, val; + + lockdep_assert_held(&uncore->lock); + + if (GRAPHICS_VER(uncore->i915) >= 11) { + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + } + + old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + val = intel_uncore_read_fw(uncore, reg); + + mcr &= ~mcr_mask; + mcr |= old_mcr & mcr_mask; + + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + return val; +} + +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + enum forcewake_domains fw_domains; + u32 val; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + FW_REG_READ); + fw_domains |= intel_uncore_forcewake_for_reg(uncore, + GEN8_MCR_SELECTOR, + FW_REG_READ | FW_REG_WRITE); + + spin_lock_irq(&uncore->lock); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + val = intel_uncore_read_with_mcr_steering_fw(uncore, reg, slice, subslice); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irq(&uncore->lock); + + return val; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 59f0da8f1fbb..3c0b0a8b5250 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -52,8 +52,14 @@ enum forcewake_domain_id { FW_DOMAIN_ID_MEDIA_VDBOX1, FW_DOMAIN_ID_MEDIA_VDBOX2, FW_DOMAIN_ID_MEDIA_VDBOX3, + FW_DOMAIN_ID_MEDIA_VDBOX4, + FW_DOMAIN_ID_MEDIA_VDBOX5, + FW_DOMAIN_ID_MEDIA_VDBOX6, + FW_DOMAIN_ID_MEDIA_VDBOX7, 
FW_DOMAIN_ID_MEDIA_VEBOX0, FW_DOMAIN_ID_MEDIA_VEBOX1, + FW_DOMAIN_ID_MEDIA_VEBOX2, + FW_DOMAIN_ID_MEDIA_VEBOX3, FW_DOMAIN_ID_COUNT }; @@ -66,10 +72,16 @@ enum forcewake_domains { FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1), FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2), FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3), + FORCEWAKE_MEDIA_VDBOX4 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX4), + FORCEWAKE_MEDIA_VDBOX5 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX5), + FORCEWAKE_MEDIA_VDBOX6 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX6), + FORCEWAKE_MEDIA_VDBOX7 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX7), FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0), FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), + FORCEWAKE_MEDIA_VEBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX2), + FORCEWAKE_MEDIA_VEBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX3), - FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1 + FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1, }; struct intel_uncore_funcs { @@ -182,6 +194,12 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice); +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice); + void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 8309455f13ea..5e511bb891f9 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -56,8 +56,8 @@ /* 24KB at the end of WOPCM is reserved for RC6 CTX on BXT. */ #define BXT_WOPCM_RC6_CTX_RESERVED (SZ_16K + SZ_8K) -/* 36KB WOPCM reserved at the end of WOPCM on CNL. */ -#define CNL_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) +/* 36KB WOPCM reserved at the end of WOPCM on ICL. */ +#define ICL_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) /* 128KB from GUC_WOPCM_RESERVED is reserved for FW on Gen9. */ #define GEN9_GUC_FW_RESERVED SZ_128K @@ -93,8 +93,8 @@ static u32 context_reserved_size(struct drm_i915_private *i915) { if (IS_GEN9_LP(i915)) return BXT_WOPCM_RC6_CTX_RESERVED; - else if (GRAPHICS_VER(i915) >= 10) - return CNL_WOPCM_HW_CTX_RESERVED; + else if (GRAPHICS_VER(i915) >= 11) + return ICL_WOPCM_HW_CTX_RESERVED; else return 0; } @@ -126,7 +126,7 @@ static bool gen9_check_huc_fw_fits(struct drm_i915_private *i915, u32 guc_wopcm_size, u32 huc_fw_size) { /* - * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM + * On Gen9, hardware requires the total available GuC WOPCM * size to be larger than or equal to HuC firmware size. Otherwise, * firmware uploading would fail. 
*/ diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c new file mode 100644 index 000000000000..d61ec9c951bf --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -0,0 +1,787 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +static void __igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + bool buddy) +{ + pr_err("block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%s buddy=%s\n", + block->header, + i915_buddy_block_state(block), + i915_buddy_block_order(block), + i915_buddy_block_offset(block), + i915_buddy_block_size(mm, block), + yesno(!block->parent), + yesno(buddy)); +} + +static void igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + + __igt_dump_block(mm, block, false); + + buddy = get_buddy(block); + if (buddy) + __igt_dump_block(mm, buddy, true); +} + +static int igt_check_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + unsigned int block_state; + u64 block_size; + u64 offset; + int err = 0; + + block_state = i915_buddy_block_state(block); + + if (block_state != I915_BUDDY_ALLOCATED && + block_state != I915_BUDDY_FREE && + block_state != I915_BUDDY_SPLIT) { + pr_err("block state mismatch\n"); + err = -EINVAL; + } + + block_size = i915_buddy_block_size(mm, block); + offset = i915_buddy_block_offset(block); + + if (block_size < mm->chunk_size) { + pr_err("block size smaller than min size\n"); + err = -EINVAL; + } + + if (!is_power_of_2(block_size)) { + pr_err("block size not power of two\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(block_size, mm->chunk_size)) { + pr_err("block size not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, mm->chunk_size)) { + pr_err("block offset not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, block_size)) { + pr_err("block offset not aligned to block size\n"); + err = -EINVAL; + } + + buddy = get_buddy(block); + + if (!buddy && block->parent) { + pr_err("buddy has gone fishing\n"); + err = -EINVAL; + } + + if (buddy) { + if (i915_buddy_block_offset(buddy) != (offset ^ block_size)) { + pr_err("buddy has wrong offset\n"); + err = -EINVAL; + } + + if (i915_buddy_block_size(mm, buddy) != block_size) { + pr_err("buddy size mismatch\n"); + err = -EINVAL; + } + + if (i915_buddy_block_state(buddy) == block_state && + block_state == I915_BUDDY_FREE) { + pr_err("block and its buddy are free\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_check_blocks(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 expected_size, + bool is_contiguous) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *prev; + u64 total; + int err = 0; + + block = NULL; + prev = NULL; + total = 0; + + list_for_each_entry(block, blocks, link) { + err = igt_check_block(mm, block); + + if (!i915_buddy_block_is_allocated(block)) { + pr_err("block not allocated\n"), + err = -EINVAL; + } + + if (is_contiguous && prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(block); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("block offset mismatch\n"); + err = -EINVAL; + } + } + + if (err) + break; 
+ + total += i915_buddy_block_size(mm, block); + prev = block; + } + + if (!err) { + if (total != expected_size) { + pr_err("size mismatch, expected=%llx, found=%llx\n", + expected_size, total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev block, dump:\n"); + igt_dump_block(mm, prev); + } + + pr_err("bad block, dump:\n"); + igt_dump_block(mm, block); + + return err; +} + +static int igt_check_mm(struct i915_buddy_mm *mm) +{ + struct i915_buddy_block *root; + struct i915_buddy_block *prev; + unsigned int i; + u64 total; + int err = 0; + + if (!mm->n_roots) { + pr_err("n_roots is zero\n"); + return -EINVAL; + } + + if (mm->n_roots != hweight64(mm->size)) { + pr_err("n_roots mismatch, n_roots=%u, expected=%lu\n", + mm->n_roots, hweight64(mm->size)); + return -EINVAL; + } + + root = NULL; + prev = NULL; + total = 0; + + for (i = 0; i < mm->n_roots; ++i) { + struct i915_buddy_block *block; + unsigned int order; + + root = mm->roots[i]; + if (!root) { + pr_err("root(%u) is NULL\n", i); + err = -EINVAL; + break; + } + + err = igt_check_block(mm, root); + + if (!i915_buddy_block_is_free(root)) { + pr_err("root not free\n"); + err = -EINVAL; + } + + order = i915_buddy_block_order(root); + + if (!i) { + if (order != mm->max_order) { + pr_err("max order root missing\n"); + err = -EINVAL; + } + } + + if (prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(root); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("root offset mismatch\n"); + err = -EINVAL; + } + } + + block = list_first_entry_or_null(&mm->free_list[order], + struct i915_buddy_block, + link); + if (block != root) { + pr_err("root mismatch at order=%u\n", order); + err = -EINVAL; + } + + if (err) + break; + + prev = root; + total += i915_buddy_block_size(mm, root); + } + + if (!err) { + if (total != mm->size) { + pr_err("expected mm size=%llx, found=%llx\n", mm->size, + total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev root(%u), dump:\n", i - 1); + igt_dump_block(mm, prev); + } + + if (root) { + pr_err("bad root(%u), dump:\n", i); + igt_dump_block(mm, root); + } + + return err; +} + +static void igt_mm_config(u64 *size, u64 *chunk_size) +{ + I915_RND_STATE(prng); + u32 s, ms; + + /* Nothing fancy, just try to get an interesting bit pattern */ + + prandom_seed_state(&prng, i915_selftest.random_seed); + + /* Let size be a random number of pages up to 8 GB (2M pages) */ + s = 1 + i915_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); + /* Let the chunk size be a random power of 2 less than size */ + ms = BIT(i915_prandom_u32_max_state(ilog2(s), &prng)); + /* Round size down to the chunk size */ + s &= -ms; + + /* Convert from pages to bytes */ + *chunk_size = (u64)ms << 12; + *size = (u64)s << 12; +} + +static int igt_buddy_alloc_smoke(void *arg) +{ + struct i915_buddy_mm mm; + IGT_TIMEOUT(end_time); + I915_RND_STATE(prng); + u64 chunk_size; + u64 mm_size; + int *order; + int err, i; + + igt_mm_config(&mm_size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", mm_size, chunk_size); + + err = i915_buddy_init(&mm, mm_size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + order = i915_random_order(mm.max_order + 1, &prng); + if (!order) + goto out_fini; + + for (i = 0; i <= mm.max_order; ++i) { + struct i915_buddy_block *block; + int max_order = order[i]; + bool timeout 
= false; + LIST_HEAD(blocks); + int order; + u64 total; + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort\n"); + break; + } + + pr_info("filling from max_order=%u\n", max_order); + + order = max_order; + total = 0; + + do { +retry: + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + if (err == -ENOMEM) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + } else { + if (order--) { + err = 0; + goto retry; + } + + pr_err("buddy_alloc with order=%d failed(%d)\n", + order, err); + } + + break; + } + + list_add_tail(&block->link, &blocks); + + if (i915_buddy_block_order(block) != order) { + pr_err("buddy_alloc order mismatch\n"); + err = -EINVAL; + break; + } + + total += i915_buddy_block_size(&mm, block); + + if (__igt_timeout(end_time, NULL)) { + timeout = true; + break; + } + } while (total < mm.size); + + if (!err) + err = igt_check_blocks(&mm, &blocks, total, false); + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + + if (err || timeout) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + kfree(order); +out_fini: + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_pessimistic(void *arg) +{ + const unsigned int max_order = 16; + struct i915_buddy_block *block, *bn; + struct i915_buddy_mm mm; + unsigned int order; + LIST_HEAD(blocks); + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. + */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order < max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* And now the last remaining block available */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM on final alloc\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + + /* Should be completely full! 
*/ + for (order = max_order; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + block = list_last_entry(&blocks, typeof(*block), link); + list_del(&block->link); + i915_buddy_free(&mm, block); + + /* As we free in increasing size, we make available larger blocks */ + order = 1; + list_for_each_entry_safe(block, bn, &blocks, link) { + list_del(&block->link); + i915_buddy_free(&mm, block); + + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + order++; + } + + /* To confirm, now the whole mm should be available */ + block = i915_buddy_alloc(&mm, max_order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + max_order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_optimistic(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + int order; + int err; + + /* + * Create a mm with one block of each order available, and + * try to allocate them all. + */ + + err = i915_buddy_init(&mm, + PAGE_SIZE * ((1 << (max_order + 1)) - 1), + PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* Should be completely full! */ + block = i915_buddy_alloc(&mm, 0); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded, it should be full!"); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_pathological(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + LIST_HEAD(holes); + int order, top; + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. Free the largest block, then whittle down again. + * Eventually we will have a fully 50% fragmented mm. 
+ */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (top = max_order; top; top--) { + /* Make room by freeing the largest allocated block */ + block = list_first_entry_or_null(&blocks, typeof(*block), link); + if (block) { + list_del(&block->link); + i915_buddy_free(&mm, block); + } + + for (order = top; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d, top=%d\n", + order, top); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + } + + /* There should be one final page for this sub-allocation */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM for hole\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &holes); + + block = i915_buddy_alloc(&mm, top); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", + top, max_order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + i915_buddy_free_list(&mm, &holes); + + /* Nothing larger than blocks of chunk_size now available */ + for (order = 1; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + +err: + list_splice_tail(&holes, &blocks); + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_range(void *arg) +{ + struct i915_buddy_mm mm; + unsigned long page_num; + LIST_HEAD(blocks); + u64 chunk_size; + u64 offset; + u64 size; + u64 rem; + int err; + + igt_mm_config(&size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", size, chunk_size); + + err = i915_buddy_init(&mm, size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort, abort, abort!\n"); + goto err_fini; + } + + rem = mm.size; + offset = 0; + + for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { + struct i915_buddy_block *block; + LIST_HEAD(tmp); + + size = min(page_num * mm.chunk_size, rem); + + err = i915_buddy_alloc_range(&mm, &tmp, offset, size); + if (err) { + if (err == -ENOMEM) { + pr_info("alloc_range hit -ENOMEM with size=%llx\n", + size); + } else { + pr_err("alloc_range with offset=%llx, size=%llx failed(%d)\n", + offset, size, err); + } + + break; + } + + block = list_first_entry_or_null(&tmp, + struct i915_buddy_block, + link); + if (!block) { + pr_err("alloc_range has no blocks\n"); + err = -EINVAL; + break; + } + + if (i915_buddy_block_offset(block) != offset) { + pr_err("alloc_range start offset mismatch, found=%llx, expected=%llx\n", + i915_buddy_block_offset(block), offset); + err = -EINVAL; + } + + if (!err) + err = igt_check_blocks(&mm, &tmp, size, true); + + list_splice_tail(&tmp, &blocks); + + if (err) + break; + + offset += size; + + rem -= size; + if (!rem) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + +err_fini: + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_limit(void *arg) +{ + 
struct i915_buddy_block *block; + struct i915_buddy_mm mm; + const u64 size = U64_MAX; + int err; + + err = i915_buddy_init(&mm, size, PAGE_SIZE); + if (err) + return err; + + if (mm.max_order != I915_BUDDY_MAX_ORDER) { + pr_err("mm.max_order(%d) != %d\n", + mm.max_order, I915_BUDDY_MAX_ORDER); + err = -EINVAL; + goto out_fini; + } + + block = i915_buddy_alloc(&mm, mm.max_order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto out_fini; + } + + if (i915_buddy_block_order(block) != mm.max_order) { + pr_err("block order(%d) != %d\n", + i915_buddy_block_order(block), mm.max_order); + err = -EINVAL; + goto out_free; + } + + if (i915_buddy_block_size(&mm, block) != + BIT_ULL(mm.max_order) * PAGE_SIZE) { + pr_err("block size(%llu) != %llu\n", + i915_buddy_block_size(&mm, block), + BIT_ULL(mm.max_order) * PAGE_SIZE); + err = -EINVAL; + goto out_free; + } + +out_free: + i915_buddy_free(&mm, block); +out_fini: + i915_buddy_fini(&mm); + return err; +} + +int i915_buddy_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_buddy_alloc_pessimistic), + SUBTEST(igt_buddy_alloc_optimistic), + SUBTEST(igt_buddy_alloc_pathological), + SUBTEST(igt_buddy_alloc_smoke), + SUBTEST(igt_buddy_alloc_range), + SUBTEST(igt_buddy_alloc_limit), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a92c0e9b7e6b..cfa5c4165a4f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests) selftest(gt_pm, intel_gt_pm_live_selftests) selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(migrate, intel_migrate_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(mman, i915_gem_mman_live_selftests) @@ -37,14 +38,14 @@ selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) -selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) -selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) +selftest(gem_migrate, i915_gem_migrate_live_selftests) selftest(reset, intel_reset_live_selftests) selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) +selftest(slpc, intel_slpc_live_selftests) /* Here be dragons: keep last to run last! 
*/ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 34e5caf38093..793fb28a770d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -32,5 +32,5 @@ selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) -selftest(contexts, i915_gem_context_mock_selftests) selftest(memory_region, intel_memory_region_mock_selftests) +selftest(buddy, i915_buddy_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index c2389f8a257d..058450d351f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -17,5 +17,5 @@ */ selftest(engine_cs, intel_engine_cs_perf_selftests) selftest(request, i915_request_perf_selftests) -selftest(blt, i915_gem_object_blt_perf_selftests) +selftest(migrate, intel_migrate_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index bd5c96a77ba3..d67710d10615 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1313,7 +1313,7 @@ static int __live_parallel_engine1(void *arg) i915_request_add(rq); err = 0; - if (i915_request_wait(rq, 0, HZ / 5) < 0) + if (i915_request_wait(rq, 0, HZ) < 0) err = -ETIME; i915_request_put(rq); if (err) @@ -1419,7 +1419,7 @@ static int __live_parallel_spin(void *arg) } igt_spinner_end(&spin); - if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) + if (err == 0 && i915_request_wait(rq, 0, HZ) < 0) err = -EIO; i915_request_put(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 1bc11c09faef..484759c9409c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -187,7 +187,7 @@ int i915_mock_selftests(void) err = run_selftests(mock, NULL); if (err) { i915_selftest.mock = err; - return err; + return 1; } if (i915_selftest.mock < 0) { @@ -430,7 +430,7 @@ module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); module_param_named(st_filter, i915_selftest.filter, charp, 0400); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); -MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then leave dummy module)"); module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 7b0939e3f007..a6c71fca61aa 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915) 
cond_resched(); - if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index c130010a7033..1c721542e277 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -5,7 +5,7 @@ */ #include "i915_drv.h" -#include "gt/intel_gt_requests.h" +#include "gt/intel_gt.h" #include "../i915_selftest.h" #include "igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.c b/drivers/gpu/drm/i915/selftests/igt_mmap.c index 583a4ff8b8c9..e920a461bd36 100644 --- a/drivers/gpu/drm/i915/selftests/igt_mmap.c +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.c @@ -9,15 +9,28 @@ #include "i915_drv.h" #include "igt_mmap.h" -unsigned long igt_mmap_node(struct drm_i915_private *i915, - struct drm_vma_offset_node *node, - unsigned long addr, - unsigned long prot, - unsigned long flags) +unsigned long igt_mmap_offset(struct drm_i915_private *i915, + u64 offset, + unsigned long size, + unsigned long prot, + unsigned long flags) { + struct drm_vma_offset_node *node; struct file *file; + unsigned long addr; int err; + /* no need to refcount, we own this object */ + drm_vma_offset_lock_lookup(i915->drm.vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(i915->drm.vma_offset_manager, + offset / PAGE_SIZE, size / PAGE_SIZE); + drm_vma_offset_unlock_lookup(i915->drm.vma_offset_manager); + + if (GEM_WARN_ON(!node)) { + pr_info("Failed to lookup %llx\n", offset); + return -ENOENT; + } + /* Pretend to open("/dev/dri/card0") */ file = mock_drm_getfile(i915->drm.primary, O_RDWR); if (IS_ERR(file)) @@ -29,7 +42,7 @@ unsigned long igt_mmap_node(struct drm_i915_private *i915, goto out_file; } - addr = vm_mmap(file, addr, drm_vma_node_size(node) << PAGE_SHIFT, + addr = vm_mmap(file, 0, drm_vma_node_size(node) << PAGE_SHIFT, prot, flags, drm_vma_node_offset_addr(node)); drm_vma_node_revoke(node, file->private_data); diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.h b/drivers/gpu/drm/i915/selftests/igt_mmap.h index 6e716cb59d7e..acbe34d81a6d 100644 --- a/drivers/gpu/drm/i915/selftests/igt_mmap.h +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.h @@ -7,13 +7,15 @@ #ifndef IGT_MMAP_H #define IGT_MMAP_H +#include <linux/types.h> + struct drm_i915_private; struct drm_vma_offset_node; -unsigned long igt_mmap_node(struct drm_i915_private *i915, - struct drm_vma_offset_node *node, - unsigned long addr, - unsigned long prot, - unsigned long flags); +unsigned long igt_mmap_offset(struct drm_i915_private *i915, + u64 offset, + unsigned long size, + unsigned long prot, + unsigned long flags); #endif /* IGT_MMAP_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index c85d516b85cd..418caae84759 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -15,12 +15,15 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" -#include "gem/i915_gem_object_blt.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "i915_buddy.h" +#include "gt/intel_migrate.h" #include "i915_memcpy.h" +#include 
"i915_ttm_buddy_manager.h" #include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" @@ -57,16 +60,15 @@ static int igt_mock_fill(void *arg) LIST_HEAD(objects); int err = 0; - page_size = mem->chunk_size; + page_size = PAGE_SIZE; + max_pages = div64_u64(total, page_size); rem = total; -retry: - max_pages = div64_u64(rem, page_size); for_each_prime_number_from(page_num, 1, max_pages) { resource_size_t size = page_num * page_size; struct drm_i915_gem_object *obj; - obj = i915_gem_object_create_region(mem, size, 0); + obj = i915_gem_object_create_region(mem, size, 0, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); break; @@ -86,11 +88,6 @@ retry: err = 0; if (err == -ENXIO) { if (page_num * page_size <= rem) { - if (mem->is_range_manager && max_pages > 1) { - max_pages >>= 1; - goto retry; - } - pr_err("%s failed, space still left in region\n", __func__); err = -EINVAL; @@ -113,7 +110,7 @@ igt_object_create(struct intel_memory_region *mem, struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mem, size, flags); + obj = i915_gem_object_create_region(mem, size, 0, flags); if (IS_ERR(obj)) return obj; @@ -157,6 +154,7 @@ static bool is_contiguous(struct drm_i915_gem_object *obj) static int igt_mock_reserve(void *arg) { struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; resource_size_t avail = resource_size(&mem->region); struct drm_i915_gem_object *obj; const u32 chunk_size = SZ_32M; @@ -166,16 +164,18 @@ static int igt_mock_reserve(void *arg) LIST_HEAD(objects); int err = 0; - if (!list_empty(&mem->reserved)) { - pr_err("%s region reserved list is not empty\n", __func__); - return -EINVAL; - } - count = avail / chunk_size; order = i915_random_order(count, &prng); if (!order) return 0; + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_free_order; + } + /* Reserve a bunch of ranges within the region */ for (i = 0; i < count; ++i) { u64 start = order[i] * chunk_size; @@ -205,18 +205,12 @@ static int igt_mock_reserve(void *arg) do { u32 size = i915_prandom_u32_max_state(cur_avail, &prng); -retry: size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { - if (PTR_ERR(obj) == -ENXIO) { - if (mem->is_range_manager && - size > mem->chunk_size) { - size >>= 1; - goto retry; - } + if (PTR_ERR(obj) == -ENXIO) break; - } + err = PTR_ERR(obj); goto out_close; } @@ -230,9 +224,10 @@ retry: } out_close: - kfree(order); close_objects(mem, &objects); - intel_memory_region_unreserve(mem); + intel_memory_region_put(mem); +out_free_order: + kfree(order); return err; } @@ -252,7 +247,7 @@ static int igt_mock_contiguous(void *arg) total = resource_size(&mem->region); /* Min size */ - obj = igt_object_create(mem, &objects, mem->chunk_size, + obj = igt_object_create(mem, &objects, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -333,17 +328,15 @@ static int igt_mock_contiguous(void *arg) min = target; target = total >> 1; - if (!mem->is_range_manager) { - /* Make sure we can still allocate all the fragmented space */ - obj = igt_object_create(mem, &objects, target, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_close_objects; - } - - igt_object_release(obj); + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto 
err_close_objects; } + igt_object_release(obj); + /* * Even though we have enough free space, we don't have a big enough * contiguous block. Make sure that holds true. @@ -362,7 +355,7 @@ static int igt_mock_contiguous(void *arg) } target >>= 1; - } while (target >= mem->chunk_size); + } while (target >= PAGE_SIZE); err_close_objects: list_splice_tail(&holes, &objects); @@ -374,7 +367,9 @@ static int igt_mock_splintered_region(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; + struct i915_buddy_mm *mm; unsigned int expected_order; LIST_HEAD(objects); u64 size; @@ -382,7 +377,7 @@ static int igt_mock_splintered_region(void *arg) /* * Sanity check we can still allocate everything even if the - * max_order != mm.size. i.e our starting address space size is not a + * mm.max_order != mm.size. i.e our starting address space size is not a * power-of-two. */ @@ -391,20 +386,29 @@ static int igt_mock_splintered_region(void *arg) if (IS_ERR(mem)) return PTR_ERR(mem); - expected_order = get_order(rounddown_pow_of_two(size)); - if (mem->max_order != expected_order) { - pr_err("%s order mismatch(%u != %u)\n", - __func__, mem->max_order, expected_order); - err = -EINVAL; - goto out_put; - } - obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_close; } + res = to_ttm_buddy_resource(obj->mm.res); + mm = res->mm; + if (mm->size != size) { + pr_err("%s size mismatch(%llu != %llu)\n", + __func__, mm->size, size); + err = -EINVAL; + goto out_put; + } + + expected_order = get_order(rounddown_pow_of_two(size)); + if (mm->max_order != expected_order) { + pr_err("%s order mismatch(%u != %u)\n", + __func__, mm->max_order, expected_order); + err = -EINVAL; + goto out_put; + } + close_objects(mem, &objects); /* @@ -415,15 +419,12 @@ static int igt_mock_splintered_region(void *arg) * sure that does indeed hold true. */ - if (!mem->is_range_manager) { - obj = igt_object_create(mem, &objects, size, - I915_BO_ALLOC_CONTIGUOUS); - if (!IS_ERR(obj)) { - pr_err("%s too large contiguous allocation was not rejected\n", - __func__); - err = -EINVAL; - goto out_close; - } + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + if (!IS_ERR(obj)) { + pr_err("%s too large contiguous allocation was not rejected\n", + __func__); + err = -EINVAL; + goto out_close; } obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), @@ -442,6 +443,74 @@ out_put: return err; } +#ifndef SZ_8G +#define SZ_8G BIT_ULL(33) +#endif + +static int igt_mock_max_segment(void *arg) +{ + const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; + struct drm_i915_gem_object *obj; + struct i915_buddy_block *block; + struct i915_buddy_mm *mm; + struct list_head *blocks; + struct scatterlist *sg; + LIST_HEAD(objects); + u64 size; + int err = 0; + + /* + * While we may create very large contiguous blocks, we may need + * to break those down for consumption elsewhere. In particular, + * dma-mapping with scatterlist elements have an implicit limit of + * UINT_MAX on each element. 
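 *
 * A minimal sketch of the capping this test checks for (editorial note,
 * not part of the patch): a contiguous block larger than the segment
 * limit has to be emitted as several scatterlist entries, along the
 * lines of
 *
 *	max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 *	for (rem = block_size; rem; rem -= len) {
 *		len = min_t(u64, rem, max_segment);
 *		... emit one scatterlist entry of length len ...
 *	}
 *
 * so every sg->length stays within max_segment, which is what the loop
 * over obj->mm.pages->sgl at the end of this test verifies.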
+ */ + + size = SZ_8G; + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_put; + } + + res = to_ttm_buddy_resource(obj->mm.res); + blocks = &res->blocks; + mm = res->mm; + size = 0; + list_for_each_entry(block, blocks, link) { + if (i915_buddy_block_size(mm, block) > size) + size = i915_buddy_block_size(mm, block); + } + if (size < max_segment) { + pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", + __func__, max_segment, size); + err = -EINVAL; + goto out_close; + } + + for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + if (sg->length > max_segment) { + pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", + __func__, sg->length, max_segment); + err = -EINVAL; + goto out_close; + } + } + +out_close: + close_objects(mem, &objects); +out_put: + intel_memory_region_put(mem); + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -579,6 +648,62 @@ out_put: return err; } +static int igt_lmem_create_with_ps(void *arg) +{ + struct drm_i915_private *i915 = arg; + int err = 0; + u32 ps; + + for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) { + struct drm_i915_gem_object *obj; + dma_addr_t daddr; + + obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENXIO || err == -E2BIG) { + pr_info("%s not enough lmem for ps(%u) err=%d\n", + __func__, ps, err); + err = 0; + } + + break; + } + + if (obj->base.size != ps) { + pr_err("%s size(%zu) != ps(%u)\n", + __func__, obj->base.size, ps); + err = -EINVAL; + goto out_put; + } + + i915_gem_object_lock(obj, NULL); + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + daddr = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s daddr(%pa) not aligned with ps(%u)\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_unpin; + } + +out_unpin: + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); +out_put: + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + + if (err) + break; + } + + return err; +} + static int igt_lmem_create_cleared_cpu(void *arg) { struct drm_i915_private *i915 = arg; @@ -741,6 +866,7 @@ static int igt_lmem_write_cpu(void *arg) PAGE_SIZE - 64, }; struct intel_engine_cs *engine; + struct i915_request *rq; u32 *vaddr; u32 sz; u32 i; @@ -767,15 +893,20 @@ static int igt_lmem_write_cpu(void *arg) goto out_put; } + i915_gem_object_lock(obj, NULL); /* Put the pages into a known state -- from the gpu for added fun */ intel_engine_pm_get(engine); - err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); - intel_engine_pm_put(engine); - if (err) - goto out_unpin; + err = intel_context_migrate_clear(engine->gt->migrate.context, NULL, + obj->mm.pages->sgl, I915_CACHE_NONE, + true, 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_wc_domain(obj, true); + intel_engine_pm_put(engine); + if (!err) + err = i915_gem_object_set_to_wc_domain(obj, true); i915_gem_object_unlock(obj); if (err) goto out_unpin; @@ -858,7 +989,7 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, struct drm_i915_gem_object *obj; void *addr; - obj = i915_gem_object_create_region(mr, size, 0); + obj = 
i915_gem_object_create_region(mr, size, 0, 0); if (IS_ERR(obj)) { if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */ return ERR_PTR(-ENODEV); @@ -1046,6 +1177,7 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), + SUBTEST(igt_mock_max_segment), }; struct intel_memory_region *mem; struct drm_i915_private *i915; @@ -1074,6 +1206,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), + SUBTEST(igt_lmem_create_with_ps), SUBTEST(igt_lmem_create_cleared_cpu), SUBTEST(igt_lmem_write_cpu), SUBTEST(igt_lmem_write_gpu), diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c new file mode 100644 index 000000000000..4b328346b48a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +//#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" +#include "i915_drv.h" +#include "i915_selftest.h" + +#include "selftests/intel_scheduler_helpers.h" + +#define REDUCED_TIMESLICE 5 +#define REDUCED_PREEMPT 10 +#define WAIT_FOR_RESET_TIME 10000 + +int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved, + u32 modify_type) + +{ + int err; + + saved->reset = engine->i915->params.reset; + saved->flags = engine->flags; + saved->timeslice = engine->props.timeslice_duration_ms; + saved->preempt_timeout = engine->props.preempt_timeout_ms; + + switch (modify_type) { + case SELFTEST_SCHEDULER_MODIFY_FAST_RESET: + /* + * Enable force pre-emption on time slice expiration + * together with engine reset on pre-emption timeout. + * This is required to make the GuC notice and reset + * the single hanging context. + * Also, reduce the preemption timeout to something + * small to speed the test up. 
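 *
 * A minimal usage sketch for these helpers (editorial, not part of the
 * patch):
 *
 *	struct intel_selftest_saved_policy saved;
 *
 *	err = intel_selftest_modify_policy(engine, &saved,
 *					   SELFTEST_SCHEDULER_MODIFY_FAST_RESET);
 *	if (err)
 *		return err;
 *	... submit the hanging request and wait for the reset ...
 *	err = intel_selftest_restore_policy(engine, &saved);
 *
 * so the reduced timeslice and preempt timeout never leak past the test.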
+ */ + engine->i915->params.reset = 2; + engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION; + engine->props.timeslice_duration_ms = REDUCED_TIMESLICE; + engine->props.preempt_timeout_ms = REDUCED_PREEMPT; + break; + + case SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK: + engine->props.preempt_timeout_ms = 0; + break; + + default: + pr_err("Invalid scheduler policy modification type: %d!\n", modify_type); + return -EINVAL; + } + + if (!intel_engine_uses_guc(engine)) + return 0; + + err = intel_guc_global_policies_update(&engine->gt->uc.guc); + if (err) + intel_selftest_restore_policy(engine, saved); + + return err; +} + +int intel_selftest_restore_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved) +{ + /* Restore the original policies */ + engine->i915->params.reset = saved->reset; + engine->flags = saved->flags; + engine->props.timeslice_duration_ms = saved->timeslice; + engine->props.preempt_timeout_ms = saved->preempt_timeout; + + if (!intel_engine_uses_guc(engine)) + return 0; + + return intel_guc_global_policies_update(&engine->gt->uc.guc); +} + +int intel_selftest_wait_for_rq(struct i915_request *rq) +{ + long ret; + + ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME); + if (ret < 0) + return ret; + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h new file mode 100644 index 000000000000..35c098601ac0 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ +#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ + +#include <linux/types.h> + +struct i915_request; +struct intel_engine_cs; + +struct intel_selftest_saved_policy { + u32 flags; + u32 reset; + u64 timeslice; + u64 preempt_timeout; +}; + +enum selftest_scheduler_modify { + SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK = 0, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET, +}; + +int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved, + enum selftest_scheduler_modify modify_type); +int intel_selftest_restore_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved); +int intel_selftest_wait_for_rq(struct i915_request *rq); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 8ef9e6a4ad05..720b60853f8b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,6 +68,7 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, + { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; const i915_reg_t *reg; unsigned int i, j; @@ -103,6 +104,7 @@ int intel_uncore_mock_selftests(void) { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, + { __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true }, }; int err, i; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d189c4bd4bef..4f8180146888 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -52,7 +52,8 
@@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, gt, id) mock_engine_flush(engine); - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, + NULL)); } static void mock_device_release(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 5c7ae40bba63..cc047ec594f9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -73,7 +73,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); - ppgtt->vm.file = ERR_PTR(-ENODEV); ppgtt->vm.dma = i915->drm.dev; i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index eafc5a04975c..efa86dffe3c6 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -3,6 +3,7 @@ * Copyright © 2019-2021 Intel Corporation */ +#include <drm/ttm/ttm_placement.h> #include <linux/scatterlist.h> #include <drm/ttm/ttm_placement.h> @@ -16,7 +17,7 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); sg_free_table(pages); kfree(pages); } @@ -25,26 +26,31 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) { unsigned int flags; struct sg_table *pages; + int err; - flags = I915_ALLOC_MIN_PAGE_SIZE; + flags = 0; if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; + flags |= TTM_PL_FLAG_CONTIGUOUS; - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); + obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, + obj->base.size, + flags); + if (IS_ERR(obj->mm.res)) + return PTR_ERR(obj->mm.res); - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); + pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - return PTR_ERR(pages); + err = PTR_ERR(pages); + goto err_free_resource; } __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); return 0; + +err_free_resource: + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); + return err; } static const struct drm_i915_gem_object_ops mock_region_obj_ops = { @@ -57,6 +63,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = { static int mock_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; |