diff options
Diffstat (limited to 'drivers/ras')
-rw-r--r-- | drivers/ras/amd/atl/core.c | 52 | ||||
-rw-r--r-- | drivers/ras/amd/atl/dehash.c | 43 | ||||
-rw-r--r-- | drivers/ras/amd/atl/denormalize.c | 561 | ||||
-rw-r--r-- | drivers/ras/amd/atl/internal.h | 48 | ||||
-rw-r--r-- | drivers/ras/amd/atl/map.c | 97 | ||||
-rw-r--r-- | drivers/ras/amd/atl/system.c | 21 | ||||
-rw-r--r-- | drivers/ras/amd/fmpm.c | 4 |
7 files changed, 756 insertions, 70 deletions
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c index 6dc4e06305f7..4197e10993ac 100644 --- a/drivers/ras/amd/atl/core.c +++ b/drivers/ras/amd/atl/core.c @@ -49,26 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx) return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg); } -static int add_legacy_hole(struct addr_ctx *ctx) +static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr) { - u32 dram_hole_base; - u8 func = 0; - if (!legacy_hole_en(ctx)) - return 0; + return addr; - if (df_cfg.rev >= DF4) - func = 7; + if (addr >= df_cfg.dram_hole_base) + addr += (BIT_ULL(32) - df_cfg.dram_hole_base); - if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base)) - return -EINVAL; + return addr; +} - dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; +static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr) +{ + if (!legacy_hole_en(ctx)) + return addr; - if (ctx->ret_addr >= dram_hole_base) - ctx->ret_addr += (BIT_ULL(32) - dram_hole_base); + if (addr >= df_cfg.dram_hole_base) + addr -= (BIT_ULL(32) - df_cfg.dram_hole_base); - return 0; + return addr; } static u64 get_base_addr(struct addr_ctx *ctx) @@ -83,14 +83,14 @@ static u64 get_base_addr(struct addr_ctx *ctx) return base_addr << DF_DRAM_BASE_LIMIT_LSB; } -static int add_base_and_hole(struct addr_ctx *ctx) +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr) { - ctx->ret_addr += get_base_addr(ctx); - - if (add_legacy_hole(ctx)) - return -EINVAL; + return add_legacy_hole(ctx, addr + get_base_addr(ctx)); +} - return 0; +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr) +{ + return remove_legacy_hole(ctx, addr) - get_base_addr(ctx); } static bool late_hole_remove(struct addr_ctx *ctx) @@ -125,6 +125,9 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig ctx.inputs.die_id = die_id; ctx.inputs.coh_st_inst_id = coh_st_inst_id; + if (legacy_hole_en(&ctx) && !df_cfg.dram_hole_base) + return -EINVAL; + if (determine_node_id(&ctx, socket_id, die_id)) return -EINVAL; @@ -134,14 +137,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig if (denormalize_address(&ctx)) return -EINVAL; - if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx)) - return -EINVAL; + if (!late_hole_remove(&ctx)) + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr); if (dehash_address(&ctx)) return -EINVAL; - if (late_hole_remove(&ctx) && add_base_and_hole(&ctx)) - return -EINVAL; + if (late_hole_remove(&ctx)) + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr); if (addr_over_limit(&ctx)) return -EINVAL; @@ -206,7 +209,7 @@ static int __init amd_atl_init(void) __module_get(THIS_MODULE); amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr); - pr_info("AMD Address Translation Library initialized"); + pr_info("AMD Address Translation Library initialized\n"); return 0; } @@ -222,4 +225,5 @@ static void __exit amd_atl_exit(void) module_init(amd_atl_init); module_exit(amd_atl_exit); +MODULE_DESCRIPTION("AMD Address Translation Library"); MODULE_LICENSE("GPL"); diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c index 4ea46262c4f5..d4ee7ecabaee 100644 --- a/drivers/ras/amd/atl/dehash.c +++ b/drivers/ras/amd/atl/dehash.c @@ -12,41 +12,10 @@ #include "internal.h" -/* - * Verify the interleave bits are correct in the different interleaving - * settings. - * - * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the - * respective interleaving is disabled. - */ -static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, - u8 num_intlv_dies, u8 num_intlv_sockets) -{ - if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { - pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); - return false; - } - - if (ctx->map.num_intlv_dies > num_intlv_dies) { - pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); - return false; - } - - if (ctx->map.num_intlv_sockets > num_intlv_sockets) { - pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); - return false; - } - - return true; -} - static int df2_dehash_addr(struct addr_ctx *ctx) { u8 hashed_bit, intlv_bit, intlv_bit_pos; - if (!map_bits_valid(ctx, 8, 9, 1, 1)) - return -EINVAL; - intlv_bit_pos = ctx->map.intlv_bit_pos; intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); @@ -67,9 +36,6 @@ static int df3_dehash_addr(struct addr_ctx *ctx) bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; u8 hashed_bit, intlv_bit, intlv_bit_pos; - if (!map_bits_valid(ctx, 8, 9, 1, 1)) - return -EINVAL; - hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); @@ -171,9 +137,6 @@ static int df4_dehash_addr(struct addr_ctx *ctx) bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; u8 hashed_bit, intlv_bit; - if (!map_bits_valid(ctx, 8, 8, 1, 2)) - return -EINVAL; - hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); @@ -247,9 +210,6 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) u8 hashed_bit, intlv_bit; u64 rehash_vector; - if (!map_bits_valid(ctx, 8, 8, 1, 2)) - return -EINVAL; - hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); @@ -360,9 +320,6 @@ static int mi300_dehash_addr(struct addr_ctx *ctx) bool hashed_bit, intlv_bit, test_bit; u8 num_intlv_bits, base_bit, i; - if (!map_bits_valid(ctx, 8, 8, 4, 1)) - return -EINVAL; - hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index e279224288d6..1a525cfa983c 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -448,6 +448,118 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; } +static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx, + struct df4p5_denorm_ctx *denorm_ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hash_pa8, hash_pa9, hash_pa12, hash_pa13; + u64 cs_id = 0; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); + hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); + hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; + + hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); + hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; + + hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); + hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; + + hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); + hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa9 | (hash_pa12 << 1)); + cs_id |= hash_pa8 << df_cfg.socket_id_shift; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa12 | (hash_pa13 << 1)); + cs_id |= hash_pa8 << df_cfg.socket_id_shift; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa8 | (hash_pa9 << 1)); + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa8 | (hash_pa12 << 1)); + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1); + cs_id %= denorm_ctx->mod_value; + cs_id <<= 1; + cs_id |= hash_pa8; + break; + + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 1; + cs_id |= hash_pa8; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa); + cs_id %= denorm_ctx->mod_value; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1; + cs_id %= denorm_ctx->mod_value; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } + + if (cs_id > 0xffff) { + atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n"); + return 0; + } + + return cs_id; +} + static int denorm_addr_common(struct addr_ctx *ctx) { u64 denorm_addr; @@ -699,6 +811,442 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx) return 0; } +static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx, + u64 addr) +{ + u64 temp_addr_a = 0, temp_addr_b = 0; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; + temp_addr_b |= FIELD_GET(BIT_ULL(9), addr); + temp_addr_b /= denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2; + temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr); + temp_addr_b /= denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; + temp_addr_b |= FIELD_GET(BIT_ULL(8), addr); + temp_addr_b /= denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } + + return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b; +} + +static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx, + struct df4p5_denorm_ctx *denorm_ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit; + + if (!denorm_ctx->rehash_vector) + return; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + if (denorm_ctx->rehash_vector & BIT_ULL(8)) { + hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(8); + } + + if (denorm_ctx->rehash_vector & BIT_ULL(9)) { + hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(9); + } + + if (denorm_ctx->rehash_vector & BIT_ULL(12)) { + hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(12); + } + + if (denorm_ctx->rehash_vector & BIT_ULL(13)) { + hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(13); + } +} + +static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx, + struct df4p5_denorm_ctx *denorm_ctx) +{ + /* + * The logical CS fabric ID of the permutation must be calculated from the + * current SPA with the base and with the MMIO hole. + */ + u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx); + + atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n"); + atl_debug(ctx, " calculated fabric id = 0x%x\n", id); + atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id); + + return denorm_ctx->coh_st_fabric_id == id; +} + +static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) +{ + u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa); + + /* + * The normalized address must be calculated with the current SPA without + * the base and without the MMIO hole. + */ + addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr); + + atl_debug(ctx, "Checking calculated normalized address:\n"); + atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", addr); + atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr); + + return addr == ctx->ret_addr; +} + +static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) +{ + u64 test_perm, temp_addr, denorm_addr, num_perms; + unsigned int dropped_remainder; + + denorm_ctx->div_addr *= denorm_ctx->mod_value; + + /* + * The high order bits of num_permutations represent the permutations + * of the dropped remainder. This will be either 0-3 or 0-5 depending + * on the interleave mode. The low order bits represent the + * permutations of other "lost" bits which will be any combination of + * 1, 2, or 3 bits depending on the interleave mode. + */ + num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift; + + for (test_perm = 0; test_perm < num_perms; test_perm++) { + denorm_addr = denorm_ctx->base_denorm_addr; + dropped_remainder = test_perm >> denorm_ctx->perm_shift; + temp_addr = denorm_ctx->div_addr + dropped_remainder; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_2K_HASH: + denorm_addr |= temp_addr << 14; + break; + + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + denorm_addr |= temp_addr << 13; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + denorm_addr |= temp_addr << 12; + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9; + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8; + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8; + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13; + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr); + recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx); + + atl_debug(ctx, "Checking potential system physical address 0x%016llx\n", + denorm_ctx->current_spa); + + if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx)) + continue; + + if (!match_norm_addr(ctx, denorm_ctx)) + continue; + + if (denorm_ctx->resolved_spa == INVALID_SPA || + denorm_ctx->current_spa > denorm_ctx->resolved_spa) + denorm_ctx->resolved_spa = denorm_ctx->current_spa; + } + + if (denorm_ctx->resolved_spa == INVALID_SPA) { + atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n", + ctx->ret_addr); + return -EINVAL; + } + + /* Return the resolved SPA without the base, without the MMIO hole */ + ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa); + + return 0; +} + +static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) +{ + denorm_ctx->current_spa = INVALID_SPA; + denorm_ctx->resolved_spa = INVALID_SPA; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + denorm_ctx->perm_shift = 3; + denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12); + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + denorm_ctx->perm_shift = 3; + denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13); + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + denorm_ctx->perm_shift = 2; + denorm_ctx->rehash_vector = BIT(8); + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + denorm_ctx->perm_shift = 2; + denorm_ctx->rehash_vector = BIT(8) | BIT(12); + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + denorm_ctx->perm_shift = 1; + denorm_ctx->rehash_vector = BIT(8); + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_ctx->perm_shift = 2; + denorm_ctx->rehash_vector = 0; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_ctx->perm_shift = 1; + denorm_ctx->rehash_vector = 0; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr); + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10; + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr); + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9; + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr); + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + if (ctx->map.num_intlv_chan % 3 == 0) + denorm_ctx->mod_value = 3; + else + denorm_ctx->mod_value = 5; + + denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx); + + atl_debug(ctx, "Initialized df4p5_denorm_ctx:"); + atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value); + atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift); + atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector); + atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr); + atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr); + atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id); + + return 0; +} + +/* + * For DF 4.5, parts of the physical address can be directly pulled from the + * normalized address. The exact bits will differ between interleave modes, but + * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of + * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system + * physical address. + * + * In this case, there is no way to reconstruct the missing bits (bits 8, 9, + * and 12) from the normalized address. Additionally, when bits [63:13] are + * divided by 3, the remainder is dropped. Determine the proper combination of + * "lost" bits and dropped remainder by iterating through each possible + * permutation of these bits and then normalizing the generated system physical + * addresses. If the normalized address matches the address we are trying to + * translate, then we have found the correct permutation of bits. + */ +static int denorm_addr_df4p5_np2(struct addr_ctx *ctx) +{ + struct df4p5_denorm_ctx denorm_ctx; + int ret = 0; + + memset(&denorm_ctx, 0, sizeof(denorm_ctx)); + + atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr); + + ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx); + if (ret) + return ret; + + return check_permutations(ctx, &denorm_ctx); +} + int denormalize_address(struct addr_ctx *ctx) { switch (ctx->map.intlv_mode) { @@ -710,6 +1258,19 @@ int denormalize_address(struct addr_ctx *ctx) case DF4_NPS2_5CHAN_HASH: case DF4_NPS1_10CHAN_HASH: return denorm_addr_df4_np2(ctx); + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + return denorm_addr_df4p5_np2(ctx); case DF3_6CHAN: return denorm_addr_df3_6chan(ctx); default: diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 196c1c8b578c..9de5d53d0568 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -21,6 +21,9 @@ #include "reg_fields.h" +#undef pr_fmt +#define pr_fmt(fmt) "amd_atl: " fmt + /* Maximum possible number of Coherent Stations within a single Data Fabric. */ #define MAX_COH_ST_CHANNELS 32 @@ -34,6 +37,8 @@ #define DF_DRAM_BASE_LIMIT_LSB 28 #define MI300_DRAM_LIMIT_LSB 20 +#define INVALID_SPA ~0ULL + enum df_revisions { UNKNOWN, DF2, @@ -90,6 +95,44 @@ enum intlv_modes { DF4p5_NPS1_10CHAN_2K_HASH = 0x49, }; +struct df4p5_denorm_ctx { + /* Indicates the number of "lost" bits. This will be 1, 2, or 3. */ + u8 perm_shift; + + /* A mask indicating the bits that need to be rehashed. */ + u16 rehash_vector; + + /* + * Represents the value that the high bits of the normalized address + * are divided by during normalization. This value will be 3 for + * interleave modes with a number of channels divisible by 3 or the + * value will be 5 for interleave modes with a number of channels + * divisible by 5. Power-of-two interleave modes are handled + * separately. + */ + u8 mod_value; + + /* + * Represents the bits that can be directly pulled from the normalized + * address. In each case, pass through bits [7:0] of the normalized + * address. The other bits depend on the interleave bit position which + * will be bit 10 for 1K interleave stripe cases and bit 11 for 2K + * interleave stripe cases. + */ + u64 base_denorm_addr; + + /* + * Represents the high bits of the physical address that have been + * divided by the mod_value. + */ + u64 div_addr; + + u64 current_spa; + u64 resolved_spa; + + u16 coh_st_fabric_id; +}; + struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, @@ -132,6 +175,8 @@ struct df_config { /* Number of DRAM Address maps visible in a Coherent Station. */ u8 num_coh_st_maps; + u32 dram_hole_base; + /* Global flags to handle special cases. */ struct df_flags flags; }; @@ -234,6 +279,9 @@ int dehash_address(struct addr_ctx *ctx); unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr); +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr); + /* * Make a gap in @data that is @num_bits long starting at @bit_num. * e.g. data = 11111111'b diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c index 8b908e8d7495..24a05af747d5 100644 --- a/drivers/ras/amd/atl/map.c +++ b/drivers/ras/amd/atl/map.c @@ -642,6 +642,99 @@ static int get_global_map_data(struct addr_ctx *ctx) return 0; } +/* + * Verify the interleave bits are correct in the different interleaving + * settings. + * + * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the + * respective interleaving is disabled. + */ +static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, + u8 num_intlv_dies, u8 num_intlv_sockets) +{ + if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return false; + } + + if (ctx->map.num_intlv_dies > num_intlv_dies) { + pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); + return false; + } + + if (ctx->map.num_intlv_sockets > num_intlv_sockets) { + pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); + return false; + } + + return true; +} + +static int validate_address_map(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case DF2_2CHAN_HASH: + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + goto err; + break; + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + goto err; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + if (ctx->map.num_intlv_sockets != 1 || !map_bits_valid(ctx, 8, 0, 1, 1)) + goto err; + break; + + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + if (ctx->map.num_intlv_sockets < 2 || !map_bits_valid(ctx, 8, 0, 1, 2)) + goto err; + break; + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + goto err; + break; + + /* Nothing to do for modes that don't need special validation checks. */ + default: + break; + } + + return 0; + +err: + atl_debug(ctx, "Inconsistent address map"); + return -EINVAL; +} + static void dump_address_map(struct dram_addr_map *map) { u8 i; @@ -678,5 +771,9 @@ int get_address_map(struct addr_ctx *ctx) dump_address_map(&ctx->map); + ret = validate_address_map(ctx); + if (ret) + return ret; + return ret; } diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 6979fa3d4fe2..e18d916d5e8b 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -223,6 +223,21 @@ static int determine_df_rev(void) return -EINVAL; } +static int get_dram_hole_base(void) +{ + u8 func = 0; + + if (df_cfg.rev >= DF4) + func = 7; + + if (df_indirect_read_broadcast(0, func, 0x104, &df_cfg.dram_hole_base)) + return -EINVAL; + + df_cfg.dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; + + return 0; +} + static void get_num_maps(void) { switch (df_cfg.rev) { @@ -266,6 +281,7 @@ static void dump_df_cfg(void) pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps); + pr_debug("dram_hole_base=0x%x", df_cfg.dram_hole_base); pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa); pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk); } @@ -273,7 +289,7 @@ static void dump_df_cfg(void) int get_df_system_info(void) { if (determine_df_rev()) { - pr_warn("amd_atl: Failed to determine DF Revision"); + pr_warn("Failed to determine DF Revision"); df_cfg.rev = UNKNOWN; return -EINVAL; } @@ -282,6 +298,9 @@ int get_df_system_info(void) get_num_maps(); + if (get_dram_hole_base()) + pr_warn("Failed to read DRAM hole base"); + dump_df_cfg(); return 0; diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 271dfad05d68..90de737fbc90 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -56,6 +56,8 @@ #include "../debugfs.h" +#include "atl/internal.h" + #define INVALID_CPU UINT_MAX /* Validation Bits */ @@ -116,8 +118,6 @@ static struct fru_rec **fru_records; /* system physical addresses array */ static u64 *spa_entries; -#define INVALID_SPA ~0ULL - static struct dentry *fmpm_dfs_dir; static struct dentry *fmpm_dfs_entries; |