1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
|
/* SPDX-License-Identifier: GPL-2.0 */
/*
* AMD Address Translation Library
*
* internal.h : Helper functions and common defines
*
* Copyright (c) 2023, Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
*/
#ifndef __AMD_ATL_INTERNAL_H__
#define __AMD_ATL_INTERNAL_H__
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/ras.h>
#include <asm/amd_nb.h>
#include "reg_fields.h"
#undef pr_fmt
#define pr_fmt(fmt) "amd_atl: " fmt
/* Maximum possible number of Coherent Stations within a single Data Fabric. */
#define MAX_COH_ST_CHANNELS 32
/* PCI ID for Zen4 Server DF Function 0. */
#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022
/* PCI IDs for MI300 DF Function 0. */
#define DF_FUNC0_ID_MI300 0x15281022
/* Shift needed for adjusting register values to true values. */
#define DF_DRAM_BASE_LIMIT_LSB 28
#define MI300_DRAM_LIMIT_LSB 20
#define INVALID_SPA ~0ULL
enum df_revisions {
UNKNOWN,
DF2,
DF3,
DF3p5,
DF4,
DF4p5,
};
/* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. */
enum intlv_modes {
NONE = 0x00,
NOHASH_2CHAN = 0x01,
NOHASH_4CHAN = 0x03,
NOHASH_8CHAN = 0x05,
DF3_6CHAN = 0x06,
NOHASH_16CHAN = 0x07,
NOHASH_32CHAN = 0x08,
DF3_COD4_2CHAN_HASH = 0x0C,
DF3_COD2_4CHAN_HASH = 0x0D,
DF3_COD1_8CHAN_HASH = 0x0E,
DF4_NPS4_2CHAN_HASH = 0x10,
DF4_NPS2_4CHAN_HASH = 0x11,
DF4_NPS1_8CHAN_HASH = 0x12,
DF4_NPS4_3CHAN_HASH = 0x13,
DF4_NPS2_6CHAN_HASH = 0x14,
DF4_NPS1_12CHAN_HASH = 0x15,
DF4_NPS2_5CHAN_HASH = 0x16,
DF4_NPS1_10CHAN_HASH = 0x17,
MI3_HASH_8CHAN = 0x18,
MI3_HASH_16CHAN = 0x19,
MI3_HASH_32CHAN = 0x1A,
DF2_2CHAN_HASH = 0x21,
/* DF4.5 modes are all IntLvNumChan + 0x20 */
DF4p5_NPS1_16CHAN_1K_HASH = 0x2C,
DF4p5_NPS0_24CHAN_1K_HASH = 0x2E,
DF4p5_NPS4_2CHAN_1K_HASH = 0x30,
DF4p5_NPS2_4CHAN_1K_HASH = 0x31,
DF4p5_NPS1_8CHAN_1K_HASH = 0x32,
DF4p5_NPS4_3CHAN_1K_HASH = 0x33,
DF4p5_NPS2_6CHAN_1K_HASH = 0x34,
DF4p5_NPS1_12CHAN_1K_HASH = 0x35,
DF4p5_NPS2_5CHAN_1K_HASH = 0x36,
DF4p5_NPS1_10CHAN_1K_HASH = 0x37,
DF4p5_NPS4_2CHAN_2K_HASH = 0x40,
DF4p5_NPS2_4CHAN_2K_HASH = 0x41,
DF4p5_NPS1_8CHAN_2K_HASH = 0x42,
DF4p5_NPS1_16CHAN_2K_HASH = 0x43,
DF4p5_NPS4_3CHAN_2K_HASH = 0x44,
DF4p5_NPS2_6CHAN_2K_HASH = 0x45,
DF4p5_NPS1_12CHAN_2K_HASH = 0x46,
DF4p5_NPS0_24CHAN_2K_HASH = 0x47,
DF4p5_NPS2_5CHAN_2K_HASH = 0x48,
DF4p5_NPS1_10CHAN_2K_HASH = 0x49,
};
struct df4p5_denorm_ctx {
/* Indicates the number of "lost" bits. This will be 1, 2, or 3. */
u8 perm_shift;
/* A mask indicating the bits that need to be rehashed. */
u16 rehash_vector;
/*
* Represents the value that the high bits of the normalized address
* are divided by during normalization. This value will be 3 for
* interleave modes with a number of channels divisible by 3 or the
* value will be 5 for interleave modes with a number of channels
* divisible by 5. Power-of-two interleave modes are handled
* separately.
*/
u8 mod_value;
/*
* Represents the bits that can be directly pulled from the normalized
* address. In each case, pass through bits [7:0] of the normalized
* address. The other bits depend on the interleave bit position which
* will be bit 10 for 1K interleave stripe cases and bit 11 for 2K
* interleave stripe cases.
*/
u64 base_denorm_addr;
/*
* Represents the high bits of the physical address that have been
* divided by the mod_value.
*/
u64 div_addr;
u64 current_spa;
u64 resolved_spa;
u16 coh_st_fabric_id;
};
struct df_flags {
__u8 legacy_ficaa : 1,
socket_id_shift_quirk : 1,
heterogeneous : 1,
__reserved_0 : 5;
};
struct df_config {
enum df_revisions rev;
/*
* These masks operate on the 16-bit Coherent Station IDs,
* e.g. Instance, Fabric, Destination, etc.
*/
u16 component_id_mask;
u16 die_id_mask;
u16 node_id_mask;
u16 socket_id_mask;
/*
* Least-significant bit of Node ID portion of the
* system-wide Coherent Station Fabric ID.
*/
u8 node_id_shift;
/*
* Least-significant bit of Die portion of the Node ID.
* Adjusted to include the Node ID shift in order to apply
* to the Coherent Station Fabric ID.
*/
u8 die_id_shift;
/*
* Least-significant bit of Socket portion of the Node ID.
* Adjusted to include the Node ID shift in order to apply
* to the Coherent Station Fabric ID.
*/
u8 socket_id_shift;
/* Number of DRAM Address maps visible in a Coherent Station. */
u8 num_coh_st_maps;
u32 dram_hole_base;
/* Global flags to handle special cases. */
struct df_flags flags;
};
extern struct df_config df_cfg;
struct dram_addr_map {
/*
* Each DRAM Address Map can operate independently
* in different interleaving modes.
*/
enum intlv_modes intlv_mode;
/* System-wide number for this address map. */
u8 num;
/* Raw register values */
u32 base;
u32 limit;
u32 ctl;
u32 intlv;
/*
* Logical to Physical Coherent Station Remapping array
*
* Index: Logical Coherent Station Instance ID
* Value: Physical Coherent Station Instance ID
*
* phys_coh_st_inst_id = remap_array[log_coh_st_inst_id]
*/
u8 remap_array[MAX_COH_ST_CHANNELS];
/*
* Number of bits covering DRAM Address map 0
* when interleaving is non-power-of-2.
*
* Used only for DF3_6CHAN.
*/
u8 np2_bits;
/* Position of the 'interleave bit'. */
u8 intlv_bit_pos;
/* Number of channels interleaved in this map. */
u8 num_intlv_chan;
/* Number of dies interleaved in this map. */
u8 num_intlv_dies;
/* Number of sockets interleaved in this map. */
u8 num_intlv_sockets;
/*
* Total number of channels interleaved accounting
* for die and socket interleaving.
*/
u8 total_intlv_chan;
/* Total bits needed to cover 'total_intlv_chan'. */
u8 total_intlv_bits;
};
/* Original input values cached for debug printing. */
struct addr_ctx_inputs {
u64 norm_addr;
u8 socket_id;
u8 die_id;
u8 coh_st_inst_id;
};
struct addr_ctx {
u64 ret_addr;
struct addr_ctx_inputs inputs;
struct dram_addr_map map;
/* AMD Node ID calculated from Socket and Die IDs. */
u8 node_id;
/*
* Coherent Station Instance ID
* Local ID used within a 'node'.
*/
u16 inst_id;
/*
* Coherent Station Fabric ID
* System-wide ID that includes 'node' bits.
*/
u16 coh_st_fabric_id;
};
int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo);
int get_df_system_info(void);
int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num);
int get_umc_info_mi300(void);
int get_address_map(struct addr_ctx *ctx);
int denormalize_address(struct addr_ctx *ctx);
int dehash_address(struct addr_ctx *ctx);
unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
/*
* Make a gap in @data that is @num_bits long starting at @bit_num.
* e.g. data = 11111111'b
* bit_num = 3
* num_bits = 2
* result = 1111100111'b
*/
static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data)
{
u64 temp1, temp2;
if (!num_bits)
return data;
if (!bit_num) {
WARN_ON_ONCE(num_bits >= BITS_PER_LONG);
return data << num_bits;
}
WARN_ON_ONCE(bit_num >= BITS_PER_LONG);
temp1 = data & GENMASK_ULL(bit_num - 1, 0);
temp2 = data & GENMASK_ULL(63, bit_num);
temp2 <<= num_bits;
return temp1 | temp2;
}
/*
* Remove bits in @data between @low_bit and @high_bit inclusive.
* e.g. data = XXXYYZZZ'b
* low_bit = 3
* high_bit = 4
* result = XXXZZZ'b
*/
static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data)
{
u64 temp1, temp2;
WARN_ON_ONCE(high_bit >= BITS_PER_LONG);
WARN_ON_ONCE(low_bit >= BITS_PER_LONG);
WARN_ON_ONCE(low_bit > high_bit);
if (!low_bit)
return data >> (high_bit++);
temp1 = GENMASK_ULL(low_bit - 1, 0) & data;
temp2 = GENMASK_ULL(63, high_bit + 1) & data;
temp2 >>= high_bit - low_bit + 1;
return temp1 | temp2;
}
#define atl_debug(ctx, fmt, arg...) \
pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\
(ctx)->inputs.socket_id, (ctx)->inputs.die_id,\
(ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg)
static inline void atl_debug_on_bad_df_rev(void)
{
pr_debug("Unrecognized DF rev: %u", df_cfg.rev);
}
static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
{
atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
}
#endif /* __AMD_ATL_INTERNAL_H__ */
|