// SPDX-License-Identifier: MIT
/*
* Copyright © 2021-2023 Intel Corporation
*/
#include "xe_mmio.h"
#include <linux/delay.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/minmax.h>
#include <linux/pci.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include "regs/xe_bars.h"
#include "regs/xe_regs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_macros.h"
#include "xe_sriov.h"
#include "xe_trace.h"
static void tiles_fini(void *arg)
{
struct xe_device *xe = arg;
struct xe_tile *tile;
int id;
for_each_tile(tile, xe, id)
tile->mmio.regs = NULL;
}

int xe_mmio_probe_tiles(struct xe_device *xe)
{
size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
u8 id, tile_count = xe->info.tile_count;
struct xe_gt *gt = xe_root_mmio_gt(xe);
struct xe_tile *tile;
void __iomem *regs;
u32 mtcfg;
if (tile_count == 1)
goto add_mmio_ext;
if (!xe->info.skip_mtcfg) {
mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
if (tile_count < xe->info.tile_count) {
drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
xe->info.tile_count, tile_count);
xe->info.tile_count = tile_count;
/*
* FIXME: Needs some work for standalone media, but should be impossible
* with multi-tile for now.
*/
xe->info.gt_count = xe->info.tile_count;
}
}
regs = xe->mmio.regs;
for_each_tile(tile, xe, id) {
tile->mmio.size = tile_mmio_size;
tile->mmio.regs = regs;
regs += tile_mmio_size;
}

add_mmio_ext:
/*
* By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
* When supported, there could be an additional contiguous multi-tile MMIO extension
* space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
*/
if (xe->info.has_mmio_ext) {
regs = xe->mmio.regs + tile_mmio_size * tile_count;
for_each_tile(tile, xe, id) {
tile->mmio_ext.size = tile_mmio_ext_size;
tile->mmio_ext.regs = regs;
regs += tile_mmio_ext_size;
}
}
return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe);
}
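
/*
 * Illustrative note (not part of the driver logic above): with the fixed
 * 16MB per-tile stride, tile N's register window simply starts N * SZ_16M
 * bytes into the mapping, e.g. on a hypothetical two-tile device:
 *
 *	tile0->mmio.regs == xe->mmio.regs;
 *	tile1->mmio.regs == xe->mmio.regs + SZ_16M;
 *
 * and, when has_mmio_ext is set, the extension windows are packed after
 * tile_count * SZ_16M with tile_mmio_ext_size as the stride.
 */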

static void mmio_fini(void *arg)
{
struct xe_device *xe = arg;
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
xe->mmio.regs = NULL;
}

int xe_mmio_init(struct xe_device *xe)
{
struct xe_tile *root_tile = xe_device_get_root_tile(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
const int mmio_bar = 0;
/*
* Map the entire BAR.
	 * The first 16MB of the BAR belong to the root tile and include:
* registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
*/
xe->mmio.size = pci_resource_len(pdev, mmio_bar);
xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR);
if (xe->mmio.regs == NULL) {
drm_err(&xe->drm, "failed to map registers\n");
return -EIO;
}
/* Setup first tile; other tiles (if present) will be setup later. */
root_tile->mmio.size = SZ_16M;
root_tile->mmio.regs = xe->mmio.regs;
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}

u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u8 val;
val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
return val;
}

u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u16 val;
val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
return val;
}

void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
trace_xe_reg_rw(gt, true, addr, val, sizeof(val));
writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
}

u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u32 val;
if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
val = xe_gt_sriov_vf_read32(gt, reg);
else
val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
return val;
}

u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
{
u32 old, reg_val;
old = xe_mmio_read32(gt, reg);
reg_val = (old & ~clr) | set;
xe_mmio_write32(gt, reg, reg_val);
return old;
}
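
/*
 * Usage sketch (illustrative only; FOO_CTL, FOO_CTL_BIT_A and FOO_CTL_BIT_B
 * are hypothetical names, not registers defined by this driver). A caller
 * that wants to clear one bit and set another in a single helper call, while
 * keeping the previous contents for a later restore, could do:
 *
 *	u32 saved;
 *
 *	saved = xe_mmio_rmw32(gt, FOO_CTL, FOO_CTL_BIT_A, FOO_CTL_BIT_B);
 *	...
 *	xe_mmio_write32(gt, FOO_CTL, saved);
 */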

int xe_mmio_write32_and_verify(struct xe_gt *gt,
struct xe_reg reg, u32 val, u32 mask, u32 eval)
{
u32 reg_val;
xe_mmio_write32(gt, reg, val);
reg_val = xe_mmio_read32(gt, reg);
return (reg_val & mask) != eval ? -EINVAL : 0;
}
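
/*
 * Usage sketch (illustrative; FOO_REG and FOO_MASK are hypothetical names).
 * Write a value and confirm the masked read-back matches what was written:
 *
 *	if (xe_mmio_write32_and_verify(gt, FOO_REG, val, FOO_MASK, val & FOO_MASK))
 *		drm_warn(&gt_to_xe(gt)->drm, "FOO_REG did not stick\n");
 */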

bool xe_mmio_in_range(const struct xe_gt *gt,
const struct xe_mmio_range *range,
struct xe_reg reg)
{
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
return range && addr >= range->start && addr <= range->end;
}
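
/*
 * Usage sketch (illustrative; the range below is made up). Callers typically
 * use this to decide whether a register needs special treatment:
 *
 *	static const struct xe_mmio_range hypothetical_range = {
 *		.start = 0x1000,
 *		.end = 0x1fff,
 *	};
 *
 *	if (xe_mmio_in_range(gt, &hypothetical_range, reg))
 *		... take the special path ...
 */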

/**
* xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
* @gt: MMIO target GT
* @reg: register to read value from
*
* Although Intel GPUs have some 64-bit registers, the hardware officially
 * only supports GTTMMADR register reads of 32 bits or smaller. Even though
 * a readq operation may return a reasonable value, that violation of the
* spec shouldn't be relied upon and all 64-bit register reads should be
* performed as two 32-bit reads of the upper and lower dwords.
*
* When reading registers that may be changing (such as
* counters), a rollover of the lower dword between the two 32-bit reads
* can be problematic. This function attempts to ensure the upper dword has
* stabilized before returning the 64-bit value.
*
* Note that because this function may re-read the register multiple times
 * while waiting for the value to stabilize, it should not be used to read
* any registers where read operations have side effects.
*
* Returns the value of the 64-bit register.
*/
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
u32 ldw, udw, oldudw, retries;
reg.addr = xe_mmio_adjusted_addr(gt, reg.addr);
reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr);
/* we shouldn't adjust just one register address */
xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4);
oldudw = xe_mmio_read32(gt, reg_udw);
for (retries = 5; retries; --retries) {
ldw = xe_mmio_read32(gt, reg);
udw = xe_mmio_read32(gt, reg_udw);
if (udw == oldudw)
break;
oldudw = udw;
}
xe_gt_WARN(gt, retries == 0,
"64-bit read of %#x did not stabilize\n", reg.addr);
return (u64)udw << 32 | ldw;
}
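
/*
 * Usage sketch (illustrative; FOO_TIMESTAMP is a hypothetical 64-bit,
 * free-running counter register). Prefer this helper over two independent
 * xe_mmio_read32() calls so a lower-dword rollover between the reads cannot
 * produce a torn value:
 *
 *	u64 ts = xe_mmio_read64_2x32(gt, FOO_TIMESTAMP);
 */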

/**
* xe_mmio_wait32() - Wait for a register to match the desired masked value
* @gt: MMIO target GT
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: desired value after applying the mask
* @timeout_us: time out after this period of time. Wait logic tries to be
* smart, applying an exponential backoff until @timeout_us is reached.
* @out_val: if not NULL, points where to store the last unmasked value
* @atomic: needs to be true if calling from an atomic context
*
* This function polls for the desired masked value and returns zero on success
* or -ETIMEDOUT if timed out.
*
* Note that @timeout_us represents the minimum amount of time to wait before
* giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
* it is possible that this function succeeds even after @timeout_us has passed.
*/
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
ktime_t cur = ktime_get_raw();
const ktime_t end = ktime_add_us(cur, timeout_us);
int ret = -ETIMEDOUT;
s64 wait = 10;
u32 read;
for (;;) {
read = xe_mmio_read32(gt, reg);
if ((read & mask) == val) {
ret = 0;
break;
}
cur = ktime_get_raw();
if (!ktime_before(cur, end))
break;
if (ktime_after(ktime_add_us(cur, wait), end))
wait = ktime_us_delta(end, cur);
if (atomic)
udelay(wait);
else
usleep_range(wait, wait << 1);
wait <<= 1;
}
if (ret != 0) {
read = xe_mmio_read32(gt, reg);
if ((read & mask) == val)
ret = 0;
}
if (out_val)
*out_val = read;
return ret;
}
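
/*
 * Usage sketch (illustrative; FOO_STATUS and FOO_STATUS_READY are
 * hypothetical names). Poll for a ready bit for up to 10ms from sleepable
 * context, keeping the last raw value for the error message:
 *
 *	u32 status;
 *	int err;
 *
 *	err = xe_mmio_wait32(gt, FOO_STATUS, FOO_STATUS_READY, FOO_STATUS_READY,
 *			     10 * USEC_PER_MSEC, &status, false);
 *	if (err)
 *		drm_err(&gt_to_xe(gt)->drm, "not ready, status %#x\n", status);
 */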

/**
* xe_mmio_wait32_not() - Wait for a register to return anything other than the given masked value
* @gt: MMIO target GT
* @reg: register to read value from
* @mask: mask to be applied to the value read from the register
* @val: value to match after applying the mask
* @timeout_us: time out after this period of time. Wait logic tries to be
* smart, applying an exponential backoff until @timeout_us is reached.
* @out_val: if not NULL, points where to store the last unmasked value
* @atomic: needs to be true if calling from an atomic context
*
* This function polls for a masked value to change from a given value and
* returns zero on success or -ETIMEDOUT if timed out.
*
* Note that @timeout_us represents the minimum amount of time to wait before
* giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
* it is possible that this function succeeds even after @timeout_us has passed.
*/
int xe_mmio_wait32_not(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic)
{
ktime_t cur = ktime_get_raw();
const ktime_t end = ktime_add_us(cur, timeout_us);
int ret = -ETIMEDOUT;
s64 wait = 10;
u32 read;
for (;;) {
read = xe_mmio_read32(gt, reg);
if ((read & mask) != val) {
ret = 0;
break;
}
cur = ktime_get_raw();
if (!ktime_before(cur, end))
break;
if (ktime_after(ktime_add_us(cur, wait), end))
wait = ktime_us_delta(end, cur);
if (atomic)
udelay(wait);
else
usleep_range(wait, wait << 1);
wait <<= 1;
}
if (ret != 0) {
read = xe_mmio_read32(gt, reg);
if ((read & mask) != val)
ret = 0;
}
if (out_val)
*out_val = read;
return ret;
}