summaryrefslogtreecommitdiff
path: root/fs/bcachefs/six.h
blob: 4c268b0b83162cc314c9ff810261a6c6362e6b97 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_SIX_H
#define _LINUX_SIX_H

/**
 * DOC: SIX locks overview
 *
 * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
 * but with an additional state: read/shared, intent, exclusive/write
 *
 * The purpose of the intent state is to allow for greater concurrency on tree
 * structures without deadlocking. In general, a read can't be upgraded to a
 * write lock without deadlocking, so an operation that updates multiple nodes
 * will have to take write locks for the full duration of the operation.
 *
 * But by adding an intent state, which is exclusive with other intent locks but
 * not with readers, we can take intent locks at thte start of the operation,
 * and then take write locks only for the actual update to each individual
 * nodes, without deadlocking.
 *
 * Example usage:
 *   six_lock_read(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *
 * An intent lock must be held before taking a write lock:
 *   six_lock_intent(&foo->lock);
 *   six_lock_write(&foo->lock);
 *   six_unlock_write(&foo->lock);
 *   six_unlock_intent(&foo->lock);
 *
 * Other operations:
 *   six_trylock_read()
 *   six_trylock_intent()
 *   six_trylock_write()
 *
 *   six_lock_downgrade()	convert from intent to read
 *   six_lock_tryupgrade()	attempt to convert from read to intent, may fail
 *
 * There are also interfaces that take the lock type as an enum:
 *
 *   six_lock_type(&foo->lock, SIX_LOCK_read);
 *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
 *   six_lock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
 *
 * Lock sequence numbers - unlock(), relock():
 *
 *   Locks embed sequences numbers, which are incremented on write lock/unlock.
 *   This allows locks to be dropped and the retaken iff the state they protect
 *   hasn't changed; this makes it much easier to avoid holding locks while e.g.
 *   doing IO or allocating memory.
 *
 *   Example usage:
 *     six_lock_read(&foo->lock);
 *     u32 seq = six_lock_seq(&foo->lock);
 *     six_unlock_read(&foo->lock);
 *
 *     some_operation_that_may_block();
 *
 *     if (six_relock_read(&foo->lock, seq)) { ... }
 *
 *   If the relock operation succeeds, it is as if the lock was never unlocked.
 *
 * Reentrancy:
 *
 *   Six locks are not by themselves reentrent, but have counters for both the
 *   read and intent states that can be used to provide reentrency by an upper
 *   layer that tracks held locks. If a lock is known to already be held in the
 *   read or intent state, six_lock_increment() can be used to bump the "lock
 *   held in this state" counter, increasing the number of unlock calls that
 *   will be required to fully unlock it.
 *
 *   Example usage:
 *     six_lock_read(&foo->lock);
 *     six_lock_increment(&foo->lock, SIX_LOCK_read);
 *     six_unlock_read(&foo->lock);
 *     six_unlock_read(&foo->lock);
 *   foo->lock is now fully unlocked.
 *
 *   Since the intent state supercedes read, it's legal to increment the read
 *   counter when holding an intent lock, but not the reverse.
 *
 *   A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
 *   is not legal.
 *
 * should_sleep_fn:
 *
 *   There is a six_lock() variant that takes a function pointer that is called
 *   immediately prior to schedule() when blocking, and may return an error to
 *   abort.
 *
 *   One possible use for this feature is when objects being locked are part of
 *   a cache and may reused, and lock ordering is based on a property of the
 *   object that will change when the object is reused - i.e. logical key order.
 *
 *   If looking up an object in the cache may race with object reuse, and lock
 *   ordering is required to prevent deadlock, object reuse may change the
 *   correct lock order for that object and cause a deadlock. should_sleep_fn
 *   can be used to check if the object is still the object we want and avoid
 *   this deadlock.
 *
 * Wait list entry interface:
 *
 *   There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
 *   wait list entry. By embedding six_lock_waiter into another object, and by
 *   traversing lock waitlists, it is then possible for an upper layer to
 *   implement full cycle detection for deadlock avoidance.
 *
 *   should_sleep_fn should be used for invoking the cycle detector, walking the
 *   graph of held locks to check for a deadlock. The upper layer must track
 *   held locks for each thread, and each thread's held locks must be reachable
 *   from its six_lock_waiter object.
 *
 *   six_lock_waiter() will add the wait object to the waitlist re-trying taking
 *   the lock, and before calling should_sleep_fn, and the wait object will not
 *   be removed from the waitlist until either the lock has been successfully
 *   acquired, or we aborted because should_sleep_fn returned an error.
 *
 *   Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
 *   have timestamps in strictly ascending order - this is so the timestamp can
 *   be used as a cursor for lock graph traverse.
 */

#include <linux/lockdep.h>
#include <linux/sched.h>
#include <linux/types.h>

#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
#include <linux/osq_lock.h>
#endif

enum six_lock_type {
	SIX_LOCK_read,
	SIX_LOCK_intent,
	SIX_LOCK_write,
};

struct six_lock {
	atomic_t		state;
	u32			seq;
	unsigned		intent_lock_recurse;
	struct task_struct	*owner;
	unsigned __percpu	*readers;
#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
	struct optimistic_spin_queue osq;
#endif
	raw_spinlock_t		wait_lock;
	struct list_head	wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
};

struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
	enum six_lock_type	lock_want;
	bool			lock_acquired;
	u64			start_time;
};

typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);

void six_lock_exit(struct six_lock *lock);

enum six_lock_init_flags {
	SIX_LOCK_INIT_PCPU	= 1U << 0,
};

void __six_lock_init(struct six_lock *lock, const char *name,
		     struct lock_class_key *key, enum six_lock_init_flags flags);

/**
 * six_lock_init - initialize a six lock
 * @lock:	lock to initialize
 * @flags:	optional flags, i.e. SIX_LOCK_INIT_PCPU
 */
#define six_lock_init(lock, flags)					\
do {									\
	static struct lock_class_key __key;				\
									\
	__six_lock_init((lock), #lock, &__key, flags);			\
} while (0)

/**
 * six_lock_seq - obtain current lock sequence number
 * @lock:	six_lock to obtain sequence number for
 *
 * @lock should be held for read or intent, and not write
 *
 * By saving the lock sequence number, we can unlock @lock and then (typically
 * after some blocking operation) attempt to relock it: the relock will succeed
 * if the sequence number hasn't changed, meaning no write locks have been taken
 * and state corresponding to what @lock protects is still valid.
 */
static inline u32 six_lock_seq(const struct six_lock *lock)
{
	return lock->seq;
}

bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);

/**
 * six_trylock_type - attempt to take a six lock without blocking
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * Return: true on success, false on failure.
 */
static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	return six_trylock_ip(lock, type, _THIS_IP_);
}

int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
		       struct six_lock_waiter *wait,
		       six_lock_should_sleep_fn should_sleep_fn, void *p,
		       unsigned long ip);

/**
 * six_lock_waiter - take a lock, with full waitlist interface
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait:	pointer to wait object, which will be added to lock's waitlist
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 *
 * This is a convenience wrapper around six_lock_ip_waiter(), see that function
 * for full documentation.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
}

/**
 * six_lock_ip - take a six lock lock
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
			      six_lock_should_sleep_fn should_sleep_fn, void *p,
			      unsigned long ip)
{
	struct six_lock_waiter wait;

	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}

/**
 * six_lock_type - take a six lock lock
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
				six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	struct six_lock_waiter wait;

	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
}

bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
		   unsigned seq, unsigned long ip);

/**
 * six_relock_type - attempt to re-take a lock that was held previously
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @seq:	lock sequence number obtained from six_lock_seq() while lock was
 *		held previously
 *
 * Return: true on success, false on failure.
 */
static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
				   unsigned seq)
{
	return six_relock_ip(lock, type, seq, _THIS_IP_);
}

void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);

/**
 * six_unlock_type - drop a six lock
 * @lock:	lock to unlock
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * When a lock is held multiple times (because six_lock_incement()) was used),
 * this decrements the 'lock held' counter by one.
 *
 * For example:
 * six_lock_read(&foo->lock);				read count 1
 * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
 * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 1
 * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 0
 */
static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	six_unlock_ip(lock, type, _THIS_IP_);
}

#define __SIX_LOCK(type)						\
static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, ip);		\
}									\
									\
static inline bool six_trylock_##type(struct six_lock *lock)		\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_);	\
}									\
									\
static inline int six_lock_ip_waiter_##type(struct six_lock *lock,	\
			   struct six_lock_waiter *wait,		\
			   six_lock_should_sleep_fn should_sleep_fn, void *p,\
			   unsigned long ip)				\
{									\
	return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
}									\
									\
static inline int six_lock_ip_##type(struct six_lock *lock,		\
		    six_lock_should_sleep_fn should_sleep_fn, void *p,	\
		    unsigned long ip)					\
{									\
	return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
									\
static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);		\
}									\
									\
static inline bool six_relock_##type(struct six_lock *lock, u32 seq)	\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_);	\
}									\
									\
static inline int six_lock_##type(struct six_lock *lock,		\
				  six_lock_should_sleep_fn fn, void *p)\
{									\
	return six_lock_ip_##type(lock, fn, p, _THIS_IP_);		\
}									\
									\
static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, ip);			\
}									\
									\
static inline void six_unlock_##type(struct six_lock *lock)		\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_);		\
}

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)
#undef __SIX_LOCK

void six_lock_downgrade(struct six_lock *);
bool six_lock_tryupgrade(struct six_lock *);
bool six_trylock_convert(struct six_lock *, enum six_lock_type,
			 enum six_lock_type);

void six_lock_increment(struct six_lock *, enum six_lock_type);

void six_lock_wakeup_all(struct six_lock *);

struct six_lock_count {
	unsigned n[3];
};

struct six_lock_count six_lock_counts(struct six_lock *);
void six_lock_readers_add(struct six_lock *, int);

#endif /* _LINUX_SIX_H */