summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2020-11-11 12:33:12 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:46 -0400
commit eb8e6e9ccbb4ba37c04a7cff032975b4df7d63c7 (patch)
tree 946041b0c3ce44e48e4968840527b11e2d1c25d8
parent 6d9378f3dcd7b91effdc4ffe1da1a2e8987e9f1e (diff)
bcachefs: Deadlock prevention for ei_pagecache_lock
In the dio write path, when get_user_pages() invokes the fault handler we have a recursive locking situation - we have to handle the lock ordering ourselves or we have a deadlock: this patch addresses that by checking for locking ordering violations and doing the unlock/relock dance if necessary.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/fs-io.c 68
-rw-r--r-- fs/bcachefs/fs.c 5
-rw-r--r-- fs/bcachefs/fs.h 1
3 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 658d19c04b99..1afdd775ffb3 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -44,6 +44,22 @@ static inline bool bio_full(struct bio *bio, unsigned len)
return false;
}
+static inline struct address_space *faults_disabled_mapping(void)
+{
+ return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
+}
+
+static inline void set_fdm_dropped_locks(void)
+{
+ current->faults_disabled_mapping =
+ (void *) (((unsigned long) current->faults_disabled_mapping)|1);
+}
+
+static inline bool fdm_dropped_locks(void)
+{
+ return ((unsigned long) current->faults_disabled_mapping) & 1;
+}
+
struct quota_res {
u64 sectors;
};
@@ -501,10 +517,35 @@ static void bch2_set_page_dirty(struct bch_fs *c,
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
struct file *file = vmf->vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
+ struct address_space *fdm = faults_disabled_mapping();
struct bch_inode_info *inode = file_bch_inode(file);
int ret;
+ if (fdm == mapping)
+ return VM_FAULT_SIGBUS;
+
+ /* Lock ordering: */
+ if (fdm > mapping) {
+ struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
+
+ if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock))
+ goto got_lock;
+
+ bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock);
+
+ bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+ bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+ bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock);
+
+ /* Signal that lock has been dropped: */
+ set_fdm_dropped_locks();
+ return VM_FAULT_SIGBUS;
+ }
+
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+got_lock:
ret = filemap_fault(vmf);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
@@ -1765,14 +1806,16 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct bio *bio = &dio->op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned unaligned;
- bool sync = dio->sync;
+ unsigned unaligned, iter_count;
+ bool sync = dio->sync, dropped_locks;
long ret;
if (dio->loop)
goto loop;
while (1) {
+ iter_count = dio->iter.count;
+
if (kthread)
kthread_use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
@@ -1780,13 +1823,34 @@ static long bch2_dio_write_loop(struct dio_write *dio)
ret = bio_iov_iter_get_pages(bio, &dio->iter);
+ dropped_locks = fdm_dropped_locks();
+
current->faults_disabled_mapping = NULL;
if (kthread)
kthread_unuse_mm(dio->mm);
+ /*
+ * If the fault handler returned an error but also signalled
+ * that it dropped & retook ei_pagecache_lock, we just need to
+ * re-shoot down the page cache and retry:
+ */
+ if (dropped_locks && ret)
+ ret = 0;
+
if (unlikely(ret < 0))
goto err;
+ if (unlikely(dropped_locks)) {
+ ret = write_invalidate_inode_pages_range(mapping,
+ req->ki_pos,
+ req->ki_pos + iter_count - 1);
+ if (unlikely(ret))
+ goto err;
+
+ if (!bio->bi_iter.bi_size)
+ continue;
+ }
+
unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
bio->bi_iter.bi_size -= unaligned;
iov_iter_revert(&dio->iter, unaligned);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3e3ab4e53f33..231a5433577f 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -93,6 +93,11 @@ void bch2_pagecache_add_put(struct pagecache_lock *lock)
__pagecache_lock_put(lock, 1);
}
+bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
+{
+ return __pagecache_lock_tryget(lock, 1);
+}
+
void bch2_pagecache_add_get(struct pagecache_lock *lock)
{
__pagecache_lock_get(lock, 1);
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index b3a2993dd9bc..7c095b856b05 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -26,6 +26,7 @@ static inline void pagecache_lock_init(struct pagecache_lock *lock)
}
void bch2_pagecache_add_put(struct pagecache_lock *);
+bool bch2_pagecache_add_tryget(struct pagecache_lock *);
void bch2_pagecache_add_get(struct pagecache_lock *);
void bch2_pagecache_block_put(struct pagecache_lock *);
void bch2_pagecache_block_get(struct pagecache_lock *);