From 85c2a74fabadfc561b75fbd7decc6bcbfe873d57 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Apr 2009 23:38:46 +0900 Subject: nilfs2: fix possible recovery failure due to block creation without writer Some function calls in nilfs_prepare_segment_for_recovery() may fail because they can create blocks on meta data files without configuring a writable FS-instance. Concretely, nilfs_mdt_create_block() routine of meta data files will fail in that case. This fixes the problem by temporarily attaching a writable FS-instace during the function is called. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/recovery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 4fc081e47d70..57afa9d24061 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -407,6 +407,7 @@ void nilfs_dispose_segment_list(struct list_head *head) } static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, struct nilfs_recovery_info *ri) { struct list_head *head = &ri->ri_used_segments; @@ -421,6 +422,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, segnum[2] = ri->ri_segnum; segnum[3] = ri->ri_nextnum; + nilfs_attach_writer(nilfs, sbi); /* * Releasing the next segment of the latest super root. * The next segment is invalidated by this recovery. @@ -459,10 +461,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, nilfs->ns_pseg_offset = 0; nilfs->ns_seg_seq = ri->ri_seq + 2; nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0]; - return 0; failed: /* No need to recover sufile because it will be destroyed on error */ + nilfs_detach_writer(nilfs, sbi); return err; } @@ -728,7 +730,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { - err = nilfs_prepare_segment_for_recovery(nilfs, ri); + err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: Error preparing segments for " "recovery.\n"); -- cgit v1.2.3-58-ga151 From 201913ed746c7724a40d33ee5a0b6a1fd2ef3193 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Apr 2009 21:04:59 +0900 Subject: nilfs2: fix circular locking dependency of writer mutex This fixes the following circular locking dependency problem: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3 #5 ------------------------------------------------------- segctord/3895 is trying to acquire lock: (&nilfs->ns_writer_mutex){+.+...}, at: [] nilfs_mdt_get_block+0x89/0x20f [nilfs2] but task is already holding lock: (&bmap->b_sem){++++..}, at: [] nilfs_bmap_propagate+0x14/0x2e [nilfs2] which lock already depends on the new lock. The bugfix is done by replacing call sites of nilfs_get_writer() which are never called from read-only context with direct dereferencing of pointer to a writable FS-instance. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 8 +++++--- fs/nilfs2/mdt.c | 13 +++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 108d281ebca5..be387c6b2d46 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -516,14 +516,16 @@ static ssize_t nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); + struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; - if (unlikely(!sbi)) + if (unlikely(!sbi)) { + /* never happens because called for a writable mount */ + WARN_ON(1); return -EROFS; + } ret = nilfs_segctor_add_segments_to_be_freed( NILFS_SC(sbi), buf, nmembs); - nilfs_put_writer(nilfs); return (ret < 0) ? ret : nmembs; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 47dd815433fd..e1c6777931b7 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -77,19 +77,22 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, void *)) { struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; - struct nilfs_sb_info *writer = NULL; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; if (!sb) { - writer = nilfs_get_writer(nilfs); - if (!writer) { + /* + * Make sure this function is not called from any + * read-only context. + */ + if (!nilfs->ns_writer) { + WARN_ON(1); err = -EROFS; goto out; } - sb = writer->s_super; + sb = nilfs->ns_writer->s_super; } nilfs_transaction_begin(sb, &ti, 0); @@ -127,8 +130,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - if (writer) - nilfs_put_writer(nilfs); out: return err; } -- cgit v1.2.3-58-ga151 From 843382370ec614768ac13582405f93635cf3637c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2009 21:52:06 +0900 Subject: nilfs2: ensure to clear dirty state when deleting metadata file block This would fix the following failure during GC: nilfs_cpfile_delete_checkpoints: cannot delete block NILFS: GC failed during preparation: cannot delete checkpoints: err=-2 The problem was caused by a break in state consistency between page cache and btree; the above block was removed from the btree but the page buffering the block was remaining in the page cache in dirty state. This resolves the inconsistency by ensuring to clear dirty state of the page buffering the deleted block. Reported-by: David Arendt Signed-off-by: Ryusuke Konishi --- fs/nilfs2/mdt.c | 2 +- fs/nilfs2/page.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index e1c6777931b7..bb78745a0e30 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -300,7 +300,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) int err; err = nilfs_bmap_delete(ii->i_bmap, block); - if (likely(!err)) { + if (!err || err == -ENOENT) { nilfs_mdt_mark_dirty(inode); nilfs_mdt_forget_block(inode, block); } diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 1bfbba9c0e9a..a2692bbc7b50 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -128,7 +128,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); clear_buffer_nilfs_volatile(bh); - if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) + clear_buffer_dirty(bh); + if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); clear_buffer_uptodate(bh); -- cgit v1.2.3-58-ga151 From 47eb6b9c8fa963c9f49967ad1d9d7ec947d15b68 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 30 Apr 2009 02:21:00 +0900 Subject: nilfs2: fix possible circular locking for get information ioctls This is one of two patches which are to correct possible circular locking between mm->mmap_sem and nilfs->ns_segctor_sem. The problem was detected by lockdep check as follows: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00002-g3552613 #6 ------------------------------------------------------- mmap/5418 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_to_user+0x2c/0xfc [] nilfs_ioctl_wrap_copy+0x103/0x160 [nilfs2] [] nilfs_ioctl+0x30a/0x3b0 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff other info that might help us debug this: 1 lock held by mmap/5418: #0: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a stack backtrace: Pid: 5418, comm: mmap Not tainted 2.6.30-rc3-nilfs-00002-g3552613 #6 Call Trace: [] ? printk+0xf/0x12 [] print_circular_bug_tail+0xaa/0xb5 [] __lock_acquire+0xdcc/0x13b0 [] ? nilfs_sufile_get_stat+0x1e/0x105 [nilfs2] [] ? up_read+0x16/0x2c [] ? nilfs_sufile_get_stat+0xfa/0x105 [nilfs2] [] lock_acquire+0xba/0xdd [] ? nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] down_read+0x2a/0x3e [] ? nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] ? do_page_fault+0x1d8/0x30a [] ? down_read_trylock+0x39/0x43 [] do_page_fault+0x2fb/0x30a [] ? do_page_fault+0x0/0x30a [] error_code+0x72/0x78 [] ? do_page_fault+0x0/0x30a This makes the lock granularity of nilfs->ns_segctor_sem finer than that of the mmap semaphore for ioctl commands except nilfs_clean_segments(). The successive patch ("nilfs2: fix lock order reversal in nilfs_clean_segments ioctl") is required to fully resolve the problem. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 100 +++++++++++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 62 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index be387c6b2d46..e3c693d37d69 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -147,29 +147,12 @@ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, - nmembs); -} - -static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_cpinfo); + ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, + nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -195,28 +178,11 @@ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); -} - -static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_suinfo); + ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -242,28 +208,11 @@ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); -} - -static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_vinfo); + ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -276,17 +225,21 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, struct nilfs_bdesc *bdescs = buf; int ret, i; + down_read(&nilfs->ns_segctor_sem); for (i = 0; i < nmembs; i++) { ret = nilfs_bmap_lookup_at_level(bmap, bdescs[i].bd_offset, bdescs[i].bd_level + 1, &bdescs[i].bd_blocknr); if (ret < 0) { - if (ret != -ENOENT) + if (ret != -ENOENT) { + up_read(&nilfs->ns_segctor_sem); return ret; + } bdescs[i].bd_blocknr = 0; } } + up_read(&nilfs->ns_segctor_sem); return nmembs; } @@ -300,10 +253,8 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); - up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -623,6 +574,29 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp, + ssize_t (*dofunc)(struct the_nilfs *, + __u64 *, int, + void *, size_t, size_t)) + +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); + if (ret < 0) + return ret; + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; @@ -634,16 +608,18 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_DELETE_CHECKPOINT: return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); case NILFS_IOCTL_GET_CPINFO: - return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_cpinfo); case NILFS_IOCTL_GET_CPSTAT: return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUINFO: - return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_suinfo); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: - /* XXX: rename to ??? */ - return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_vinfo); case NILFS_IOCTL_GET_BDESCS: return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); case NILFS_IOCTL_CLEAN_SEGMENTS: -- cgit v1.2.3-58-ga151 From 4f6b828837b4e3836f2c9ac2f0eab9773b6c1327 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 10 May 2009 22:41:43 +0900 Subject: nilfs2: fix lock order reversal in nilfs_clean_segments ioctl This is a companion patch to ("nilfs2: fix possible circular locking for get information ioctls"). This corrects lock order reversal between mm->mmap_sem and nilfs->ns_segctor_sem in nilfs_clean_segments() which was detected by lockdep check: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00003-g360bdc1 #7 ------------------------------------------------------- mmap/5294 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_from_user+0x2a/0x111 [] nilfs_ioctl_prepare_clean_segments+0x1d/0xf1 [nilfs2] [] nilfs_clean_segments+0x6d/0x1b9 [nilfs2] [] nilfs_ioctl+0x2ad/0x318 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff where nilfs_clean_segments() holds: nilfs->ns_segctor_sem -> copy_from_user() --> page fault -> mm->mmap_sem And, page fault path may hold: page fault -> mm->mmap_sem --> nilfs_page_mkwrite() -> nilfs->ns_segctor_sem Even though nilfs_clean_segments() does not perform write access on given user pages, it may cause deadlock because nilfs->ns_segctor_sem is shared per device and mm->mmap_sem can be shared with other tasks. To avoid this problem, this patch moves all calls of copy_from_user() outside the nilfs->ns_segctor_sem lock in the ioctl. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 163 ++++++++++++++++++++++++++++++---------------------- fs/nilfs2/nilfs.h | 3 +- fs/nilfs2/segment.c | 5 +- fs/nilfs2/segment.h | 3 +- 4 files changed, 100 insertions(+), 74 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index e3c693d37d69..49489f68eabe 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -25,6 +25,7 @@ #include /* lock_kernel(), unlock_kernel() */ #include /* capable() */ #include /* copy_from_user(), copy_to_user() */ +#include #include #include "nilfs.h" #include "segment.h" @@ -297,10 +298,10 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } -static ssize_t -nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -361,19 +362,10 @@ nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } -static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_move_blocks); -} - -static ssize_t -nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *cpfile = nilfs->ns_cpfile; struct nilfs_period *periods = buf; int ret, i; @@ -387,36 +379,21 @@ nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) +static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_delete_checkpoints); -} + size_t nmembs = argv->v_nmembs; + int ret; -static ssize_t -nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) -{ - int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_vblocknrs); -} - -static ssize_t -nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *dat = nilfs_dat_inode(nilfs); struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; @@ -455,18 +432,10 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_mark_blocks_dirty); -} - -static ssize_t -nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; @@ -481,31 +450,19 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_segments); -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, - void __user *argp) + struct nilfs_argv *argv, void **kbufs) { - struct nilfs_argv argv[5]; const char *msg; - int dir, ret; - - if (copy_from_user(argv, argp, sizeof(argv))) - return -EFAULT; + int ret; - dir = _IOC_WRITE; - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); + ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); if (ret < 0) { msg = "cannot read source blocks"; goto failed; } - ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); + + ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); if (ret < 0) { /* * can safely abort because checkpoints can be removed @@ -514,7 +471,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete checkpoints"; goto failed; } - ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); + ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], kbufs[2]); if (ret < 0) { /* * can safely abort because DAT file is updated atomically @@ -523,7 +480,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete virtual blocks from DAT file"; goto failed; } - ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); + ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], kbufs[3]); if (ret < 0) { /* * can safely abort because the operation is nondestructive. @@ -531,7 +488,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); + ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); if (ret < 0) { /* * can safely abort because this operation is atomic. @@ -551,9 +508,75 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { + struct nilfs_argv argv[5]; + const static size_t argsz[5] = { + sizeof(struct nilfs_vdesc), + sizeof(struct nilfs_period), + sizeof(__u64), + sizeof(struct nilfs_bdesc), + sizeof(__u64), + }; + void __user *base; + void *kbufs[5]; + struct the_nilfs *nilfs; + size_t len, nsegs; + int n, ret; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return nilfs_clean_segments(inode->i_sb, argp); + + if (copy_from_user(argv, argp, sizeof(argv))) + return -EFAULT; + + nsegs = argv[4].v_nmembs; + if (argv[4].v_size != argsz[4]) + return -EINVAL; + /* + * argv[4] points to segment numbers this ioctl cleans. We + * use kmalloc() for its buffer because memory used for the + * segment numbers is enough small. + */ + kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, + nsegs * sizeof(__u64)); + if (IS_ERR(kbufs[4])) + return PTR_ERR(kbufs[4]); + + nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + + for (n = 0; n < 4; n++) { + ret = -EINVAL; + if (argv[n].v_size != argsz[n]) + goto out_free; + + if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) + goto out_free; + + len = argv[n].v_size * argv[n].v_nmembs; + base = (void __user *)(unsigned long)argv[n].v_base; + if (len == 0) { + kbufs[n] = NULL; + continue; + } + + kbufs[n] = vmalloc(len); + if (!kbufs[n]) { + ret = -ENOMEM; + goto out_free; + } + if (copy_from_user(kbufs[n], base, len)) { + ret = -EFAULT; + vfree(kbufs[n]); + goto out_free; + } + } + + ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + + out_free: + while (--n > 0) + vfree(kbufs[n]); + kfree(kbufs[4]); + return ret; } static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3d0c18a16db1..da6fc0bba2e5 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -236,7 +236,8 @@ extern int nilfs_sync_file(struct file *, struct dentry *, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); -int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); +int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, + void **); /* inode.c */ extern struct inode *nilfs_new_inode(struct inode *, int); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index fb70ec3be20e..22c7f65c2403 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2589,7 +2589,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) } } -int nilfs_clean_segments(struct super_block *sb, void __user *argp) +int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, + void **kbufs) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct nilfs_sc_info *sci = NILFS_SC(sbi); @@ -2606,7 +2607,7 @@ int nilfs_clean_segments(struct super_block *sb, void __user *argp) err = nilfs_init_gcdat_inode(nilfs); if (unlikely(err)) goto out_unlock; - err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); if (unlikely(err)) goto out_unlock; diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a98fc1ed0bbb..476bdd5df5be 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -222,7 +222,8 @@ extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); extern void nilfs_flush_segment(struct super_block *, ino_t); -extern int nilfs_clean_segments(struct super_block *, void __user *); +extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, + void **); extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, __u64 *, size_t); -- cgit v1.2.3-58-ga151 From 83aca8f480fcd2d9748301a5d060cf947dc75b94 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 11 May 2009 23:24:47 +0900 Subject: nilfs2: check size of array structured data exchanged via ioctls Although some ioctls of nilfs2 exchange data in the form of indirectly referenced array, some of them lack size check on the array elements. This inserts the missing checks and rejects requests if data of ioctl does not have a valid format. We usually don't have to check size of structures that we associated with ioctl commands because the size is tested implicitly for identifying ioctl command; the checks this patch adds are for the cases where the implicit check is not applied. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 49489f68eabe..50ff3f2cdf24 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -254,6 +254,9 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; + if (argv.v_size != sizeof(struct nilfs_bdesc)) + return -EINVAL; + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); if (ret < 0) @@ -599,6 +602,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, + size_t membsz, ssize_t (*dofunc)(struct the_nilfs *, __u64 *, int, void *, size_t, size_t)) @@ -611,6 +615,9 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; + if (argv.v_size != membsz) + return -EINVAL; + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); if (ret < 0) return ret; @@ -632,16 +639,19 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); case NILFS_IOCTL_GET_CPINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_cpinfo), nilfs_ioctl_do_get_cpinfo); case NILFS_IOCTL_GET_CPSTAT: return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_suinfo), nilfs_ioctl_do_get_suinfo); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_vinfo), nilfs_ioctl_do_get_vinfo); case NILFS_IOCTL_GET_BDESCS: return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); -- cgit v1.2.3-58-ga151 From d5046853634a8d73f28bad3cf68d182c4a99035d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 22 May 2009 20:36:21 +0900 Subject: nilfs2: fix memory leak in nilfs_ioctl_clean_segments This fixes a new memory leak problem in garbage collection. The problem was brought by the bugfix patch ("nilfs2: fix lock order reversal in nilfs_clean_segments ioctl"). Thanks to Kentaro Suzuki for finding this problem. Reported-by: Kentaro Suzuki Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 50ff3f2cdf24..d6759b92006f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -576,7 +576,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); out_free: - while (--n > 0) + while (--n >= 0) vfree(kbufs[n]); kfree(kbufs[4]); return ret; -- cgit v1.2.3-58-ga151 From 62013ab5d5df297a01ae5863b5c26d758ec0af7f Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 30 May 2009 21:50:58 +0900 Subject: nilfs2: fix bh leak in nilfs_cpfile_delete_checkpoints function The nilfs_cpfile_delete_checkpoints() wrongly skips brelse() for the header block of checkpoint file in case of errors. This fixes the leak bug. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/cpfile.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index e90b60dfced9..300f1cdfa862 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_sem; + goto out_header; /* skip hole */ ret = 0; continue; @@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; printk(KERN_ERR "%s: cannot delete block\n", __func__); - goto out_sem; + goto out_header; } } @@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nilfs_mdt_mark_dirty(cpfile); kunmap_atomic(kaddr, KM_USER0); } + + out_header: brelse(header_bh); out_sem: -- cgit v1.2.3-58-ga151