diff options
Diffstat (limited to 'fs')
169 files changed, 3389 insertions, 2621 deletions
diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 40cc54ced5d9..2f9675491095 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -101,6 +101,18 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, #else /* CONFIG_9P_FSCACHE */ +static inline void v9fs_cache_inode_get_cookie(struct inode *inode) +{ +} + +static inline void v9fs_cache_inode_put_cookie(struct inode *inode) +{ +} + +static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file) +{ +} + static inline int v9fs_fscache_release_page(struct page *page, gfp_t gfp) { return 1; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index aa5ecf479a57..a0df3e73c2b1 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -105,10 +105,8 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9inode->writeback_fid = (void *) fid; } mutex_unlock(&v9inode->v_mutex); -#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); -#endif return 0; out_error: p9_client_clunk(file->private_data); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 94de6d1482e2..4e65aa903345 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -448,9 +448,7 @@ void v9fs_evict_inode(struct inode *inode) clear_inode(inode); filemap_fdatawrite(inode->i_mapping); -#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_put_cookie(inode); -#endif /* clunk the fid stashed in writeback_fid */ if (v9inode->writeback_fid) { p9_client_clunk(v9inode->writeback_fid); @@ -531,9 +529,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, goto error; v9fs_stat2inode(st, inode, sb); -#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_get_cookie(inode); -#endif unlock_new_inode(inode); return inode; error: @@ -905,10 +901,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, goto error; file->private_data = fid; -#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(dentry->d_inode, file); -#endif *opened |= FILE_CREATED; out: diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a7c481402c46..4c10edec26a0 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -141,9 +141,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, goto error; v9fs_stat2inode_dotl(st, inode); -#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_get_cookie(inode); -#endif retval = v9fs_get_acl(inode, fid); if (retval) goto error; @@ -355,10 +353,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err) goto err_clunk_old_fid; file->private_data = ofid; -#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); -#endif *opened |= FILE_CREATED; out: v9fs_put_acl(dacl, pacl); diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 585adafb0cc2..c770337c4b45 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -43,9 +43,12 @@ struct adfs_dir_ops; * ADFS file system superblock data in memory */ struct adfs_sb_info { - struct adfs_discmap *s_map; /* bh list containing map */ - struct adfs_dir_ops *s_dir; /* directory operations */ - + union { struct { + struct adfs_discmap *s_map; /* bh list containing map */ + struct adfs_dir_ops *s_dir; /* directory operations */ + }; + struct rcu_head rcu; /* used only at shutdown time */ + }; kuid_t s_uid; /* owner uid */ kgid_t s_gid; /* owner gid */ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ff4bae2c2a2..7b3003cb6f1b 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -123,8 +123,7 @@ static void adfs_put_super(struct super_block *sb) for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); - kfree(asb); - sb->s_fs_info = NULL; + kfree_rcu(asb, rcu); } static int adfs_show_options(struct seq_file *seq, struct dentry *root) @@ -36,10 +36,10 @@ #include <linux/eventfd.h> #include <linux/blkdev.h> #include <linux/compat.h> -#include <linux/anon_inodes.h> #include <linux/migrate.h> #include <linux/ramfs.h> #include <linux/percpu-refcount.h> +#include <linux/mount.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -152,12 +152,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; +static struct vfsmount *aio_mnt; + +static const struct file_operations aio_ring_fops; +static const struct address_space_operations aio_ctx_aops; + +static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) +{ + struct qstr this = QSTR_INIT("[aio]", 5); + struct file *file; + struct path path; + struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + inode->i_mapping->a_ops = &aio_ctx_aops; + inode->i_mapping->private_data = ctx; + inode->i_size = PAGE_SIZE * nr_pages; + + path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); + if (!path.dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + path.mnt = mntget(aio_mnt); + + d_instantiate(path.dentry, inode); + file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); + if (IS_ERR(file)) { + path_put(&path); + return file; + } + + file->f_flags = O_RDWR; + file->private_data = ctx; + return file; +} + +static struct dentry *aio_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + static const struct dentry_operations ops = { + .d_dname = simple_dname, + }; + return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); +} + /* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. */ static int __init aio_setup(void) { + static struct file_system_type aio_fs = { + .name = "aio", + .mount = aio_mount, + .kill_sb = kill_anon_super, + }; + aio_mnt = kern_mount(&aio_fs); + if (IS_ERR(aio_mnt)) + panic("Failed to create aio fs mount."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -283,16 +338,12 @@ static int aio_setup_ring(struct kioctx *ctx) if (nr_pages < 0) return -EINVAL; - file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); + file = aio_private_file(ctx, nr_pages); if (IS_ERR(file)) { ctx->aio_ring_file = NULL; return -EAGAIN; } - file->f_inode->i_mapping->a_ops = &aio_ctx_aops; - file->f_inode->i_mapping->private_data = ctx; - file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages; - for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_inode->i_mapping, diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 85c961849953..24084732b1d0 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -24,7 +24,6 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; -static const struct file_operations anon_inode_fops; /* * anon_inodefs_dname() is called from d_path(). @@ -39,51 +38,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { .d_dname = anon_inodefs_dname, }; -/* - * nop .set_page_dirty method so that people can use .page_mkwrite on - * anon inodes. - */ -static int anon_set_page_dirty(struct page *page) -{ - return 0; -}; - -static const struct address_space_operations anon_aops = { - .set_page_dirty = anon_set_page_dirty, -}; - -/* - * A single inode exists for all anon_inode files. Contrary to pipes, - * anon_inode inodes have no associated per-instance data, so we need - * only allocate one of them. - */ -static struct inode *anon_inode_mkinode(struct super_block *s) -{ - struct inode *inode = new_inode_pseudo(s); - - if (!inode) - return ERR_PTR(-ENOMEM); - - inode->i_ino = get_next_ino(); - inode->i_fop = &anon_inode_fops; - - inode->i_mapping->a_ops = &anon_aops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; -} - static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -92,7 +46,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); if (!IS_ERR(root)) { struct super_block *s = root->d_sb; - anon_inode_inode = anon_inode_mkinode(s); + anon_inode_inode = alloc_anon_inode(s); if (IS_ERR(anon_inode_inode)) { dput(root); deactivate_locked_super(s); @@ -109,72 +63,6 @@ static struct file_system_type anon_inode_fs_type = { }; /** - * anon_inode_getfile_private - creates a new file instance by hooking it up to an - * anonymous inode, and a dentry that describe the "class" - * of the file - * - * @name: [in] name of the "class" of the new file - * @fops: [in] file operations for the new file - * @priv: [in] private data for the new file (will be file's private_data) - * @flags: [in] flags - * - * - * Similar to anon_inode_getfile, but each file holds a single inode. - * - */ -struct file *anon_inode_getfile_private(const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - struct qstr this; - struct path path; - struct file *file; - struct inode *inode; - - if (fops->owner && !try_module_get(fops->owner)) - return ERR_PTR(-ENOENT); - - inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb); - if (IS_ERR(inode)) { - file = ERR_PTR(-ENOMEM); - goto err_module; - } - - /* - * Link the inode to a directory entry by creating a unique name - * using the inode sequence number. - */ - file = ERR_PTR(-ENOMEM); - this.name = name; - this.len = strlen(name); - this.hash = 0; - path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); - if (!path.dentry) - goto err_module; - - path.mnt = mntget(anon_inode_mnt); - - d_instantiate(path.dentry, inode); - - file = alloc_file(&path, OPEN_FMODE(flags), fops); - if (IS_ERR(file)) - goto err_dput; - - file->f_mapping = inode->i_mapping; - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); - file->private_data = priv; - - return file; - -err_dput: - path_put(&path); -err_module: - module_put(fops->owner); - return file; -} -EXPORT_SYMBOL_GPL(anon_inode_getfile_private); - -/** * anon_inode_getfile - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" * of the file diff --git a/fs/attr.c b/fs/attr.c index 1449adb14ef6..267968d94673 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -167,7 +167,27 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) } EXPORT_SYMBOL(setattr_copy); -int notify_change(struct dentry * dentry, struct iattr * attr) +/** + * notify_change - modify attributes of a filesytem object + * @dentry: object affected + * @iattr: new attributes + * @delegated_inode: returns inode, if the inode is delegated + * + * The caller must hold the i_mutex on the affected object. + * + * If notify_change discovers a delegation in need of breaking, + * it will return -EWOULDBLOCK and return a reference to the inode in + * delegated_inode. The caller should then break the delegation and + * retry. Because breaking a delegation may take a long time, the + * caller should drop the i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. Also, passing NULL is fine for callers holding + * the file open for write, as there can be no conflicting delegation in + * that case. + */ +int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; @@ -243,6 +263,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr) error = security_inode_setattr(dentry, attr); if (error) return error; + error = try_break_deleg(inode, delegated_inode); + if (error) + return error; if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3f1128b37e46..4218e26df916 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -122,6 +122,7 @@ struct autofs_sb_info { spinlock_t lookup_lock; struct list_head active_list; struct list_head expiring_list; + struct rcu_head rcu; }; static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) @@ -271,7 +272,7 @@ void autofs4_clean_ino(struct autofs_info *); static inline int autofs_prepare_pipe(struct file *pipe) { - if (!pipe->f_op || !pipe->f_op->write) + if (!pipe->f_op->write) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 0f00da329e71..1818ce7f5a06 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -658,12 +658,6 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use goto out; } - if (!fp->f_op) { - err = -ENOTTY; - fput(fp); - goto out; - } - sbi = autofs_dev_ioctl_sbi(fp); if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { err = -EINVAL; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index b104726e2d0a..3b9cc9b973c2 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -56,18 +56,13 @@ void autofs4_kill_sb(struct super_block *sb) * just call kill_anon_super when we are called from * deactivate_super. */ - if (!sbi) - goto out_kill_sb; - - /* Free wait queues, close pipe */ - autofs4_catatonic_mode(sbi); - - sb->s_fs_info = NULL; - kfree(sbi); + if (sbi) /* Free wait queues, close pipe */ + autofs4_catatonic_mode(sbi); -out_kill_sb: DPRINTK("shutting down"); kill_litter_super(sb); + if (sbi) + kfree_rcu(sbi, rcu); } static int autofs4_show_options(struct seq_file *m, struct dentry *root) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e9c75e20db32..daa15d6ba450 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode); static int befs_init_inodecache(void); static void befs_destroy_inodecache(void); static void *befs_follow_link(struct dentry *, struct nameidata *); -static void befs_put_link(struct dentry *, struct nameidata *, void *); +static void *befs_fast_follow_link(struct dentry *, struct nameidata *); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, @@ -79,10 +79,15 @@ static const struct address_space_operations befs_aops = { .bmap = befs_bmap, }; +static const struct inode_operations befs_fast_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = befs_fast_follow_link, +}; + static const struct inode_operations befs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = befs_follow_link, - .put_link = befs_put_link, + .put_link = kfree_put_link, }; /* @@ -411,7 +416,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &befs_dir_inode_operations; inode->i_fop = &befs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &befs_symlink_inode_operations; + if (befs_ino->i_flags & BEFS_LONG_SYMLINK) + inode->i_op = &befs_symlink_inode_operations; + else + inode->i_op = &befs_fast_symlink_inode_operations; } else { befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! BeFS has no " @@ -477,47 +485,40 @@ befs_destroy_inodecache(void) static void * befs_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct super_block *sb = dentry->d_sb; befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); + befs_data_stream *data = &befs_ino->i_data.ds; + befs_off_t len = data->size; char *link; - if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - struct super_block *sb = dentry->d_sb; - befs_data_stream *data = &befs_ino->i_data.ds; - befs_off_t len = data->size; + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); + link = ERR_PTR(-EIO); + } else { + befs_debug(sb, "Follow long symlink"); - if (len == 0) { - befs_error(sb, "Long symlink with illegal length"); + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); link = ERR_PTR(-EIO); } else { - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); - link = ERR_PTR(-EIO); - } else { - link[len - 1] = '\0'; - } + link[len - 1] = '\0'; } - } else { - link = befs_ino->i_data.symlink; } - nd_set_link(nd, link); return NULL; } -static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) + +static void * +befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); - if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - char *link = nd_get_link(nd); - if (!IS_ERR(link)) - kfree(link); - } + nd_set_link(nd, befs_ino->i_data.symlink); + return NULL; } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 89dec7f789a4..ca0ba15a7306 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -45,7 +45,6 @@ static int load_aout_library(struct file*); */ static int aout_core_dump(struct coredump_params *cprm) { - struct file *file = cprm->file; mm_segment_t fs; int has_dumped = 0; void __user *dump_start; @@ -85,10 +84,10 @@ static int aout_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); /* struct user */ - if (!dump_write(file, &dump, sizeof(dump))) + if (!dump_emit(cprm, &dump, sizeof(dump))) goto end_coredump; /* Now dump all of the user data. Include malloced stuff as well */ - if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump))) + if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump))) goto end_coredump; /* now we start writing out the user space info */ set_fs(USER_DS); @@ -96,14 +95,14 @@ static int aout_core_dump(struct coredump_params *cprm) if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - if (!dump_write(file, dump_start, dump_size)) + if (!dump_emit(cprm, dump_start, dump_size)) goto end_coredump; } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - if (!dump_write(file, dump_start, dump_size)) + if (!dump_emit(cprm, dump_start, dump_size)) goto end_coredump; } end_coredump: @@ -221,7 +220,7 @@ static int load_aout_binary(struct linux_binprm * bprm) * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ - if (!bprm->file->f_op || !bprm->file->f_op->mmap) + if (!bprm->file->f_op->mmap) return -ENOEXEC; fd_offset = N_TXTOFF(ex); @@ -374,7 +373,7 @@ static int load_aout_library(struct file *file) * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ - if (!file->f_op || !file->f_op->mmap) + if (!file->f_op->mmap) goto out; if (N_FLAGS(ex)) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4c94a79991bb..571a42326908 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, goto out; if (!elf_check_arch(interp_elf_ex)) goto out; - if (!interpreter->f_op || !interpreter->f_op->mmap) + if (!interpreter->f_op->mmap) goto out; /* @@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; - if (!bprm->file->f_op || !bprm->file->f_op->mmap) + if (!bprm->file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file) /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) + !elf_check_arch(&elf_ex) || !file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1225,35 +1225,17 @@ static int notesize(struct memelfnote *en) return sz; } -#define DUMP_WRITE(addr, nr, foffset) \ - do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) - -static int alignfile(struct file *file, loff_t *foffset) -{ - static const char buf[4] = { 0, }; - DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); - return 1; -} - -static int writenote(struct memelfnote *men, struct file *file, - loff_t *foffset) +static int writenote(struct memelfnote *men, struct coredump_params *cprm) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en), foffset); - DUMP_WRITE(men->name, en.n_namesz, foffset); - if (!alignfile(file, foffset)) - return 0; - DUMP_WRITE(men->data, men->datasz, foffset); - if (!alignfile(file, foffset)) - return 0; - - return 1; + return dump_emit(cprm, &en, sizeof(en)) && + dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && + dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); } -#undef DUMP_WRITE static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags) @@ -1392,7 +1374,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) } static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, - siginfo_t *siginfo) + const siginfo_t *siginfo) { mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); @@ -1599,7 +1581,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - siginfo_t *siginfo, struct pt_regs *regs) + const siginfo_t *siginfo, struct pt_regs *regs) { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); @@ -1702,7 +1684,7 @@ static size_t get_note_info_size(struct elf_note_info *info) * process-wide notes are interleaved after the first thread-specific note. */ static int write_note_info(struct elf_note_info *info, - struct file *file, loff_t *foffset) + struct coredump_params *cprm) { bool first = 1; struct elf_thread_core_info *t = info->thread; @@ -1710,22 +1692,22 @@ static int write_note_info(struct elf_note_info *info, do { int i; - if (!writenote(&t->notes[0], file, foffset)) + if (!writenote(&t->notes[0], cprm)) return 0; - if (first && !writenote(&info->psinfo, file, foffset)) + if (first && !writenote(&info->psinfo, cprm)) return 0; - if (first && !writenote(&info->signote, file, foffset)) + if (first && !writenote(&info->signote, cprm)) return 0; - if (first && !writenote(&info->auxv, file, foffset)) + if (first && !writenote(&info->auxv, cprm)) return 0; if (first && info->files.data && - !writenote(&info->files, file, foffset)) + !writenote(&info->files, cprm)) return 0; for (i = 1; i < info->thread_notes; ++i) if (t->notes[i].data && - !writenote(&t->notes[i], file, foffset)) + !writenote(&t->notes[i], cprm)) return 0; first = 0; @@ -1848,34 +1830,31 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - siginfo_t *siginfo, struct pt_regs *regs) + const siginfo_t *siginfo, struct pt_regs *regs) { struct list_head *t; + struct core_thread *ct; + struct elf_thread_status *ets; if (!elf_note_info_init(info)) return 0; - if (siginfo->si_signo) { - struct core_thread *ct; - struct elf_thread_status *ets; - - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - ets = kzalloc(sizeof(*ets), GFP_KERNEL); - if (!ets) - return 0; + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + ets = kzalloc(sizeof(*ets), GFP_KERNEL); + if (!ets) + return 0; - ets->thread = ct->task; - list_add(&ets->list, &info->thread_list); - } + ets->thread = ct->task; + list_add(&ets->list, &info->thread_list); + } - list_for_each(t, &info->thread_list) { - int sz; + list_for_each(t, &info->thread_list) { + int sz; - ets = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(siginfo->si_signo, ets); - info->thread_status_size += sz; - } + ets = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(siginfo->si_signo, ets); + info->thread_status_size += sz; } /* now collect the dump for the current */ memset(info->prstatus, 0, sizeof(*info->prstatus)); @@ -1935,13 +1914,13 @@ static size_t get_note_info_size(struct elf_note_info *info) } static int write_note_info(struct elf_note_info *info, - struct file *file, loff_t *foffset) + struct coredump_params *cprm) { int i; struct list_head *t; for (i = 0; i < info->numnote; i++) - if (!writenote(info->notes + i, file, foffset)) + if (!writenote(info->notes + i, cprm)) return 0; /* write out the thread status notes section */ @@ -1950,7 +1929,7 @@ static int write_note_info(struct elf_note_info *info, list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], file, foffset)) + if (!writenote(&tmp->notes[i], cprm)) return 0; } @@ -2046,10 +2025,9 @@ static int elf_core_dump(struct coredump_params *cprm) int has_dumped = 0; mm_segment_t fs; int segs; - size_t size = 0; struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff, foffset; + loff_t offset = 0, dataoff; struct elf_note_info info = { }; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; @@ -2105,7 +2083,6 @@ static int elf_core_dump(struct coredump_params *cprm) offset += sizeof(*elf); /* Elf header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ - foffset = offset; /* Write notes phdr entry */ { @@ -2136,13 +2113,10 @@ static int elf_core_dump(struct coredump_params *cprm) offset = dataoff; - size += sizeof(*elf); - if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + if (!dump_emit(cprm, elf, sizeof(*elf))) goto end_coredump; - size += sizeof(*phdr4note); - if (size > cprm->limit - || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* Write program headers for segments dump */ @@ -2164,24 +2138,22 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + if (!dump_emit(cprm, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; /* write out the notes section */ - if (!write_note_info(&info, cprm->file, &foffset)) + if (!write_note_info(&info, cprm)) goto end_coredump; - if (elf_coredump_extra_notes_write(cprm->file, &foffset)) + if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; /* Align to page */ - if (!dump_seek(cprm->file, dataoff - foffset)) + if (!dump_skip(cprm, dataoff - cprm->written)) goto end_coredump; for (vma = first_vma(current, gate_vma); vma != NULL; @@ -2198,26 +2170,21 @@ static int elf_core_dump(struct coredump_params *cprm) page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - stop = ((size += PAGE_SIZE) > cprm->limit) || - !dump_write(cprm->file, kaddr, - PAGE_SIZE); + stop = !dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); page_cache_release(page); } else - stop = !dump_seek(cprm->file, PAGE_SIZE); + stop = !dump_skip(cprm, PAGE_SIZE); if (stop) goto end_coredump; } } - if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + if (!elf_core_write_extra_data(cprm)) goto end_coredump; if (e_phnum == PN_XNUM) { - size += sizeof(*shdr4extnum); - if (size > cprm->limit - || !dump_write(cprm->file, shdr4extnum, - sizeof(*shdr4extnum))) + if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) goto end_coredump; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c166f325a183..fe2a643ee005 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) return 0; if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr)) return 0; - if (!file->f_op || !file->f_op->mmap) + if (!file->f_op->mmap) return 0; return 1; } @@ -1267,35 +1267,17 @@ static int notesize(struct memelfnote *en) /* #define DEBUG */ -#define DUMP_WRITE(addr, nr, foffset) \ - do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) - -static int alignfile(struct file *file, loff_t *foffset) -{ - static const char buf[4] = { 0, }; - DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); - return 1; -} - -static int writenote(struct memelfnote *men, struct file *file, - loff_t *foffset) +static int writenote(struct memelfnote *men, struct coredump_params *cprm) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en), foffset); - DUMP_WRITE(men->name, en.n_namesz, foffset); - if (!alignfile(file, foffset)) - return 0; - DUMP_WRITE(men->data, men->datasz, foffset); - if (!alignfile(file, foffset)) - return 0; - - return 1; + return dump_emit(cprm, &en, sizeof(en)) && + dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && + dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); } -#undef DUMP_WRITE static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) { @@ -1500,66 +1482,40 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, /* * dump the segments for an MMU process */ -#ifdef CONFIG_MMU -static int elf_fdpic_dump_segments(struct file *file, size_t *size, - unsigned long *limit, unsigned long mm_flags) +static bool elf_fdpic_dump_segments(struct coredump_params *cprm) { struct vm_area_struct *vma; - int err = 0; for (vma = current->mm->mmap; vma; vma = vma->vm_next) { unsigned long addr; - if (!maydump(vma, mm_flags)) + if (!maydump(vma, cprm->mm_flags)) continue; +#ifdef CONFIG_MMU for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { + bool res; struct page *page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - *size += PAGE_SIZE; - if (*size > *limit) - err = -EFBIG; - else if (!dump_write(file, kaddr, PAGE_SIZE)) - err = -EIO; + res = dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); page_cache_release(page); - } else if (!dump_seek(file, PAGE_SIZE)) - err = -EFBIG; - if (err) - goto out; + } else { + res = dump_skip(cprm, PAGE_SIZE); + } + if (!res) + return false; } - } -out: - return err; -} -#endif - -/* - * dump the segments for a NOMMU process - */ -#ifndef CONFIG_MMU -static int elf_fdpic_dump_segments(struct file *file, size_t *size, - unsigned long *limit, unsigned long mm_flags) -{ - struct vm_area_struct *vma; - - for (vma = current->mm->mmap; vma; vma = vma->vm_next) { - if (!maydump(vma, mm_flags)) - continue; - - if ((*size += PAGE_SIZE) > *limit) - return -EFBIG; - - if (!dump_write(file, (void *) vma->vm_start, +#else + if (!dump_emit(cprm, (void *) vma->vm_start, vma->vm_end - vma->vm_start)) - return -EIO; + return false; +#endif } - - return 0; + return true; } -#endif static size_t elf_core_vma_data_size(unsigned long mm_flags) { @@ -1585,11 +1541,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) int has_dumped = 0; mm_segment_t fs; int segs; - size_t size = 0; int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff, foffset; + loff_t offset = 0, dataoff; int numnote; struct memelfnote *notes = NULL; struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ @@ -1606,6 +1561,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; + struct core_thread *ct; + struct elf_thread_status *tmp; /* * We no longer stop all VM operations. @@ -1641,28 +1598,23 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto cleanup; #endif - if (cprm->siginfo->si_signo) { - struct core_thread *ct; - struct elf_thread_status *tmp; - - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - goto cleanup; + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + goto cleanup; - tmp->thread = ct->task; - list_add(&tmp->list, &thread_list); - } + tmp->thread = ct->task; + list_add(&tmp->list, &thread_list); + } - list_for_each(t, &thread_list) { - struct elf_thread_status *tmp; - int sz; + list_for_each(t, &thread_list) { + struct elf_thread_status *tmp; + int sz; - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); - thread_status_size += sz; - } + tmp = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); + thread_status_size += sz; } /* now collect the dump for the current */ @@ -1720,7 +1672,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) offset += sizeof(*elf); /* Elf header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ - foffset = offset; /* Write notes phdr entry */ { @@ -1755,13 +1706,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) offset = dataoff; - size += sizeof(*elf); - if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + if (!dump_emit(cprm, elf, sizeof(*elf))) goto end_coredump; - size += sizeof(*phdr4note); - if (size > cprm->limit - || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) + if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* write program headers for segments dump */ @@ -1785,18 +1733,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - size += sizeof(phdr); - if (size > cprm->limit - || !dump_write(cprm->file, &phdr, sizeof(phdr))) + if (!dump_emit(cprm, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; /* write out the notes section */ for (i = 0; i < numnote; i++) - if (!writenote(notes + i, cprm->file, &foffset)) + if (!writenote(notes + i, cprm)) goto end_coredump; /* write out the thread status notes section */ @@ -1805,25 +1751,21 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], cprm->file, &foffset)) + if (!writenote(&tmp->notes[i], cprm)) goto end_coredump; } - if (!dump_seek(cprm->file, dataoff - foffset)) + if (!dump_skip(cprm, dataoff - cprm->written)) goto end_coredump; - if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, - cprm->mm_flags) < 0) + if (!elf_fdpic_dump_segments(cprm)) goto end_coredump; - if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + if (!elf_core_write_extra_data(cprm)) goto end_coredump; if (e_phnum == PN_XNUM) { - size += sizeof(*shdr4extnum); - if (size > cprm->limit - || !dump_write(cprm->file, shdr4extnum, - sizeof(*shdr4extnum))) + if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) goto end_coredump; } diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 037a3e2b045b..f37b08cea1f7 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm) /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - (!bprm->file->f_op || !bprm->file->f_op->mmap)) { + !bprm->file->f_op->mmap) { return -ENOEXEC; } @@ -1805,6 +1805,52 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) EXPORT_SYMBOL(bio_split); /** + * bio_trim - trim a bio + * @bio: bio to trim + * @offset: number of sectors to trim from the front of @bio + * @size: size we want to trim @bio to, in sectors + */ +void bio_trim(struct bio *bio, int offset, int size) +{ + /* 'bio' is a cloned bio which we need to trim to match + * the given offset and size. + * This requires adjusting bi_sector, bi_size, and bi_io_vec + */ + int i; + struct bio_vec *bvec; + int sofar = 0; + + size <<= 9; + if (offset == 0 && size == bio->bi_size) + return; + + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + + bio_advance(bio, offset << 9); + + bio->bi_size = size; + + /* avoid any complications with bi_idx being non-zero*/ + if (bio->bi_idx) { + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); + bio->bi_vcnt -= bio->bi_idx; + bio->bi_idx = 0; + } + /* Make sure vcnt and last bv are not too big */ + bio_for_each_segment(bvec, bio, i) { + if (sofar + bvec->bv_len > size) + bvec->bv_len = size - sofar; + if (bvec->bv_len == 0) { + bio->bi_vcnt = i; + break; + } + sofar += bvec->bv_len; + } +} +EXPORT_SYMBOL_GPL(bio_trim); + +/** * bio_sector_offset - Find hardware sector offset in bio * @bio: bio to inspect * @index: bio_vec index diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 00baf1419989..57e17fe6121a 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object) _debug("discard tail %llx", oi_size); newattrs.ia_valid = ATTR_SIZE; newattrs.ia_size = oi_size & PAGE_MASK; - ret = notify_change(object->backer, &newattrs); + ret = notify_change(object->backer, &newattrs, NULL); if (ret < 0) goto truncate_failed; } newattrs.ia_valid = ATTR_SIZE; newattrs.ia_size = ni_size; - ret = notify_change(object->backer, &newattrs); + ret = notify_change(object->backer, &newattrs, NULL); truncate_failed: mutex_unlock(&object->backer->d_inode->i_mutex); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f4a08d7fa2f7..ca65f39dc8dc 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, if (ret < 0) { cachefiles_io_error(cache, "Unlink security error"); } else { - ret = vfs_unlink(dir->d_inode, rep); + ret = vfs_unlink(dir->d_inode, rep, NULL); if (preemptive) cachefiles_mark_object_buried(cache, rep); @@ -396,7 +396,7 @@ try_again: cachefiles_io_error(cache, "Rename security error %d", ret); } else { ret = vfs_rename(dir->d_inode, rep, - cache->graveyard->d_inode, grave); + cache->graveyard->d_inode, grave, NULL); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); diff --git a/fs/char_dev.c b/fs/char_dev.c index afc2bb691780..f77f7702fabe 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -368,6 +368,7 @@ void cdev_put(struct cdev *p) */ static int chrdev_open(struct inode *inode, struct file *filp) { + const struct file_operations *fops; struct cdev *p; struct cdev *new = NULL; int ret = 0; @@ -400,10 +401,11 @@ static int chrdev_open(struct inode *inode, struct file *filp) return ret; ret = -ENXIO; - filp->f_op = fops_get(p->ops); - if (!filp->f_op) + fops = fops_get(p->ops); + if (!fops) goto out_cdev_put; + replace_fops(filp, fops); if (filp->f_op->open) { ret = filp->f_op->open(inode, filp); if (ret) @@ -574,7 +576,8 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data) void __init chrdev_init(void) { cdev_map = kobj_map_init(base_probe, &chrdevs_lock); - bdi_init(&directly_mappable_cdev_bdi); + if (bdi_init(&directly_mappable_cdev_bdi)) + panic("Failed to init directly mappable cdev bdi"); } diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 37e4a72a7d1c..9409fa10bd5c 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -65,5 +65,6 @@ struct cifs_sb_info { char *mountdata; /* options received at mount time or via DFS refs */ struct backing_dev_info bdi; struct delayed_work prune_tlinks; + struct rcu_head rcu; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 77fc5e181077..849f6132b327 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = { const struct inode_operations cifs_symlink_inode_ops = { .readlink = generic_readlink, .follow_link = cifs_follow_link, - .put_link = cifs_put_link, + .put_link = kfree_put_link, .permission = cifs_permission, /* BB add the following two eventually */ /* revalidate: cifs_revalidate, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 6d0b07217ac9..26a754f49ba1 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -115,8 +115,6 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path); /* Functions related to symlinks */ extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); -extern void cifs_put_link(struct dentry *direntry, - struct nameidata *nd, void *); extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 62a55147400a..8813ff776ba3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3770,6 +3770,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, return rc; } +static void delayed_free(struct rcu_head *p) +{ + struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu); + unload_nls(sbi->local_nls); + kfree(sbi); +} + void cifs_umount(struct cifs_sb_info *cifs_sb) { @@ -3794,8 +3801,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb) bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb->mountdata); - unload_nls(cifs_sb->local_nls); - kfree(cifs_sb); + call_rcu(&cifs_sb->rcu, delayed_free); } int diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 7e36ceba0c7a..cc0234710ddb 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -621,10 +621,3 @@ symlink_exit: free_xid(xid); return rc; } - -void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) -{ - char *p = nd_get_link(nd); - if (!IS_ERR(p)) - kfree(p); -} diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index cc0ea9fe5ecf..e7550cb9fb74 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations; int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); -int coda_revalidate_inode(struct dentry *); +int coda_revalidate_inode(struct inode *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 190effc6a6fa..5efbb5ee0adc 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -387,9 +387,6 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op) - return -ENOTDIR; - if (host_file->f_op->iterate) { struct inode *host_inode = file_inode(host_file); mutex_lock(&host_inode->i_mutex); @@ -566,13 +563,12 @@ static int coda_dentry_delete(const struct dentry * dentry) * cache manager Venus issues a downcall to the kernel when this * happens */ -int coda_revalidate_inode(struct dentry *dentry) +int coda_revalidate_inode(struct inode *inode) { struct coda_vattr attr; int error; int old_mode; ino_t old_ino; - struct inode *inode = dentry->d_inode; struct coda_inode_info *cii = ITOC(inode); if (!cii->c_flags) diff --git a/fs/coda/file.c b/fs/coda/file.c index 380b798f8443..9e83b7790212 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->read) + if (!host_file->f_op->read) return -EINVAL; return host_file->f_op->read(host_file, buf, count, ppos); @@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->write) + if (!host_file->f_op->write) return -EINVAL; host_inode = file_inode(host_file); @@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op || !host_file->f_op->mmap) + if (!host_file->f_op->mmap) return -ENODEV; coda_inode = file_inode(coda_file); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 4dcc0d81a7aa..506de34a4ef3 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode) int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - int err = coda_revalidate_inode(dentry); + int err = coda_revalidate_inode(dentry->d_inode); if (!err) generic_fillattr(dentry->d_inode, stat); return err; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 5d19acfa7c6c..dc52e13d58e0 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, /*FALL THROUGH*/ default: - if (f.file->f_op && f.file->f_op->compat_ioctl) { + if (f.file->f_op->compat_ioctl) { error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error != -ENOIOCTLCMD) goto out_fput; } - if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) + if (!f.file->f_op->unlocked_ioctl) goto do_ioctl; break; } diff --git a/fs/coredump.c b/fs/coredump.c index 9bdeca12ae0e..62406b6959b6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) return err; } -void do_coredump(siginfo_t *siginfo) +void do_coredump(const siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; @@ -645,7 +645,7 @@ void do_coredump(siginfo_t *siginfo) */ if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; - if (!cprm.file->f_op || !cprm.file->f_op->write) + if (!cprm.file->f_op->write) goto close_fail; if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; @@ -685,40 +685,55 @@ fail: * do on a core-file: use only these functions to write out all the * necessary info. */ -int dump_write(struct file *file, const void *addr, int nr) +int dump_emit(struct coredump_params *cprm, const void *addr, int nr) { - return !dump_interrupted() && - access_ok(VERIFY_READ, addr, nr) && - file->f_op->write(file, addr, nr, &file->f_pos) == nr; + struct file *file = cprm->file; + loff_t pos = file->f_pos; + ssize_t n; + if (cprm->written + nr > cprm->limit) + return 0; + while (nr) { + if (dump_interrupted()) + return 0; + n = vfs_write(file, addr, nr, &pos); + if (n <= 0) + return 0; + file->f_pos = pos; + cprm->written += n; + nr -= n; + } + return 1; } -EXPORT_SYMBOL(dump_write); +EXPORT_SYMBOL(dump_emit); -int dump_seek(struct file *file, loff_t off) +int dump_skip(struct coredump_params *cprm, size_t nr) { - int ret = 1; - + static char zeroes[PAGE_SIZE]; + struct file *file = cprm->file; if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (cprm->written + nr > cprm->limit) + return 0; if (dump_interrupted() || - file->f_op->llseek(file, off, SEEK_CUR) < 0) + file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->written += nr; + return 1; } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; + while (nr > PAGE_SIZE) { + if (!dump_emit(cprm, zeroes, PAGE_SIZE)) + return 0; + nr -= PAGE_SIZE; } - free_page((unsigned long)buf); + return dump_emit(cprm, zeroes, nr); } - return ret; } -EXPORT_SYMBOL(dump_seek); +EXPORT_SYMBOL(dump_skip); + +int dump_align(struct coredump_params *cprm, int align) +{ + unsigned mod = cprm->written & (align - 1); + if (align & (align - 1)) + return -EINVAL; + return mod ? dump_skip(cprm, align - mod) : 0; +} +EXPORT_SYMBOL(dump_align); diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index cd06466f365e..11b29d491b7c 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -1,5 +1,5 @@ config CRAMFS - tristate "Compressed ROM file system support (cramfs)" + tristate "Compressed ROM file system support (cramfs) (OBSOLETE)" depends on BLOCK select ZLIB_INFLATE help @@ -16,4 +16,7 @@ config CRAMFS cramfs. Note that the root file system (the one containing the directory /) cannot be compiled as a module. + This filesystem is obsoleted by SquashFS, which is much better + in terms of performance and features. + If unsure, say N. diff --git a/fs/dcache.c b/fs/dcache.c index ae6ebb88ceff..a9dd384c5e80 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -343,6 +343,7 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + __d_clear_type(dentry); dentry->d_inode = NULL; hlist_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); @@ -483,27 +484,6 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) return parent; } -/* - * Unhash a dentry without inserting an RCU walk barrier or checking that - * dentry->d_lock is locked. The caller must take care of that, if - * appropriate. - */ -static void __d_shrink(struct dentry *dentry) -{ - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - } -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -522,7 +502,21 @@ static void __d_shrink(struct dentry *dentry) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - __d_shrink(dentry); + struct hlist_bl_head *b; + /* + * Hashed dentries are normally on the dentry hashtable, + * with the exception of those newly allocated by + * d_obtain_alias, which are always IS_ROOT: + */ + if (unlikely(IS_ROOT(dentry))) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } } @@ -1076,116 +1070,6 @@ void shrink_dcache_sb(struct super_block *sb) EXPORT_SYMBOL(shrink_dcache_sb); /* - * destroy a single subtree of dentries for unmount - * - see the comments on shrink_dcache_for_umount() for a description of the - * locking - */ -static void shrink_dcache_for_umount_subtree(struct dentry *dentry) -{ - struct dentry *parent; - - BUG_ON(!IS_ROOT(dentry)); - - for (;;) { - /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - - /* consume the dentries from this leaf up through its parents - * until we find one with children or run out altogether */ - do { - struct inode *inode; - - /* - * inform the fs that this dentry is about to be - * unhashed and destroyed. - */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && - !d_unhashed(dentry)) - dentry->d_op->d_prune(dentry); - - dentry_lru_del(dentry); - __d_shrink(dentry); - - if (dentry->d_lockref.count != 0) { - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - dentry->d_lockref.count, - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } - - if (IS_ROOT(dentry)) { - parent = NULL; - list_del(&dentry->d_u.d_child); - } else { - parent = dentry->d_parent; - parent->d_lockref.count--; - list_del(&dentry->d_u.d_child); - } - - inode = dentry->d_inode; - if (inode) { - dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } - - d_free(dentry); - - /* finished when we fall off the top of the tree, - * otherwise we ascend to the parent and move to the - * next sibling if there is one */ - if (!parent) - return; - dentry = parent; - } while (list_empty(&dentry->d_subdirs)); - - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } -} - -/* - * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock because: - * - the superblock is detached from all mountings and open files, so the - * dentry trees will not be rearranged by the VFS - * - s_umount is write-locked, so the memory pressure shrinker will ignore - * any dentries belonging to this superblock that it comes across - * - the filesystem itself is no longer permitted to rearrange the dentries - * in this superblock - */ -void shrink_dcache_for_umount(struct super_block *sb) -{ - struct dentry *dentry; - - if (down_read_trylock(&sb->s_umount)) - BUG(); - - dentry = sb->s_root; - sb->s_root = NULL; - dentry->d_lockref.count--; - shrink_dcache_for_umount_subtree(dentry); - - while (!hlist_bl_empty(&sb->s_anon)) { - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); - shrink_dcache_for_umount_subtree(dentry); - } -} - -/* * This tries to ascend one level of parenthood, but * we can race with renaming, so we need to re-check * the parenthood after dropping the lock and check @@ -1478,6 +1362,91 @@ void shrink_dcache_parent(struct dentry *parent) } EXPORT_SYMBOL(shrink_dcache_parent); +static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry) +{ + struct select_data *data = _data; + enum d_walk_ret ret = D_WALK_CONTINUE; + + if (dentry->d_lockref.count) { + dentry_lru_del(dentry); + if (likely(!list_empty(&dentry->d_subdirs))) + goto out; + if (dentry == data->start && dentry->d_lockref.count == 1) + goto out; + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + dentry->d_lockref.count, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { + /* + * We can't use d_lru_shrink_move() because we + * need to get the global LRU lock and do the + * LRU accounting. + */ + if (dentry->d_flags & DCACHE_LRU_LIST) + d_lru_del(dentry); + d_shrink_add(dentry, &data->dispose); + data->found++; + ret = D_WALK_NORETRY; + } +out: + if (data->found && need_resched()) + ret = D_WALK_QUIT; + return ret; +} + +/* + * destroy the dentries attached to a superblock on unmounting + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + for (;;) { + struct select_data data; + + INIT_LIST_HEAD(&data.dispose); + data.start = dentry; + data.found = 0; + + d_walk(dentry, &data, umount_collect, NULL); + if (!data.found) + break; + + shrink_dentry_list(&data.dispose); + cond_resched(); + } + d_drop(dentry); + dput(dentry); + + while (!hlist_bl_empty(&sb->s_anon)) { + struct select_data data; + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); + + INIT_LIST_HEAD(&data.dispose); + data.start = NULL; + data.found = 0; + + d_walk(dentry, &data, umount_collect, NULL); + if (data.found) + shrink_dentry_list(&data.dispose); + cond_resched(); + } +} + static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1638,12 +1607,17 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +/** + * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) + * @sb: the superblock + * @name: qstr of the name + * + * For a filesystem that just pins its dentries in memory and never + * performs lookups at all, return an unhashed IS_ROOT dentry. + */ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { - struct dentry *dentry = __d_alloc(sb, name); - if (dentry) - dentry->d_flags |= DCACHE_DISCONNECTED; - return dentry; + return __d_alloc(sb, name); } EXPORT_SYMBOL(d_alloc_pseudo); @@ -1685,14 +1659,42 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) } EXPORT_SYMBOL(d_set_d_op); +static unsigned d_flags_for_inode(struct inode *inode) +{ + unsigned add_flags = DCACHE_FILE_TYPE; + + if (!inode) + return DCACHE_MISS_TYPE; + + if (S_ISDIR(inode->i_mode)) { + add_flags = DCACHE_DIRECTORY_TYPE; + if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { + if (unlikely(!inode->i_op->lookup)) + add_flags = DCACHE_AUTODIR_TYPE; + else + inode->i_opflags |= IOP_LOOKUP; + } + } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (unlikely(inode->i_op->follow_link)) + add_flags = DCACHE_SYMLINK_TYPE; + else + inode->i_opflags |= IOP_NOFOLLOW; + } + + if (unlikely(IS_AUTOMOUNT(inode))) + add_flags |= DCACHE_NEED_AUTOMOUNT; + return add_flags; +} + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + unsigned add_flags = d_flags_for_inode(inode); + spin_lock(&dentry->d_lock); - if (inode) { - if (unlikely(IS_AUTOMOUNT(inode))) - dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; + dentry->d_flags &= ~DCACHE_ENTRY_TYPE; + dentry->d_flags |= add_flags; + if (inode) hlist_add_head(&dentry->d_alias, &inode->i_dentry); - } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1801,6 +1803,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); +/** + * d_instantiate_no_diralias - instantiate a non-aliased dentry + * @entry: dentry to complete + * @inode: inode to attach to this dentry + * + * Fill in inode information in the entry. If a directory alias is found, then + * return an error (and drop inode). Together with d_materialise_unique() this + * guarantees that a directory inode may never have more than one alias. + */ +int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) +{ + BUG_ON(!hlist_unhashed(&entry->d_alias)); + + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { + spin_unlock(&inode->i_lock); + iput(inode); + return -EBUSY; + } + __d_instantiate(entry, inode); + spin_unlock(&inode->i_lock); + security_d_instantiate(entry, inode); + + return 0; +} +EXPORT_SYMBOL(d_instantiate_no_diralias); + struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; @@ -1870,6 +1899,7 @@ struct dentry *d_obtain_alias(struct inode *inode) static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; + unsigned add_flags; if (!inode) return ERR_PTR(-ESTALE); @@ -1895,9 +1925,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ + add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + spin_lock(&tmp->d_lock); tmp->d_inode = inode; - tmp->d_flags |= DCACHE_DISCONNECTED; + tmp->d_flags |= add_flags; hlist_add_head(&tmp->d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); @@ -2725,7 +2757,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) spin_unlock(&dentry->d_lock); /* anon->d_lock still locked, returns locked */ - anon->d_flags &= ~DCACHE_DISCONNECTED; } /** @@ -2881,27 +2912,36 @@ static int prepend_path(const struct path *path, const struct path *root, char **buffer, int *buflen) { - struct dentry *dentry = path->dentry; - struct vfsmount *vfsmnt = path->mnt; - struct mount *mnt = real_mount(vfsmnt); + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; int error = 0; - unsigned seq = 0; + unsigned seq, m_seq = 0; char *bptr; int blen; rcu_read_lock(); +restart_mnt: + read_seqbegin_or_lock(&mount_lock, &m_seq); + seq = 0; + rcu_read_lock(); restart: bptr = *buffer; blen = *buflen; + error = 0; + dentry = path->dentry; + vfsmnt = path->mnt; + mnt = real_mount(vfsmnt); read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); /* Global root? */ - if (mnt_has_parent(mnt)) { - dentry = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; + if (mnt != parent) { + dentry = ACCESS_ONCE(mnt->mnt_mountpoint); + mnt = parent; vfsmnt = &mnt->mnt; continue; } @@ -2936,6 +2976,14 @@ restart: } done_seqretry(&rename_lock, seq); + if (!(m_seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&mount_lock, m_seq)) { + m_seq = 1; + goto restart_mnt; + } + done_seqretry(&mount_lock, m_seq); + if (error >= 0 && bptr == *buffer) { if (--blen < 0) error = -ENAMETOOLONG; @@ -2971,9 +3019,7 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); error = prepend_path(path, root, &res, &buflen); - br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2990,9 +3036,7 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); error = prepend_path(path, &root, &res, &buflen); - br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -3067,9 +3111,7 @@ char *d_path(const struct path *path, char *buf, int buflen) rcu_read_lock(); get_fs_root_rcu(current->fs, &root); - br_read_lock(&vfsmount_lock); error = path_with_deleted(path, &root, &res, &buflen); - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3224,7 +3266,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; - br_read_lock(&vfsmount_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; char *cwd = page + PATH_MAX; @@ -3232,7 +3273,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3253,7 +3293,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -EFAULT; } } else { - br_read_unlock(&vfsmount_lock); rcu_read_unlock(); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c7c83ff0f752..9c0444cccbe1 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -566,8 +566,7 @@ void debugfs_remove_recursive(struct dentry *dentry) mutex_lock(&parent->d_inode->i_mutex); if (child != dentry) { - next = list_entry(child->d_u.d_child.next, struct dentry, - d_u.d_child); + next = list_next_entry(child, d_u.d_child); goto up; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 073d30b9d1ac..a726b9f29cb7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); + ida_destroy(&fsi->allocated_ptys); kfree(fsi); kill_litter_super(sb); } diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 88556dc0458e..d5abafd56a6d 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -706,9 +706,7 @@ static int lkb_idr_is_local(int id, void *p, void *data) { struct dlm_lkb *lkb = p; - if (!lkb->lkb_nodeid) - return 1; - return 0; + return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV; } static int lkb_idr_is_any(int id, void *p, void *data) diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index bf12ba5dd223..4000f6b3a750 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -44,15 +44,15 @@ */ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *lower_dentry; - int rc = 1; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + int rc; + + if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) + return 1; if (flags & LOOKUP_RCU) return -ECHILD; - lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) - goto out; rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); if (dentry->d_inode) { struct inode *lower_inode = @@ -60,12 +60,17 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) fsstack_copy_attr_all(dentry->d_inode, lower_inode); } -out: return rc; } struct kmem_cache *ecryptfs_dentry_info_cache; +static void ecryptfs_dentry_free_rcu(struct rcu_head *head) +{ + kmem_cache_free(ecryptfs_dentry_info_cache, + container_of(head, struct ecryptfs_dentry_info, rcu)); +} + /** * ecryptfs_d_release * @dentry: The ecryptfs dentry @@ -74,15 +79,11 @@ struct kmem_cache *ecryptfs_dentry_info_cache; */ static void ecryptfs_d_release(struct dentry *dentry) { - if (ecryptfs_dentry_to_private(dentry)) { - if (ecryptfs_dentry_to_lower(dentry)) { - dput(ecryptfs_dentry_to_lower(dentry)); - mntput(ecryptfs_dentry_to_lower_mnt(dentry)); - } - kmem_cache_free(ecryptfs_dentry_info_cache, - ecryptfs_dentry_to_private(dentry)); + struct ecryptfs_dentry_info *p = dentry->d_fsdata; + if (p) { + path_put(&p->lower_path); + call_rcu(&p->rcu, ecryptfs_dentry_free_rcu); } - return; } const struct dentry_operations ecryptfs_dops = { diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index df19d34a033b..90d1882b306f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -261,7 +261,10 @@ struct ecryptfs_inode_info { * vfsmount too. */ struct ecryptfs_dentry_info { struct path lower_path; - struct ecryptfs_crypt_stat *crypt_stat; + union { + struct ecryptfs_crypt_stat *crypt_stat; + struct rcu_head rcu; + }; }; /** @@ -512,13 +515,6 @@ ecryptfs_dentry_to_lower(struct dentry *dentry) return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry; } -static inline void -ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry) -{ - ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry = - lower_dentry; -} - static inline struct vfsmount * ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) { @@ -531,13 +527,6 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry) return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; } -static inline void -ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) -{ - ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt = - lower_mnt; -} - #define ecryptfs_printk(type, fmt, arg...) \ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); __printf(1, 2) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 992cf95830b5..2229a74aeeed 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td) { struct file *lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op && lower_file->f_op->flush) { + if (lower_file->f_op->flush) { filemap_write_and_wait(file->f_mapping); return lower_file->f_op->flush(lower_file, td); } @@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) struct file *lower_file = NULL; lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op && lower_file->f_op->fasync) + if (lower_file->f_op->fasync) rc = lower_file->f_op->fasync(fd, lower_file, flag); return rc; } @@ -318,7 +318,7 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ecryptfs_file_to_private(file)) lower_file = ecryptfs_file_to_lower(file); - if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl) + if (lower_file->f_op->unlocked_ioctl) rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); return rc; } @@ -332,7 +332,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ecryptfs_file_to_private(file)) lower_file = ecryptfs_file_to_lower(file); - if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl) + if (lower_file->f_op && lower_file->f_op->compat_ioctl) rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); return rc; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 67e9b6339691..c36c44824471 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; @@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode, inode = __ecryptfs_get_inode(lower_dentry->d_inode, directory_inode->i_sb); if (IS_ERR(inode)) { - vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); + vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL); goto out_lock; } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); @@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - ecryptfs_set_dentry_lower(dentry, lower_dentry); - ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); + dentry_info->lower_path.mnt = lower_mnt; + dentry_info->lower_path.dentry = lower_dentry; if (!lower_dentry->d_inode) { /* We want to add because we couldn't find in lower */ @@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, dget(lower_new_dentry); lower_dir_dentry = lock_parent(lower_new_dentry); rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, - lower_new_dentry); + lower_new_dentry, NULL); if (rc || !lower_new_dentry->d_inode) goto out_lock; rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); @@ -640,7 +640,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_lock; } rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, - lower_new_dir_dentry->d_inode, lower_new_dentry); + lower_new_dir_dentry->d_inode, lower_new_dentry, + NULL); if (rc) goto out_lock; if (target_inode) @@ -703,16 +704,6 @@ out: return NULL; } -static void -ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) -{ - char *buf = nd_get_link(nd); - if (!IS_ERR(buf)) { - /* Free the char* */ - kfree(buf); - } -} - /** * upper_size_to_lower_size * @crypt_stat: Crypt_stat associated with file @@ -891,7 +882,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = notify_change(lower_dentry, &lower_ia); + rc = notify_change(lower_dentry, &lower_ia, NULL); mutex_unlock(&lower_dentry->d_inode->i_mutex); } return rc; @@ -992,7 +983,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) lower_ia.ia_valid &= ~ATTR_MODE; mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = notify_change(lower_dentry, &lower_ia); + rc = notify_change(lower_dentry, &lower_ia, NULL); mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode); @@ -1121,7 +1112,7 @@ out: const struct inode_operations ecryptfs_symlink_iops = { .readlink = generic_readlink, .follow_link = ecryptfs_follow_link, - .put_link = ecryptfs_put_link, + .put_link = kfree_put_link, .permission = ecryptfs_permission, .setattr = ecryptfs_setattr, .getattr = ecryptfs_getattr_link, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index eb1c5979ecaf..1b119d3bf924 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -585,8 +585,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /* ->kill_sb() will take care of root_info */ ecryptfs_set_dentry_private(s->s_root, root_info); - ecryptfs_set_dentry_lower(s->s_root, path.dentry); - ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt); + root_info->lower_path = path; s->s_flags |= MS_ACTIVE; return dget(s->s_root); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 810c28fb8c3c..79b65c3b9e87 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -41,6 +41,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/compat.h> +#include <linux/rculist.h> /* * LOCKING: @@ -133,8 +134,12 @@ struct nested_calls { * of these on a server and we do not want this to take another cache line. */ struct epitem { - /* RB tree node used to link this structure to the eventpoll RB tree */ - struct rb_node rbn; + union { + /* RB tree node links this structure to the eventpoll RB tree */ + struct rb_node rbn; + /* Used to free the struct epitem */ + struct rcu_head rcu; + }; /* List header used to link this structure to the eventpoll ready list */ struct list_head rdllink; @@ -580,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) * @sproc: Pointer to the scan callback. * @priv: Private opaque data passed to the @sproc callback. * @depth: The current depth of recursive f_op->poll calls. + * @ep_locked: caller already holds ep->mtx * * Returns: The same integer error code returned by the @sproc callback. */ static int ep_scan_ready_list(struct eventpoll *ep, int (*sproc)(struct eventpoll *, struct list_head *, void *), - void *priv, - int depth) + void *priv, int depth, bool ep_locked) { int error, pwake = 0; unsigned long flags; @@ -598,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep, * We need to lock this because we could be hit by * eventpoll_release_file() and epoll_ctl(). */ - mutex_lock_nested(&ep->mtx, depth); + + if (!ep_locked) + mutex_lock_nested(&ep->mtx, depth); /* * Steal the ready list, and re-init the original one to the @@ -662,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep, } spin_unlock_irqrestore(&ep->lock, flags); - mutex_unlock(&ep->mtx); + if (!ep_locked) + mutex_unlock(&ep->mtx); /* We have to call this outside the lock */ if (pwake) @@ -671,6 +679,12 @@ static int ep_scan_ready_list(struct eventpoll *ep, return error; } +static void epi_rcu_free(struct rcu_head *head) +{ + struct epitem *epi = container_of(head, struct epitem, rcu); + kmem_cache_free(epi_cache, epi); +} + /* * Removes a "struct epitem" from the eventpoll RB tree and deallocates * all the associated resources. Must be called with "mtx" held. @@ -692,8 +706,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); - if (ep_is_linked(&epi->fllink)) - list_del_init(&epi->fllink); + list_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); rb_erase(&epi->rbn, &ep->rbr); @@ -704,9 +717,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) spin_unlock_irqrestore(&ep->lock, flags); wakeup_source_unregister(ep_wakeup_source(epi)); - - /* At this point it is safe to free the eventpoll item */ - kmem_cache_free(epi_cache, epi); + /* + * At this point it is safe to free the eventpoll item. Use the union + * field epi->rcu, since we are trying to minimize the size of + * 'struct epitem'. The 'rbn' field is no longer in use. Protected by + * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make + * use of the rbn field. + */ + call_rcu(&epi->rcu, epi_rcu_free); atomic_long_dec(&ep->user->epoll_watches); @@ -807,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, return 0; } +static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, + poll_table *pt); + +struct readyevents_arg { + struct eventpoll *ep; + bool locked; +}; + static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) { - return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1); + struct readyevents_arg *arg = priv; + + return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL, + call_nests + 1, arg->locked); } static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) { int pollflags; struct eventpoll *ep = file->private_data; + struct readyevents_arg arg; + + /* + * During ep_insert() we already hold the ep->mtx for the tfile. + * Prevent re-aquisition. + */ + arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc); + arg.ep = ep; /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); @@ -827,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) * could re-enter here. */ pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, - ep_poll_readyevents_proc, ep, ep, current); + ep_poll_readyevents_proc, &arg, ep, current); return pollflags != -1 ? pollflags : 0; } @@ -872,7 +909,6 @@ static const struct file_operations eventpoll_fops = { */ void eventpoll_release_file(struct file *file) { - struct list_head *lsthead = &file->f_ep_links; struct eventpoll *ep; struct epitem *epi; @@ -890,17 +926,12 @@ void eventpoll_release_file(struct file *file) * Besides, ep_remove() acquires the lock, so we can't hold it here. */ mutex_lock(&epmutex); - - while (!list_empty(lsthead)) { - epi = list_first_entry(lsthead, struct epitem, fllink); - + list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { ep = epi->ep; - list_del_init(&epi->fllink); mutex_lock_nested(&ep->mtx, 0); ep_remove(ep, epi); mutex_unlock(&ep->mtx); } - mutex_unlock(&epmutex); } @@ -1138,7 +1169,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) struct file *child_file; struct epitem *epi; - list_for_each_entry(epi, &file->f_ep_links, fllink) { + /* CTL_DEL can remove links here, but that can't increase our count */ + rcu_read_lock(); + list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { child_file = epi->ep->file; if (is_file_epoll(child_file)) { if (list_empty(&child_file->f_ep_links)) { @@ -1160,6 +1193,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) "file is not an ep!\n"); } } + rcu_read_unlock(); return error; } @@ -1231,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi) * Must be called with "mtx" held. */ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, - struct file *tfile, int fd) + struct file *tfile, int fd, int full_check) { int error, revents, pwake = 0; unsigned long flags; @@ -1286,7 +1320,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Add the current item to the list of active epoll hook for this file */ spin_lock(&tfile->f_lock); - list_add_tail(&epi->fllink, &tfile->f_ep_links); + list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_lock); /* @@ -1297,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* now check if we've created too many backpaths */ error = -EINVAL; - if (reverse_path_check()) + if (full_check && reverse_path_check()) goto error_remove_epi; /* We have to drop the new item inside our item list to keep track of it */ @@ -1327,8 +1361,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, error_remove_epi: spin_lock(&tfile->f_lock); - if (ep_is_linked(&epi->fllink)) - list_del_init(&epi->fllink); + list_del_rcu(&epi->fllink); spin_unlock(&tfile->f_lock); rb_erase(&epi->rbn, &ep->rbr); @@ -1521,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep, esed.maxevents = maxevents; esed.events = events; - return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0); + return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false); } static inline struct timespec ep_set_mstimeout(long ms) @@ -1791,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event) { int error; - int did_lock_epmutex = 0; + int full_check = 0; struct fd f, tf; struct eventpoll *ep; struct epitem *epi; struct epoll_event epds; + struct eventpoll *tep = NULL; error = -EFAULT; if (ep_op_has_event(op) && @@ -1814,7 +1848,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* The target file descriptor must support poll */ error = -EPERM; - if (!tf.file->f_op || !tf.file->f_op->poll) + if (!tf.file->f_op->poll) goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ @@ -1844,26 +1878,40 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * and hang them on the tfile_check_list, so we can check that we * haven't created too many possible wakeup paths. * - * We need to hold the epmutex across both ep_insert and ep_remove - * b/c we want to make sure we are looking at a coherent view of - * epoll network. + * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when + * the epoll file descriptor is attaching directly to a wakeup source, + * unless the epoll file descriptor is nested. The purpose of taking the + * 'epmutex' on add is to prevent complex toplogies such as loops and + * deep wakeup paths from forming in parallel through multiple + * EPOLL_CTL_ADD operations. */ - if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) { - mutex_lock(&epmutex); - did_lock_epmutex = 1; - } + mutex_lock_nested(&ep->mtx, 0); if (op == EPOLL_CTL_ADD) { - if (is_file_epoll(tf.file)) { - error = -ELOOP; - if (ep_loop_check(ep, tf.file) != 0) { - clear_tfile_check_list(); - goto error_tgt_fput; + if (!list_empty(&f.file->f_ep_links) || + is_file_epoll(tf.file)) { + full_check = 1; + mutex_unlock(&ep->mtx); + mutex_lock(&epmutex); + if (is_file_epoll(tf.file)) { + error = -ELOOP; + if (ep_loop_check(ep, tf.file) != 0) { + clear_tfile_check_list(); + goto error_tgt_fput; + } + } else + list_add(&tf.file->f_tfile_llink, + &tfile_check_list); + mutex_lock_nested(&ep->mtx, 0); + if (is_file_epoll(tf.file)) { + tep = tf.file->private_data; + mutex_lock_nested(&tep->mtx, 1); } - } else - list_add(&tf.file->f_tfile_llink, &tfile_check_list); + } + } + if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) { + tep = tf.file->private_data; + mutex_lock_nested(&tep->mtx, 1); } - - mutex_lock_nested(&ep->mtx, 0); /* * Try to lookup the file inside our RB tree, Since we grabbed "mtx" @@ -1877,10 +1925,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, case EPOLL_CTL_ADD: if (!epi) { epds.events |= POLLERR | POLLHUP; - error = ep_insert(ep, &epds, tf.file, fd); + error = ep_insert(ep, &epds, tf.file, fd, full_check); } else error = -EEXIST; - clear_tfile_check_list(); + if (full_check) + clear_tfile_check_list(); break; case EPOLL_CTL_DEL: if (epi) @@ -1896,10 +1945,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, error = -ENOENT; break; } + if (tep != NULL) + mutex_unlock(&tep->mtx); mutex_unlock(&ep->mtx); error_tgt_fput: - if (did_lock_epmutex) + if (full_check) mutex_unlock(&epmutex); fdput(tf); diff --git a/fs/exec.c b/fs/exec.c index 2ea437e5acf4..977319fd77f3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -106,6 +106,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ SYSCALL_DEFINE1(uselib, const char __user *, library) { + struct linux_binfmt *fmt; struct file *file; struct filename *tmp = getname(library); int error = PTR_ERR(tmp); @@ -136,24 +137,21 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) fsnotify_open(file); error = -ENOEXEC; - if(file->f_op) { - struct linux_binfmt * fmt; - read_lock(&binfmt_lock); - list_for_each_entry(fmt, &formats, lh) { - if (!fmt->load_shlib) - continue; - if (!try_module_get(fmt->module)) - continue; - read_unlock(&binfmt_lock); - error = fmt->load_shlib(file); - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (error != -ENOEXEC) - break; - } + read_lock(&binfmt_lock); + list_for_each_entry(fmt, &formats, lh) { + if (!fmt->load_shlib) + continue; + if (!try_module_get(fmt->module)) + continue; read_unlock(&binfmt_lock); + error = fmt->load_shlib(file); + read_lock(&binfmt_lock); + put_binfmt(fmt); + if (error != -ENOEXEC) + break; } + read_unlock(&binfmt_lock); exit: fput(file); out: @@ -1277,13 +1275,10 @@ static int check_unsafe_exec(struct linux_binprm *bprm) */ int prepare_binprm(struct linux_binprm *bprm) { - umode_t mode; - struct inode * inode = file_inode(bprm->file); + struct inode *inode = file_inode(bprm->file); + umode_t mode = inode->i_mode; int retval; - mode = inode->i_mode; - if (bprm->file->f_op == NULL) - return -EACCES; /* clear any previous set[ug]id data from a previous binary */ bprm->cred->euid = current_euid(); @@ -1669,6 +1664,12 @@ int __get_dumpable(unsigned long mm_flags) return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a235f0016889..48a359dd286e 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -69,145 +69,162 @@ find_acceptable_alias(struct dentry *result, return NULL; } -/* - * Find root of a disconnected subtree and return a reference to it. - */ -static struct dentry * -find_disconnected_root(struct dentry *dentry) +static bool dentry_connected(struct dentry *dentry) { dget(dentry); - while (!IS_ROOT(dentry)) { + while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); - if (!(parent->d_flags & DCACHE_DISCONNECTED)) { + dput(dentry); + if (IS_ROOT(dentry)) { dput(parent); - break; + return false; } + dentry = parent; + } + dput(dentry); + return true; +} + +static void clear_disconnected(struct dentry *dentry) +{ + dget(dentry); + while (dentry->d_flags & DCACHE_DISCONNECTED) { + struct dentry *parent = dget_parent(dentry); + + WARN_ON_ONCE(IS_ROOT(dentry)); + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_DISCONNECTED; + spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } - return dentry; + dput(dentry); +} + +/* + * Reconnect a directory dentry with its parent. + * + * This can return a dentry, or NULL, or an error. + * + * In the first case the returned dentry is the parent of the given + * dentry, and may itself need to be reconnected to its parent. + * + * In the NULL case, a concurrent VFS operation has either renamed or + * removed this directory. The concurrent operation has reconnected our + * dentry, so we no longer need to. + */ +static struct dentry *reconnect_one(struct vfsmount *mnt, + struct dentry *dentry, char *nbuf) +{ + struct dentry *parent; + struct dentry *tmp; + int err; + + parent = ERR_PTR(-EACCES); + mutex_lock(&dentry->d_inode->i_mutex); + if (mnt->mnt_sb->s_export_op->get_parent) + parent = mnt->mnt_sb->s_export_op->get_parent(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + + if (IS_ERR(parent)) { + dprintk("%s: get_parent of %ld failed, err %d\n", + __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + return parent; + } + + dprintk("%s: find name of %lu in %lu\n", __func__, + dentry->d_inode->i_ino, parent->d_inode->i_ino); + err = exportfs_get_name(mnt, parent, nbuf, dentry); + if (err == -ENOENT) + goto out_reconnected; + if (err) + goto out_err; + dprintk("%s: found name: %s\n", __func__, nbuf); + mutex_lock(&parent->d_inode->i_mutex); + tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); + mutex_unlock(&parent->d_inode->i_mutex); + if (IS_ERR(tmp)) { + dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + goto out_err; + } + if (tmp != dentry) { + dput(tmp); + goto out_reconnected; + } + dput(tmp); + if (IS_ROOT(dentry)) { + err = -ESTALE; + goto out_err; + } + return parent; + +out_err: + dput(parent); + return ERR_PTR(err); +out_reconnected: + dput(parent); + /* + * Someone must have renamed our entry into another parent, in + * which case it has been reconnected by the rename. + * + * Or someone removed it entirely, in which case filehandle + * lookup will succeed but the directory is now IS_DEAD and + * subsequent operations on it will fail. + * + * Alternatively, maybe there was no race at all, and the + * filesystem is just corrupt and gave us a parent that doesn't + * actually contain any entry pointing to this inode. So, + * double check that this worked and return -ESTALE if not: + */ + if (!dentry_connected(dentry)) + return ERR_PTR(-ESTALE); + return NULL; } /* * Make sure target_dir is fully connected to the dentry tree. * - * It may already be, as the flag isn't always updated when connection happens. + * On successful return, DCACHE_DISCONNECTED will be cleared on + * target_dir, and target_dir->d_parent->...->d_parent will reach the + * root of the filesystem. + * + * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. + * But the converse is not true: target_dir may have DCACHE_DISCONNECTED + * set but already be connected. In that case we'll verify the + * connection to root and then clear the flag. + * + * Note that target_dir could be removed by a concurrent operation. In + * that case reconnect_path may still succeed with target_dir fully + * connected, but further operations using the filehandle will fail when + * necessary (due to S_DEAD being set on the directory). */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { - int noprogress = 0; - int err = -ESTALE; + struct dentry *dentry, *parent; - /* - * It is possible that a confused file system might not let us complete - * the path to the root. For example, if get_parent returns a directory - * in which we cannot find a name for the child. While this implies a - * very sick filesystem we don't want it to cause knfsd to spin. Hence - * the noprogress counter. If we go through the loop 10 times (2 is - * probably enough) without getting anywhere, we just give up - */ - while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { - struct dentry *pd = find_disconnected_root(target_dir); - - if (!IS_ROOT(pd)) { - /* must have found a connected parent - great */ - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else if (pd == mnt->mnt_sb->s_root) { - printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); - spin_lock(&pd->d_lock); - pd->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&pd->d_lock); - noprogress = 0; - } else { - /* - * We have hit the top of a disconnected path, try to - * find parent and connect. - * - * Racing with some other process renaming a directory - * isn't much of a problem here. If someone renames - * the directory, it will end up properly connected, - * which is what we want - * - * Getting the parent can't be supported generically, - * the locking is too icky. - * - * Instead we just return EACCES. If server reboots - * or inodes get flushed, you lose - */ - struct dentry *ppd = ERR_PTR(-EACCES); - struct dentry *npd; - - mutex_lock(&pd->d_inode->i_mutex); - if (mnt->mnt_sb->s_export_op->get_parent) - ppd = mnt->mnt_sb->s_export_op->get_parent(pd); - mutex_unlock(&pd->d_inode->i_mutex); - - if (IS_ERR(ppd)) { - err = PTR_ERR(ppd); - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, pd->d_inode->i_ino, err); - dput(pd); - break; - } + dentry = dget(target_dir); - dprintk("%s: find name of %lu in %lu\n", __func__, - pd->d_inode->i_ino, ppd->d_inode->i_ino); - err = exportfs_get_name(mnt, ppd, nbuf, pd); - if (err) { - dput(ppd); - dput(pd); - if (err == -ENOENT) - /* some race between get_parent and - * get_name? just try again - */ - continue; - break; - } - dprintk("%s: found name: %s\n", __func__, nbuf); - mutex_lock(&ppd->d_inode->i_mutex); - npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); - mutex_unlock(&ppd->d_inode->i_mutex); - if (IS_ERR(npd)) { - err = PTR_ERR(npd); - dprintk("%s: lookup failed: %d\n", - __func__, err); - dput(ppd); - dput(pd); - break; - } - /* we didn't really want npd, we really wanted - * a side-effect of the lookup. - * hopefully, npd == pd, though it isn't really - * a problem if it isn't - */ - if (npd == pd) - noprogress = 0; - else - printk("%s: npd != pd\n", __func__); - dput(npd); - dput(ppd); - if (IS_ROOT(pd)) { - /* something went wrong, we have to give up */ - dput(pd); - break; - } - } - dput(pd); - } + while (dentry->d_flags & DCACHE_DISCONNECTED) { + BUG_ON(dentry == mnt->mnt_sb->s_root); - if (target_dir->d_flags & DCACHE_DISCONNECTED) { - /* something went wrong - oh-well */ - if (!err) - err = -ESTALE; - return err; - } + if (IS_ROOT(dentry)) + parent = reconnect_one(mnt, dentry, nbuf); + else + parent = dget_parent(dentry); + if (!parent) + break; + dput(dentry); + if (IS_ERR(parent)) + return PTR_ERR(parent); + dentry = parent; + } + dput(dentry); + clear_disconnected(target_dir); return 0; } @@ -215,7 +232,7 @@ struct getdents_callback { struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ - unsigned long ino; /* the inum we are looking for */ + u64 ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; @@ -255,10 +272,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child) struct inode *dir = path->dentry->d_inode; int error; struct file *file; + struct kstat stat; + struct path child_path = { + .mnt = path->mnt, + .dentry = child, + }; struct getdents_callback buffer = { .ctx.actor = filldir_one, .name = name, - .ino = child->d_inode->i_ino }; error = -ENOTDIR; @@ -268,6 +289,16 @@ static int get_name(const struct path *path, char *name, struct dentry *child) if (!dir->i_fop) goto out; /* + * inode->i_ino is unsigned long, kstat->ino is u64, so the + * former would be insufficient on 32-bit hosts when the + * filesystem supports 64-bit inode numbers. So we need to + * actually call ->getattr, not just read i_ino: + */ + error = vfs_getattr_nosec(&child_path, &stat); + if (error) + return error; + buffer.ino = stat.ino; + /* * Open the directory ... */ file = dentry_open(path, O_RDONLY, cred); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c260de6d7b6d..8a337640a46a 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode, int count = 0; ext2_fsblk_t first_block = 0; + BUG_ON(maxblocks == 0); + depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index 1c3312858fcf..e98171a11cfe 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, int rc; memset(&tmp, 0, sizeof(struct buffer_head)); + tmp.b_size = 1 << inode->i_blkbits; rc = ext2_get_block(inode, pgoff, &tmp, create); *result = tmp.b_blocknr; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index c50c76190373..37fd31ed16e7 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2825,6 +2825,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * bitmap, and an inode table. */ overhead += ngroups * (2 + sbi->s_itb_per_group); + + /* Add the journal blocks as well */ + overhead += sbi->s_journal->j_maxlen; + sbi->s_overhead_last = overhead; smp_wmb(); sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af815ea9d7cc..d01d62315f7e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2734,8 +2734,6 @@ extern void ext4_double_down_write_data_sem(struct inode *first, struct inode *second); extern void ext4_double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode); -void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2); -void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2); extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 start_orig, __u64 start_donor, __u64 len, __u64 *moved_len); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a569d335f804..60589b60e9b0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb, /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ - ext4_inode_double_lock(inode, inode_bl); + lock_two_nondirectories(inode, inode_bl); truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); @@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl); - ext4_inode_double_unlock(inode, inode_bl); + unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7fa4d855dbd5..773b503bd18c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1203,42 +1203,6 @@ mext_check_arguments(struct inode *orig_inode, } /** - * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 - * - * @inode1: the inode structure - * @inode2: the inode structure - * - * Lock two inodes' i_mutex - */ -void -ext4_inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - BUG_ON(inode1 == inode2); - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} - -/** - * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 - * - * @inode1: the inode that is released first - * @inode2: the inode that is released second - * - */ - -void -ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&inode1->i_mutex); - mutex_unlock(&inode2->i_mutex); -} - -/** * ext4_move_extents - Exchange the specified range of a file * * @o_filp: file structure of the original file @@ -1327,7 +1291,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } /* Protect orig and donor inodes against a truncate */ - ext4_inode_double_lock(orig_inode, donor_inode); + lock_two_nondirectories(orig_inode, donor_inode); /* Wait for all existing dio workers */ ext4_inode_block_unlocked_dio(orig_inode); @@ -1535,7 +1499,7 @@ out: ext4_double_up_write_data_sem(orig_inode, donor_inode); ext4_inode_resume_unlocked_dio(orig_inode); ext4_inode_resume_unlocked_dio(donor_inode); - ext4_inode_double_unlock(orig_inode, donor_inode); + unlock_two_nondirectories(orig_inode, donor_inode); return ret; } diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index e06e0995e00f..214fe1054fce 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -63,3 +63,11 @@ config F2FS_FS_SECURITY the extended attribute support in advance. If you are not using a security module, say N. + +config F2FS_CHECK_FS + bool "F2FS consistency checking feature" + depends on F2FS_FS + help + Enables BUG_ONs which check the file system consistency in runtime. + + If you want to improve the performance, say N. diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index b7826ec1b470..d0fc287efeff 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -205,7 +205,8 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } -static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +static int f2fs_set_acl(struct inode *inode, int type, + struct posix_acl *acl, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -250,7 +251,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } } - error = f2fs_setxattr(inode, name_index, "", value, size, NULL); + error = f2fs_setxattr(inode, name_index, "", value, size, ipage); kfree(value); if (!error) @@ -260,10 +261,10 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; } -int f2fs_init_acl(struct inode *inode, struct inode *dir) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) { - struct posix_acl *acl = NULL; struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct posix_acl *acl = NULL; int error = 0; if (!S_ISLNK(inode->i_mode)) { @@ -276,19 +277,19 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } - if (test_opt(sbi, POSIX_ACL) && acl) { + if (!test_opt(sbi, POSIX_ACL) || !acl) + goto cleanup; - if (S_ISDIR(inode->i_mode)) { - error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto cleanup; - } - error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); - if (error < 0) - return error; - if (error > 0) - error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage); + if (error) + goto cleanup; } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); cleanup: posix_acl_release(acl); return error; @@ -313,7 +314,8 @@ int f2fs_acl_chmod(struct inode *inode) error = posix_acl_chmod(&acl, GFP_KERNEL, mode); if (error) return error; - error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL); posix_acl_release(acl); return error; } @@ -388,7 +390,7 @@ static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, acl = NULL; } - error = f2fs_set_acl(inode, type, acl); + error = f2fs_set_acl(inode, type, acl, NULL); release_and_out: posix_acl_release(acl); diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 80f430674417..49633131e038 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -36,9 +36,9 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); -extern int f2fs_acl_chmod(struct inode *inode); -extern int f2fs_init_acl(struct inode *inode, struct inode *dir); +extern struct posix_acl *f2fs_get_acl(struct inode *, int); +extern int f2fs_acl_chmod(struct inode *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL @@ -49,7 +49,8 @@ static inline int f2fs_acl_chmod(struct inode *inode) return 0; } -static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, + struct page *page) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bb312201ca95..5716e5eb4e8e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -81,7 +81,7 @@ static int f2fs_write_meta_page(struct page *page, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); /* Should not write any meta pages, if any IO error was occurred */ - if (wbc->for_reclaim || + if (wbc->for_reclaim || sbi->por_doing || is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { dec_page_count(sbi, F2FS_DIRTY_META); wbc->pages_skipped++; @@ -142,8 +142,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; lock_page(page); - BUG_ON(page->mapping != mapping); - BUG_ON(!PageDirty(page)); + f2fs_bug_on(page->mapping != mapping); + f2fs_bug_on(!PageDirty(page)); clear_page_dirty_for_io(page); if (f2fs_write_meta_page(page, &wbc)) { unlock_page(page); @@ -167,6 +167,8 @@ static int f2fs_set_meta_page_dirty(struct page *page) struct address_space *mapping = page->mapping; struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + trace_f2fs_set_page_dirty(page, META); + SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); @@ -206,6 +208,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) void release_orphan_inode(struct f2fs_sb_info *sbi) { mutex_lock(&sbi->orphan_inode_mutex); + f2fs_bug_on(sbi->n_orphans == 0); sbi->n_orphans--; mutex_unlock(&sbi->orphan_inode_mutex); } @@ -225,12 +228,8 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) break; orphan = NULL; } -retry: - new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); - if (!new) { - cond_resched(); - goto retry; - } + + new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); new->ino = ino; /* add new_oentry into list which is sorted by inode number */ @@ -253,6 +252,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) if (orphan->ino == ino) { list_del(&orphan->list); kmem_cache_free(orphan_entry_slab, orphan); + f2fs_bug_on(sbi->n_orphans == 0); sbi->n_orphans--; break; } @@ -263,7 +263,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode = f2fs_iget(sbi->sb, ino); - BUG_ON(IS_ERR(inode)); + f2fs_bug_on(IS_ERR(inode)); clear_nlink(inode); /* truncate all the data during iput */ @@ -277,7 +277,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return 0; - sbi->por_doing = 1; + sbi->por_doing = true; start_blk = __start_cp_addr(sbi) + 1; orphan_blkaddr = __start_sum_addr(sbi) - 1; @@ -294,7 +294,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) } /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = 0; + sbi->por_doing = false; return 0; } @@ -469,9 +469,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) return -EEXIST; } list_add_tail(&new->list, head); -#ifdef CONFIG_F2FS_STAT_FS - sbi->n_dirty_dirs++; -#endif + stat_inc_dirty_dir(sbi); return 0; } @@ -482,12 +480,8 @@ void set_dirty_dir_page(struct inode *inode, struct page *page) if (!S_ISDIR(inode->i_mode)) return; -retry: - new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - if (!new) { - cond_resched(); - goto retry; - } + + new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new->inode = inode; INIT_LIST_HEAD(&new->list); @@ -504,13 +498,9 @@ retry: void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct dir_inode_entry *new; -retry: - new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - if (!new) { - cond_resched(); - goto retry; - } + struct dir_inode_entry *new = + f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + new->inode = inode; INIT_LIST_HEAD(&new->list); @@ -541,9 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode) if (entry->inode == inode) { list_del(&entry->list); kmem_cache_free(inode_entry_slab, entry); -#ifdef CONFIG_F2FS_STAT_FS - sbi->n_dirty_dirs--; -#endif + stat_dec_dirty_dir(sbi); break; } } @@ -617,11 +605,10 @@ static void block_operations(struct f2fs_sb_info *sbi) blk_start_plug(&plug); retry_flush_dents: - mutex_lock_all(sbi); - + f2fs_lock_all(sbi); /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { - mutex_unlock_all(sbi); + f2fs_unlock_all(sbi); sync_dirty_dir_inodes(sbi); goto retry_flush_dents; } @@ -644,7 +631,22 @@ retry_flush_nodes: static void unblock_operations(struct f2fs_sb_info *sbi) { mutex_unlock(&sbi->node_write); - mutex_unlock_all(sbi); + f2fs_unlock_all(sbi); +} + +static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +{ + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); + + if (!get_pages(sbi, F2FS_WRITEBACK)) + break; + + io_schedule(); + } + finish_wait(&sbi->cp_wait, &wait); } static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) @@ -756,8 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) f2fs_put_page(cp_page, 1); /* wait for previous submitted node/meta pages writeback */ - while (get_pages(sbi, F2FS_WRITEBACK)) - congestion_wait(BLK_RW_ASYNC, HZ / 50); + wait_on_all_pages_writeback(sbi); filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 941f9b9ca3a5..aa3438c571fa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -68,9 +68,6 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { struct f2fs_inode_info *fi = F2FS_I(inode); -#ifdef CONFIG_F2FS_STAT_FS - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -#endif pgoff_t start_fofs, end_fofs; block_t start_blkaddr; @@ -80,9 +77,8 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } -#ifdef CONFIG_F2FS_STAT_FS - sbi->total_hit_ext++; -#endif + stat_inc_total_hit(inode->i_sb); + start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; start_blkaddr = fi->ext.blk_addr; @@ -100,9 +96,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, else bh_result->b_size = UINT_MAX; -#ifdef CONFIG_F2FS_STAT_FS - sbi->read_hit_ext++; -#endif + stat_inc_read_hit(inode->i_sb); read_unlock(&fi->ext.ext_lock); return 1; } @@ -116,7 +110,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; - BUG_ON(blk_addr == NEW_ADDR); + f2fs_bug_on(blk_addr == NEW_ADDR); fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; @@ -442,7 +436,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, } /* It does not support data allocation */ - BUG_ON(create); + f2fs_bug_on(create); if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { int i; @@ -560,9 +554,9 @@ write: inode_dec_dirty_dents(inode); err = do_write_data_page(page); } else { - int ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); err = do_write_data_page(page); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); need_balance_fs = true; } if (err == -ENOENT) @@ -641,7 +635,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; - int ilock; f2fs_balance_fs(sbi); repeat: @@ -650,7 +643,7 @@ repeat: return -ENOMEM; *pagep = page; - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); @@ -664,7 +657,7 @@ repeat: if (err) goto err; - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -700,7 +693,7 @@ out: return 0; err: - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); f2fs_put_page(page, 1); return err; } @@ -763,6 +756,8 @@ static int f2fs_set_data_page_dirty(struct page *page) struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; + trace_f2fs_set_page_dirty(page, DATA); + SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 384c6daf9a89..594fc1bb64ef 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -139,7 +139,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, bool room = false; int max_slots = 0; - BUG_ON(level > MAX_DIR_HASH_DEPTH); + f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); nbucket = dir_buckets(level); nblock = bucket_blocks(level); @@ -346,7 +346,7 @@ static struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir); + err = f2fs_init_acl(inode, dir, page); if (err) goto error; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 608f0df5b919..89dc7508faf2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -18,6 +18,13 @@ #include <linux/crc32.h> #include <linux/magic.h> #include <linux/kobject.h> +#include <linux/sched.h> + +#ifdef CONFIG_F2FS_CHECK_FS +#define f2fs_bug_on(condition) BUG_ON(condition) +#else +#define f2fs_bug_on(condition) +#endif /* * For mount options @@ -298,6 +305,9 @@ struct f2fs_sm_info { unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ unsigned int ovp_segments; /* # of overprovision segments */ + + /* a threshold to reclaim prefree segments */ + unsigned int rec_prefree_segments; }; /* @@ -318,14 +328,6 @@ enum count_type { }; /* - * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. - * The checkpoint procedure blocks all the locks in this fs_lock array. - * Some FS operations grab free locks, and if there is no free lock, - * then wait to grab a lock in a round-robin manner. - */ -#define NR_GLOBAL_LOCKS 8 - -/* * The below are the page types of bios used in submti_bio(). * The available types are: * DATA User data pages. It operates as async mode. @@ -365,12 +367,12 @@ struct f2fs_sb_info { struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ + struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct mutex node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ - unsigned char next_lock_num; /* round-robin global locks */ - int por_doing; /* recovery is doing or not */ - int on_build_free_nids; /* build_free_nids is doing */ + bool por_doing; /* recovery is doing or not */ + bool on_build_free_nids; /* build_free_nids is doing */ + wait_queue_head_t cp_wait; /* for orphan inode management */ struct list_head orphan_inode_list; /* orphan inode list */ @@ -520,48 +522,24 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) cp->ckpt_flags = cpu_to_le32(ckpt_flags); } -static inline void mutex_lock_all(struct f2fs_sb_info *sbi) +static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - int i; - - for (i = 0; i < NR_GLOBAL_LOCKS; i++) { - /* - * This is the only time we take multiple fs_lock[] - * instances; the order is immaterial since we - * always hold cp_mutex, which serializes multiple - * such operations. - */ - mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); - } + down_read(&sbi->cp_rwsem); } -static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) +static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - int i = 0; - for (; i < NR_GLOBAL_LOCKS; i++) - mutex_unlock(&sbi->fs_lock[i]); + up_read(&sbi->cp_rwsem); } -static inline int mutex_lock_op(struct f2fs_sb_info *sbi) +static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; - int i = 0; - - for (; i < NR_GLOBAL_LOCKS; i++) - if (mutex_trylock(&sbi->fs_lock[i])) - return i; - - mutex_lock(&sbi->fs_lock[next_lock]); - sbi->next_lock_num++; - return next_lock; + down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex); } -static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) +static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - if (ilock < 0) - return; - BUG_ON(ilock >= NR_GLOBAL_LOCKS); - mutex_unlock(&sbi->fs_lock[ilock]); + up_write(&sbi->cp_rwsem); } /* @@ -612,8 +590,8 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t count) { spin_lock(&sbi->stat_lock); - BUG_ON(sbi->total_valid_block_count < (block_t) count); - BUG_ON(inode->i_blocks < count); + f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); + f2fs_bug_on(inode->i_blocks < count); inode->i_blocks -= count; sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); @@ -745,9 +723,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, { spin_lock(&sbi->stat_lock); - BUG_ON(sbi->total_valid_block_count < count); - BUG_ON(sbi->total_valid_node_count < count); - BUG_ON(inode->i_blocks < count); + f2fs_bug_on(sbi->total_valid_block_count < count); + f2fs_bug_on(sbi->total_valid_node_count < count); + f2fs_bug_on(inode->i_blocks < count); inode->i_blocks -= count; sbi->total_valid_node_count -= count; @@ -768,7 +746,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); - BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); sbi->total_valid_inode_count++; spin_unlock(&sbi->stat_lock); } @@ -776,7 +754,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); - BUG_ON(!sbi->total_valid_inode_count); + f2fs_bug_on(!sbi->total_valid_inode_count); sbi->total_valid_inode_count--; spin_unlock(&sbi->stat_lock); return 0; @@ -797,7 +775,7 @@ static inline void f2fs_put_page(struct page *page, int unlock) return; if (unlock) { - BUG_ON(!PageLocked(page)); + f2fs_bug_on(!PageLocked(page)); unlock_page(page); } page_cache_release(page); @@ -819,6 +797,20 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); } +static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, + gfp_t flags) +{ + void *entry; +retry: + entry = kmem_cache_alloc(cachep, flags); + if (!entry) { + cond_resched(); + goto retry; + } + + return entry; +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) @@ -979,6 +971,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); */ void f2fs_set_inode_flags(struct inode *); struct inode *f2fs_iget(struct super_block *, unsigned long); +int try_to_free_nats(struct f2fs_sb_info *, int); void update_inode(struct inode *, struct page *); int update_inode_page(struct inode *); int f2fs_write_inode(struct inode *, struct writeback_control *); @@ -1033,6 +1026,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int truncate_xattr_node(struct inode *, struct page *); +int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); int remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); @@ -1059,6 +1053,7 @@ void destroy_node_manager_caches(void); * segment.c */ void f2fs_balance_fs(struct f2fs_sb_info *); +void f2fs_balance_fs_bg(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void clear_prefree_segments(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); @@ -1172,7 +1167,16 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) return (struct f2fs_stat_info*)sbi->stat_info; } -#define stat_inc_call_count(si) ((si)->call_count++) +#define stat_inc_call_count(si) ((si)->call_count++) +#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) +#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) +#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) +#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) +#define stat_inc_seg_type(sbi, curseg) \ + ((sbi)->segment_count[(curseg)->alloc_type]++) +#define stat_inc_block_count(sbi, curseg) \ + ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_seg_count(sbi, type) \ do { \ @@ -1207,6 +1211,13 @@ void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_call_count(si) +#define stat_inc_bggc_count(si) +#define stat_inc_dirty_dir(sbi) +#define stat_dec_dirty_dir(sbi) +#define stat_inc_total_hit(sb) +#define stat_inc_read_hit(sb) +#define stat_inc_seg_type(sbi, curseg) +#define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) #define stat_inc_tot_blk_count(si, blks) #define stat_inc_data_blk_count(si, blks) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 02c906971cc6..7d714f4972d5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,18 +35,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); block_t old_blk_addr; struct dnode_of_data dn; - int err, ilock; + int err; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); /* block allocation */ - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); if (err) { - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); goto out; } @@ -56,12 +56,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, err = reserve_new_block(&dn); if (err) { f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); goto out; } } f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); @@ -88,6 +88,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, set_page_dirty(page); SetPageUptodate(page); + trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ wait_on_page_writeback(page); @@ -188,8 +189,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; } - filemap_fdatawait_range(sbi->node_inode->i_mapping, - 0, LONG_MAX); + ret = wait_on_node_pages_writeback(sbi, inode->i_ino); + if (ret) + goto out; ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); } out: @@ -270,7 +272,7 @@ static int truncate_blocks(struct inode *inode, u64 from) unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; - int count = 0, ilock = -1; + int count = 0; int err; trace_f2fs_truncate_blocks_enter(inode, from); @@ -278,13 +280,13 @@ static int truncate_blocks(struct inode *inode, u64 from) free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -295,7 +297,7 @@ static int truncate_blocks(struct inode *inode, u64 from) count = ADDRS_PER_BLOCK; count -= dn.ofs_in_node; - BUG_ON(count < 0); + f2fs_bug_on(count < 0); if (dn.ofs_in_node || IS_INODE(dn.node_page)) { truncate_data_blocks_range(&dn, count); @@ -305,7 +307,7 @@ static int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); @@ -416,16 +418,15 @@ static void fill_zero(struct inode *inode, pgoff_t index, { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; - int ilock; if (!len) return; f2fs_balance_fs(sbi); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); page = get_new_data_page(inode, NULL, index, false); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (!IS_ERR(page)) { wait_on_page_writeback(page); @@ -484,7 +485,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ilock; f2fs_balance_fs(sbi); @@ -493,9 +493,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) truncate_inode_pages_range(mapping, blk_start, blk_end - 1); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); } } @@ -529,13 +529,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, for (index = pg_start; index <= pg_end; index++) { struct dnode_of_data dn; - int ilock; - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); ret = get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); break; } @@ -543,12 +542,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, ret = reserve_new_block(&dn); if (ret) { f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); break; } } f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (pg_start == pg_end) new_size = offset + len; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2f157e883687..b7ad1ec7e4cc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -77,13 +77,15 @@ static int gc_thread_func(void *data) else wait_ms = increase_sleep_time(gc_th, wait_ms); -#ifdef CONFIG_F2FS_STAT_FS - sbi->bg_gc++; -#endif + stat_inc_bggc_count(sbi); /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) wait_ms = gc_th->no_gc_sleep_time; + + /* balancing f2fs's metadata periodically */ + f2fs_balance_fs_bg(sbi); + } while (!kthread_should_stop()); return 0; } @@ -236,8 +238,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } -static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, - struct victim_sel_policy *p) +static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, + unsigned int segno, struct victim_sel_policy *p) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; @@ -293,7 +295,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } break; } - p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + + p.offset = segno + p.ofs_unit; + if (p.ofs_unit > 1) + p.offset -= segno % p.ofs_unit; + secno = GET_SECNO(sbi, segno); if (sec_usage_check(sbi, secno)) @@ -306,10 +312,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.min_cost > cost) { p.min_segno = segno; p.min_cost = cost; - } - - if (cost == max_cost) + } else if (unlikely(cost == max_cost)) { continue; + } if (nsearched++ >= p.max_search) { sbi->last_victim[p.gc_mode] = segno; @@ -358,12 +363,8 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist) iput(inode); return; } -repeat: - new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); - if (!new_ie) { - cond_resched(); - goto repeat; - } + + new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; list_add_tail(&new_ie->list, ilist); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9339cd292047..d0eaa9faeca0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -37,6 +37,31 @@ void f2fs_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } +static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +{ + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + if (ri->i_addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); + else + inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); + } +} + +static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) +{ + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); + ri->i_addr[1] = 0; + } else { + ri->i_addr[0] = 0; + ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); + ri->i_addr[2] = 0; + } + } +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -73,10 +98,6 @@ static int do_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); inode->i_generation = le32_to_cpu(ri->i_generation); - if (ri->i_addr[0]) - inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); - else - inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); fi->i_current_depth = le32_to_cpu(ri->i_current_depth); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); @@ -84,8 +105,13 @@ static int do_read_inode(struct inode *inode) fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); + get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); + + /* get rdev by using inline_info */ + __get_inode_rdev(inode, ri); + f2fs_put_page(node_page, 1); return 0; } @@ -179,21 +205,10 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[0] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[1] = 0; - } else { - ri->i_addr[0] = 0; - ri->i_addr[1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[2] = 0; - } - } - + __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); } @@ -214,7 +229,7 @@ int update_inode_page(struct inode *inode) int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ret, ilock; + int ret; if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) @@ -227,9 +242,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to lock here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); ret = update_inode_page(inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (wbc) f2fs_balance_fs(sbi); @@ -243,7 +258,6 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) void f2fs_evict_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ilock; trace_f2fs_evict_inode(inode); truncate_inode_pages(&inode->i_data, 0); @@ -252,7 +266,7 @@ void f2fs_evict_inode(struct inode *inode) inode->i_ino == F2FS_META_INO(sbi)) goto no_delete; - BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); + f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); remove_dirty_dir_inode(inode); if (inode->i_nlink || is_bad_inode(inode)) @@ -265,9 +279,9 @@ void f2fs_evict_inode(struct inode *inode) if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); remove_inode_page(inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2a5359c990fc..575adac17f8b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -27,19 +27,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; - int err, ilock; + int err; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); if (!alloc_nid(sbi, &ino)) { - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); err = -ENOSPC; goto fail; } - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); inode->i_uid = current_fsuid(); @@ -115,7 +115,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; nid_t ino = 0; - int err, ilock; + int err; f2fs_balance_fs(sbi); @@ -131,9 +131,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (err) goto out; @@ -157,7 +157,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct inode *inode = old_dentry->d_inode; struct super_block *sb = dir->i_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); - int err, ilock; + int err; f2fs_balance_fs(sbi); @@ -165,9 +165,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, ihold(inode); set_inode_flag(F2FS_I(inode), FI_INC_LINK); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (err) goto out; @@ -220,7 +220,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct f2fs_dir_entry *de; struct page *page; int err = -ENOENT; - int ilock; trace_f2fs_unlink_enter(dir, dentry); f2fs_balance_fs(sbi); @@ -229,16 +228,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (!de) goto fail; + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) { + f2fs_unlock_op(sbi); kunmap(page); f2fs_put_page(page, 0); goto fail; } - - ilock = mutex_lock_op(sbi); f2fs_delete_entry(de, page, inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); @@ -254,7 +253,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; size_t symlen = strlen(symname) + 1; - int err, ilock; + int err; f2fs_balance_fs(sbi); @@ -265,9 +264,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (err) goto out; @@ -290,7 +289,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; - int err, ilock; + int err; f2fs_balance_fs(sbi); @@ -304,9 +303,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); set_inode_flag(F2FS_I(inode), FI_INC_LINK); - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (err) goto out_fail; @@ -342,7 +341,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; int err = 0; - int ilock; if (!new_valid_dev(rdev)) return -EINVAL; @@ -356,9 +354,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); if (err) goto out; @@ -387,7 +385,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; - int err = -ENOENT, ilock = -1; + int err = -ENOENT; f2fs_balance_fs(sbi); @@ -402,7 +400,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_old; } - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); if (new_inode) { @@ -467,7 +465,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_dir); } - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); return 0; put_out_dir: @@ -477,7 +475,7 @@ out_dir: kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); out_old: kunmap(old_page); f2fs_put_page(old_page, 0); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 51ef27894433..4ac4150d421d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -204,7 +204,7 @@ retry: } e->ni = *ni; e->checkpointed = true; - BUG_ON(ni->blk_addr == NEW_ADDR); + f2fs_bug_on(ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { /* * when nid is reallocated, @@ -212,19 +212,19 @@ retry: * So, reinitialize it with new information. */ e->ni = *ni; - BUG_ON(ni->blk_addr != NULL_ADDR); + f2fs_bug_on(ni->blk_addr != NULL_ADDR); } if (new_blkaddr == NEW_ADDR) e->checkpointed = false; /* sanity check */ - BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); - BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && + f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); + f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && new_blkaddr == NULL_ADDR); - BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && + f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && + f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && nat_get_blkaddr(e) != NULL_ADDR && new_blkaddr == NEW_ADDR); @@ -240,7 +240,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); } -static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -495,10 +495,10 @@ static void truncate_node(struct dnode_of_data *dn) get_node_info(sbi, dn->nid, &ni); if (dn->inode->i_blocks == 0) { - BUG_ON(ni.blk_addr != NULL_ADDR); + f2fs_bug_on(ni.blk_addr != NULL_ADDR); goto invalidate; } - BUG_ON(ni.blk_addr == NULL_ADDR); + f2fs_bug_on(ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); @@ -822,7 +822,7 @@ int remove_inode_page(struct inode *inode) } /* 0 is possible, after f2fs_new_inode() is failed */ - BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); + f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); set_new_dnode(&dn, inode, page, page, ino); truncate_node(&dn); return 0; @@ -863,7 +863,7 @@ struct page *new_node_page(struct dnode_of_data *dn, get_node_info(sbi, dn->nid, &old_ni); /* Reinitialize old_ni with new node page */ - BUG_ON(old_ni.blk_addr != NULL_ADDR); + f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); new_ni = old_ni; new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR); @@ -969,7 +969,7 @@ repeat: goto repeat; } got_it: - BUG_ON(nid != nid_of_node(page)); + f2fs_bug_on(nid != nid_of_node(page)); mark_page_accessed(page); return page; } @@ -1148,6 +1148,47 @@ continue_unlock: return nwritten; } +int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + pgoff_t index = 0, end = LONG_MAX; + struct pagevec pvec; + int nr_pages; + int ret2 = 0, ret = 0; + + pagevec_init(&pvec, 0); + while ((index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { + unsigned i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* until radix tree lookup accepts end_index */ + if (page->index > end) + continue; + + if (ino && ino_of_node(page) == ino) { + wait_on_page_writeback(page); + if (TestClearPageError(page)) + ret = -EIO; + } + } + pagevec_release(&pvec); + cond_resched(); + } + + if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + ret2 = -ENOSPC; + if (test_and_clear_bit(AS_EIO, &mapping->flags)) + ret2 = -EIO; + if (!ret) + ret = ret2; + return ret; +} + static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { @@ -1156,11 +1197,14 @@ static int f2fs_write_node_page(struct page *page, block_t new_addr; struct node_info ni; + if (sbi->por_doing) + goto redirty_out; + wait_on_page_writeback(page); /* get old block addr of this node page */ nid = nid_of_node(page); - BUG_ON(page->index != nid); + f2fs_bug_on(page->index != nid); get_node_info(sbi, nid, &ni); @@ -1171,12 +1215,8 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) { - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; - } + if (wbc->for_reclaim) + goto redirty_out; mutex_lock(&sbi->node_write); set_page_writeback(page); @@ -1186,6 +1226,12 @@ static int f2fs_write_node_page(struct page *page, mutex_unlock(&sbi->node_write); unlock_page(page); return 0; + +redirty_out: + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; } /* @@ -1200,11 +1246,8 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long nr_to_write = wbc->nr_to_write; - /* First check balancing cached NAT entries */ - if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { - f2fs_sync_fs(sbi->sb, true); - return 0; - } + /* balancing f2fs's metadata in background */ + f2fs_balance_fs_bg(sbi); /* collect a number of dirty node pages and write together */ if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) @@ -1223,6 +1266,8 @@ static int f2fs_set_node_page_dirty(struct page *page) struct address_space *mapping = page->mapping; struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + trace_f2fs_set_page_dirty(page, NODE); + SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); @@ -1291,23 +1336,18 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) if (nid == 0) return 0; - if (!build) - goto retry; - - /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne && nat_get_blkaddr(ne) != NULL_ADDR) - allocated = true; - read_unlock(&nm_i->nat_tree_lock); - if (allocated) - return 0; -retry: - i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); - if (!i) { - cond_resched(); - goto retry; + if (build) { + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) + return 0; } + + i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; i->state = NID_NEW; @@ -1350,7 +1390,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - BUG_ON(blk_addr == NEW_ADDR); + f2fs_bug_on(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { if (add_free_nid(nm_i, start_nid, true) < 0) break; @@ -1421,14 +1461,14 @@ retry: /* We should not use stale free nids created by build_free_nids */ if (nm_i->fcnt && !sbi->on_build_free_nids) { - BUG_ON(list_empty(&nm_i->free_nid_list)); + f2fs_bug_on(list_empty(&nm_i->free_nid_list)); list_for_each(this, &nm_i->free_nid_list) { i = list_entry(this, struct free_nid, list); if (i->state == NID_NEW) break; } - BUG_ON(i->state != NID_NEW); + f2fs_bug_on(i->state != NID_NEW); *nid = i->nid; i->state = NID_ALLOC; nm_i->fcnt--; @@ -1439,9 +1479,9 @@ retry: /* Let's scan nat pages and its caches to get free nids */ mutex_lock(&nm_i->build_lock); - sbi->on_build_free_nids = 1; + sbi->on_build_free_nids = true; build_free_nids(sbi); - sbi->on_build_free_nids = 0; + sbi->on_build_free_nids = false; mutex_unlock(&nm_i->build_lock); goto retry; } @@ -1456,7 +1496,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - BUG_ON(!i || i->state != NID_ALLOC); + f2fs_bug_on(!i || i->state != NID_ALLOC); __del_from_free_nid_list(i); spin_unlock(&nm_i->free_nid_list_lock); } @@ -1474,7 +1514,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - BUG_ON(!i || i->state != NID_ALLOC); + f2fs_bug_on(!i || i->state != NID_ALLOC); if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { __del_from_free_nid_list(i); } else { @@ -1677,7 +1717,7 @@ to_nat_page: nat_blk = page_address(page); } - BUG_ON(!nat_blk); + f2fs_bug_on(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; flush_now: new_blkaddr = nat_get_blkaddr(ne); @@ -1781,11 +1821,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy free nid list */ spin_lock(&nm_i->free_nid_list_lock); list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { - BUG_ON(i->state == NID_ALLOC); + f2fs_bug_on(i->state == NID_ALLOC); __del_from_free_nid_list(i); nm_i->fcnt--; } - BUG_ON(nm_i->fcnt); + f2fs_bug_on(nm_i->fcnt); spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ @@ -1799,7 +1839,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, e); } } - BUG_ON(nm_i->nat_cnt); + f2fs_bug_on(nm_i->nat_cnt); write_unlock(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 51ef5eec33d7..fdc81161f254 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -64,24 +64,31 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.name = raw_inode->i_name; retry: de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) { - kunmap(page); - f2fs_put_page(page, 0); - goto out; - } + if (de && inode->i_ino == le32_to_cpu(de->ino)) + goto out_unmap_put; if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { WARN_ON(1); if (PTR_ERR(einode) == -ENOENT) err = -EEXIST; - goto out; + goto out_unmap_put; + } + err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); + if (err) { + iput(einode); + goto out_unmap_put; } f2fs_delete_entry(de, page, einode); iput(einode); goto retry; } err = __f2fs_add_link(dir, &name, inode); + goto out; + +out_unmap_put: + kunmap(page); + f2fs_put_page(page, 0); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", @@ -285,7 +292,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct f2fs_summary sum; struct node_info ni; int err = 0, recovered = 0; - int ilock; start = start_bidx_of_node(ofs_of_node(page), fi); if (IS_INODE(page)) @@ -293,20 +299,20 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, else end = start + ADDRS_PER_BLOCK; - ilock = mutex_lock_op(sbi); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); return err; } wait_on_page_writeback(dn.node_page); get_node_info(sbi, dn.nid, &ni); - BUG_ON(ni.ino != ino_of_node(page)); - BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); + f2fs_bug_on(ni.ino != ino_of_node(page)); + f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); for (; start < end; start++) { block_t src, dest; @@ -316,9 +322,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { if (src == NULL_ADDR) { - int err = reserve_new_block(&dn); + err = reserve_new_block(&dn); /* We should not get -ENOSPC */ - BUG_ON(err); + f2fs_bug_on(err); } /* Check the previous node page having this index */ @@ -349,7 +355,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); err: f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, ilock); + f2fs_unlock_op(sbi); f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " "recovered_data = %d blocks, err = %d", @@ -419,6 +425,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) { struct list_head inode_list; int err; + bool need_writecp = false; fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry), NULL); @@ -428,7 +435,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - sbi->por_doing = 1; + sbi->por_doing = true; err = find_fsync_dnodes(sbi, &inode_list); if (err) goto out; @@ -436,14 +443,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) if (list_empty(&inode_list)) goto out; + need_writecp = true; + /* step #2: recover data */ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - BUG_ON(!list_empty(&inode_list)); + f2fs_bug_on(!list_empty(&inode_list)); out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); - sbi->por_doing = 0; - if (!err) + sbi->por_doing = false; + if (!err && need_writecp) write_checkpoint(sbi, false); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09af9c7b0f52..fa284d397199 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -36,6 +36,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) } } +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) +{ + /* check the # of cached NAT entries and prefree segments */ + if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || + excess_prefree_segs(sbi)) + f2fs_sync_fs(sbi->sb, true); +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -50,20 +58,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (dirty_type == DIRTY) { struct seg_entry *sentry = get_seg_entry(sbi, segno); - enum dirty_type t = DIRTY_HOT_DATA; - - dirty_type = sentry->type; - - if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]++; + enum dirty_type t = sentry->type; - /* Only one bitmap should be set */ - for (; t <= DIRTY_COLD_NODE; t++) { - if (t == dirty_type) - continue; - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; - } + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]++; } } @@ -76,12 +74,11 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, dirty_i->nr_dirty[dirty_type]--; if (dirty_type == DIRTY) { - enum dirty_type t = DIRTY_HOT_DATA; + struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = sentry->type; - /* clear all the bitmaps */ - for (; t <= DIRTY_COLD_NODE; t++) - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) clear_bit(GET_SECNO(sbi, segno), @@ -142,27 +139,33 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno = -1; + unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int total_segs = TOTAL_SEGS(sbi); + unsigned int start = 0, end = -1; mutex_lock(&dirty_i->seglist_lock); + while (1) { - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - segno + 1); - if (segno >= total_segs) + int i; + start = find_next_bit(prefree_map, total_segs, end + 1); + if (start >= total_segs) break; + end = find_next_zero_bit(prefree_map, total_segs, start + 1); + + for (i = start; i < end; i++) + clear_bit(i, prefree_map); - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) - dirty_i->nr_dirty[PRE]--; - - /* Let's use trim */ - if (test_opt(sbi, DISCARD)) - blkdev_issue_discard(sbi->sb->s_bdev, - START_BLOCK(sbi, segno) << - sbi->log_sectors_per_block, - 1 << (sbi->log_sectors_per_block + - sbi->log_blocks_per_seg), - GFP_NOFS, 0); + dirty_i->nr_dirty[PRE] -= end - start; + + if (!test_opt(sbi, DISCARD)) + continue; + + blkdev_issue_discard(sbi->sb->s_bdev, + START_BLOCK(sbi, start) << + sbi->log_sectors_per_block, + (1 << (sbi->log_sectors_per_block + + sbi->log_blocks_per_seg)) * (end - start), + GFP_NOFS, 0); } mutex_unlock(&dirty_i->seglist_lock); } @@ -195,7 +198,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) new_vblocks = se->valid_blocks + del; offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); - BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) || + f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; @@ -235,7 +238,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); - BUG_ON(addr == NULL_ADDR); + f2fs_bug_on(addr == NULL_ADDR); if (addr == NEW_ADDR) return; @@ -267,9 +270,8 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, */ int npages_for_summary_flush(struct f2fs_sb_info *sbi) { - int total_size_bytes = 0; int valid_sum_count = 0; - int i, sum_space; + int i, sum_in_page; for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] == SSR) @@ -278,13 +280,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi) valid_sum_count += curseg_blkoff(sbi, i); } - total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) - + sizeof(struct nat_journal) + 2 - + sizeof(struct sit_journal) + 2; - sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; - if (total_size_bytes < sum_space) + sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - + SUM_FOOTER_SIZE) / SUMMARY_SIZE; + if (valid_sum_count <= sum_in_page) return 1; - else if (total_size_bytes < 2 * sum_space) + else if ((valid_sum_count - sum_in_page) <= + (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) return 2; return 3; } @@ -350,7 +351,7 @@ find_other_zone: if (dir == ALLOC_RIGHT) { secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), 0); - BUG_ON(secno >= TOTAL_SECS(sbi)); + f2fs_bug_on(secno >= TOTAL_SECS(sbi)); } else { go_left = 1; left_start = hint - 1; @@ -366,7 +367,7 @@ find_other_zone: } left_start = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), 0); - BUG_ON(left_start >= TOTAL_SECS(sbi)); + f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); break; } secno = left_start; @@ -405,7 +406,7 @@ skip_left: } got_it: /* set it as dirty segment in free segmap */ - BUG_ON(test_bit(segno, free_i->free_segmap)); + f2fs_bug_on(test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; write_unlock(&free_i->segmap_lock); @@ -550,9 +551,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, change_curseg(sbi, type, true); else new_curseg(sbi, type, false); -#ifdef CONFIG_F2FS_STAT_FS - sbi->segment_count[curseg->alloc_type]++; -#endif + + stat_inc_seg_type(sbi, curseg); } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -597,6 +597,11 @@ static void f2fs_end_io_write(struct bio *bio, int err) if (p->is_sync) complete(p->wait); + + if (!get_pages(p->sbi, F2FS_WRITEBACK) && + !list_empty(&p->sbi->cp_wait.task_list)) + wake_up(&p->sbi->cp_wait); + kfree(p); bio_put(bio); } @@ -657,6 +662,7 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, enum page_type type) { struct block_device *bdev = sbi->sb->s_bdev; + int bio_blocks; verify_block_addr(sbi, blk_addr); @@ -676,7 +682,8 @@ retry: goto retry; } - sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); + bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks); sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); sbi->bio[type]->bi_private = priv; /* @@ -771,7 +778,7 @@ static int __get_segment_type(struct page *page, enum page_type p_type) return __get_segment_type_4(page, p_type); } /* NR_CURSEG_TYPE(6) logs by default */ - BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); + f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); return __get_segment_type_6(page, p_type); } @@ -801,9 +808,8 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); -#ifdef CONFIG_F2FS_STAT_FS - sbi->block_count[curseg->alloc_type]++; -#endif + + stat_inc_block_count(sbi, curseg); /* * SIT information should be updated before segment allocation, @@ -849,7 +855,7 @@ void write_data_page(struct inode *inode, struct page *page, struct f2fs_summary sum; struct node_info ni; - BUG_ON(old_blkaddr == NULL_ADDR); + f2fs_bug_on(old_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); @@ -1122,8 +1128,6 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; - set_page_dirty(page); - /* Step 3: write summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { unsigned short blkoff; @@ -1142,18 +1146,20 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) summary = (struct f2fs_summary *)(kaddr + written_size); *summary = seg_i->sum_blk->entries[j]; written_size += SUMMARY_SIZE; - set_page_dirty(page); if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) continue; + set_page_dirty(page); f2fs_put_page(page, 1); page = NULL; } } - if (page) + if (page) { + set_page_dirty(page); f2fs_put_page(page, 1); + } } static void write_normal_summaries(struct f2fs_sb_info *sbi, @@ -1239,7 +1245,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, /* get current sit block page without lock */ src_page = get_meta_page(sbi, src_off); dst_page = grab_meta_page(sbi, dst_off); - BUG_ON(PageDirty(src_page)); + f2fs_bug_on(PageDirty(src_page)); src_addr = page_address(src_page); dst_addr = page_address(dst_page); @@ -1271,9 +1277,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) __mark_sit_entry_dirty(sbi, segno); } update_sits_in_cursum(sum, -sits_in_cursum(sum)); - return 1; + return true; } - return 0; + return false; } /* @@ -1637,6 +1643,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; err = build_sit_info(sbi); if (err) @@ -1744,6 +1751,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); + if (!sm_info) + return; destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index bdd10eab8c40..269f690b4e24 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -14,6 +14,8 @@ #define NULL_SEGNO ((unsigned int)(~0)) #define NULL_SECNO ((unsigned int)(~0)) +#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ + /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) @@ -90,6 +92,8 @@ (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) #define SECTOR_TO_BLOCK(sbi, sectors) \ (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) +#define MAX_BIO_BLOCKS(max_hw_blocks) \ + (min((int)max_hw_blocks, BIO_MAX_PAGES)) /* during checkpoint, bio_private is used to synchronize the last bio */ struct bio_private { @@ -470,6 +474,11 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) reserved_sections(sbi))); } +static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) +{ + return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments); +} + static inline int utilization(struct f2fs_sb_info *sbi) { return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); @@ -513,16 +522,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) return curseg->next_blkoff; } +#ifdef CONFIG_F2FS_CHECK_FS static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int end_segno = SM_I(sbi)->segment_count - 1; BUG_ON(segno > end_segno); } -/* - * This function is used for only debugging. - * NOTE: In future, we have to remove this function. - */ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { struct f2fs_sm_info *sm_info = SM_I(sbi); @@ -541,8 +547,9 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, { struct f2fs_sm_info *sm_info = SM_I(sbi); unsigned int end_segno = sm_info->segment_count - 1; + bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; int valid_blocks = 0; - int i; + int cur_pos = 0, next_pos; /* check segment usage */ BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); @@ -551,11 +558,26 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, BUG_ON(segno > end_segno); /* check bitmap with valid block count */ - for (i = 0; i < sbi->blocks_per_seg; i++) - if (f2fs_test_bit(i, raw_sit->valid_map)) - valid_blocks++; + do { + if (is_valid) { + next_pos = find_next_zero_bit_le(&raw_sit->valid_map, + sbi->blocks_per_seg, + cur_pos); + valid_blocks += next_pos - cur_pos; + } else + next_pos = find_next_bit_le(&raw_sit->valid_map, + sbi->blocks_per_seg, + cur_pos); + cur_pos = next_pos; + is_valid = !is_valid; + } while (cur_pos < sbi->blocks_per_seg); BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); } +#else +#define check_seg_range(sbi, segno) +#define verify_block_addr(sbi, blk_addr) +#define check_block_count(sbi, segno, raw_sit) +#endif static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, unsigned int start) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 13d0a0fe49dd..bafff72de8e8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -43,7 +43,9 @@ enum { Opt_disable_roll_forward, Opt_discard, Opt_noheap, + Opt_user_xattr, Opt_nouser_xattr, + Opt_acl, Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, @@ -56,7 +58,9 @@ static match_table_t f2fs_tokens = { {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, + {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, @@ -65,24 +69,40 @@ static match_table_t f2fs_tokens = { }; /* Sysfs support for f2fs */ +enum { + GC_THREAD, /* struct f2fs_gc_thread */ + SM_INFO, /* struct f2fs_sm_info */ +}; + struct f2fs_attr { struct attribute attr; ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, const char *, size_t); + int struct_type; int offset; }; +static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) +{ + if (struct_type == GC_THREAD) + return (unsigned char *)sbi->gc_thread; + else if (struct_type == SM_INFO) + return (unsigned char *)SM_I(sbi); + return NULL; +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned char *ptr = NULL; unsigned int *ui; - if (!gc_kth) + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) return -EINVAL; - ui = (unsigned int *)(((char *)gc_kth) + a->offset); + ui = (unsigned int *)(ptr + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); } @@ -91,15 +111,16 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, struct f2fs_sb_info *sbi, const char *buf, size_t count) { - struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned char *ptr; unsigned long t; unsigned int *ui; ssize_t ret; - if (!gc_kth) + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) return -EINVAL; - ui = (unsigned int *)(((char *)gc_kth) + a->offset); + ui = (unsigned int *)(ptr + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret < 0) @@ -135,21 +156,25 @@ static void f2fs_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } -#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ static struct f2fs_attr f2fs_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ - .offset = offsetof(struct f2fs_gc_kthread, _elname), \ + .struct_type = _struct_type, \ + .offset = _offset \ } -#define F2FS_RW_ATTR(name, elname) \ - F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) +#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ + F2FS_ATTR_OFFSET(struct_type, name, 0644, \ + f2fs_sbi_show, f2fs_sbi_store, \ + offsetof(struct struct_name, elname)) -F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(gc_idle, gc_idle); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -157,6 +182,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_idle), + ATTR_LIST(reclaim_segments), NULL, }; @@ -237,6 +263,9 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, NOHEAP); break; #ifdef CONFIG_F2FS_FS_XATTR + case Opt_user_xattr: + set_opt(sbi, XATTR_USER); + break; case Opt_nouser_xattr: clear_opt(sbi, XATTR_USER); break; @@ -244,6 +273,10 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, INLINE_XATTR); break; #else + case Opt_user_xattr: + f2fs_msg(sb, KERN_INFO, + "user_xattr options not supported"); + break; case Opt_nouser_xattr: f2fs_msg(sb, KERN_INFO, "nouser_xattr options not supported"); @@ -254,10 +287,16 @@ static int parse_options(struct super_block *sb, char *options) break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_acl: + set_opt(sbi, POSIX_ACL); + break; case Opt_noacl: clear_opt(sbi, POSIX_ACL); break; #else + case Opt_acl: + f2fs_msg(sb, KERN_INFO, "acl options not supported"); + break; case Opt_noacl: f2fs_msg(sb, KERN_INFO, "noacl options not supported"); break; @@ -355,7 +394,9 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_stats(sbi); stop_gc_thread(sbi); - write_checkpoint(sbi, true); + /* We don't need to do checkpoint when it's clean */ + if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) + write_checkpoint(sbi, true); iput(sbi->node_inode); iput(sbi->meta_inode); @@ -727,30 +768,47 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); } -static int validate_superblock(struct super_block *sb, - struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf, sector_t block) +/* + * Read f2fs raw super block. + * Because we have two copies of super block, so read the first one at first, + * if the first one is invalid, move to read the second one. + */ +static int read_raw_super_block(struct super_block *sb, + struct f2fs_super_block **raw_super, + struct buffer_head **raw_super_buf) { - const char *super = (block == 0 ? "first" : "second"); + int block = 0; - /* read f2fs raw super block */ +retry: *raw_super_buf = sb_bread(sb, block); if (!*raw_super_buf) { - f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", - super); - return -EIO; + f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", + block + 1); + if (block == 0) { + block++; + goto retry; + } else { + return -EIO; + } } *raw_super = (struct f2fs_super_block *) ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (!sanity_check_raw_super(sb, *raw_super)) - return 0; + if (sanity_check_raw_super(sb, *raw_super)) { + brelse(*raw_super_buf); + f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " + "in %dth superblock", block + 1); + if(block == 0) { + block++; + goto retry; + } else { + return -EINVAL; + } + } - f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " - "in %s superblock", super); - return -EINVAL; + return 0; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) @@ -760,7 +818,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; - int i; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -773,14 +830,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); - if (err) { - brelse(raw_super_buf); - /* check secondary superblock when primary failed */ - err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); - if (err) - goto free_sb_buf; - } + err = read_raw_super_block(sb, &raw_super, &raw_super_buf); + if (err) + goto free_sbi; + sb->s_fs_info = sbi; /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; @@ -818,12 +871,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); - for (i = 0; i < NR_GLOBAL_LOCKS; i++) - mutex_init(&sbi->fs_lock[i]); mutex_init(&sbi->node_write); - sbi->por_doing = 0; + sbi->por_doing = false; spin_lock_init(&sbi->stat_lock); init_rwsem(&sbi->bio_sem); + init_rwsem(&sbi->cp_rwsem); + init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); /* get an inode for meta space */ @@ -922,12 +975,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) - goto fail; + goto free_gc; } err = f2fs_build_stats(sbi); if (err) - goto fail; + goto free_gc; if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -953,6 +1006,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) return 0; fail: + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + f2fs_destroy_stats(sbi); +free_gc: stop_gc_thread(sbi); free_root_inode: dput(sb->s_root); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1ac8a5f6e380..aa7a3f139fe5 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -154,6 +154,9 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, } #ifdef CONFIG_F2FS_FS_SECURITY +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage); static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) { @@ -161,7 +164,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, xattr->value_len, (struct page *)page); if (err < 0) @@ -369,7 +372,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); } - BUG_ON(new_nid); + f2fs_bug_on(new_nid); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -469,16 +472,15 @@ cleanup: return error; } -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, struct page *ipage) +static int __f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + struct page *ipage) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; size_t name_len; - int ilock; __u32 new_hsize; int error = -ENOMEM; @@ -493,10 +495,6 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) return -ERANGE; - f2fs_balance_fs(sbi); - - ilock = mutex_lock_op(sbi); - base_addr = read_all_xattrs(inode, ipage); if (!base_addr) goto exit; @@ -522,7 +520,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, */ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) - free = free - ENTRY_SIZE(here); + free = free + ENTRY_SIZE(here); if (free < newsize) { error = -ENOSPC; @@ -578,7 +576,21 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, else update_inode_page(inode); exit: - mutex_unlock_op(sbi, ilock); kzfree(base_addr); return error; } + +int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int err; + + f2fs_balance_fs(sbi); + + f2fs_lock_op(sbi); + err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + f2fs_unlock_op(sbi); + + return err; +} diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 4241e6f39e86..7c31f4bc74a9 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -102,6 +102,7 @@ struct msdos_sb_info { struct hlist_head dir_hashtable[FAT_HASH_SIZE]; unsigned int dirty; /* fs state before mount */ + struct rcu_head rcu; }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0062da21dd8b..854b578f6695 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -548,6 +548,16 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void delayed_free(struct rcu_head *p) +{ + struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); + unload_nls(sbi->nls_disk); + unload_nls(sbi->nls_io); + if (sbi->options.iocharset != fat_default_iocharset) + kfree(sbi->options.iocharset); + kfree(sbi); +} + static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -557,14 +567,7 @@ static void fat_put_super(struct super_block *sb) iput(sbi->fsinfo_inode); iput(sbi->fat_inode); - unload_nls(sbi->nls_disk); - unload_nls(sbi->nls_io); - - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); - - sb->s_fs_info = NULL; - kfree(sbi); + call_rcu(&sbi->rcu, delayed_free); } static struct kmem_cache *fat_inode_cachep; diff --git a/fs/fcntl.c b/fs/fcntl.c index 65343c3741ff..ef6866592a0f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) return -EINVAL; } - if (filp->f_op && filp->f_op->check_flags) + if (filp->f_op->check_flags) error = filp->f_op->check_flags(arg); if (error) return error; @@ -64,8 +64,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) /* * ->fasync() is responsible for setting the FASYNC bit. */ - if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && - filp->f_op->fasync) { + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; diff --git a/fs/file_table.c b/fs/file_table.c index e900ca518635..5fff9030be34 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,8 +36,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DEFINE_STATIC_LGLOCK(files_lglock); - /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -134,7 +132,6 @@ struct file *get_empty_filp(void) return ERR_PTR(error); } - INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); @@ -240,11 +237,11 @@ static void __fput(struct file *file) locks_remove_flock(file); if (unlikely(file->f_flags & FASYNC)) { - if (file->f_op && file->f_op->fasync) + if (file->f_op->fasync) file->f_op->fasync(-1, file, 0); } ima_file_free(file); - if (file->f_op && file->f_op->release) + if (file->f_op->release) file->f_op->release(inode, file); security_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && @@ -304,7 +301,6 @@ void fput(struct file *file) if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - file_sb_list_del(file); if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) @@ -333,7 +329,6 @@ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - file_sb_list_del(file); BUG_ON(!(task->flags & PF_KTHREAD)); __fput(file); } @@ -345,129 +340,10 @@ void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_sb_list_del(file); file_free(file); } } -static inline int file_list_cpu(struct file *file) -{ -#ifdef CONFIG_SMP - return file->f_sb_list_cpu; -#else - return smp_processor_id(); -#endif -} - -/* helper for file_sb_list_add to reduce ifdefs */ -static inline void __file_sb_list_add(struct file *file, struct super_block *sb) -{ - struct list_head *list; -#ifdef CONFIG_SMP - int cpu; - cpu = smp_processor_id(); - file->f_sb_list_cpu = cpu; - list = per_cpu_ptr(sb->s_files, cpu); -#else - list = &sb->s_files; -#endif - list_add(&file->f_u.fu_list, list); -} - -/** - * file_sb_list_add - add a file to the sb's file list - * @file: file to add - * @sb: sb to add it to - * - * Use this function to associate a file with the superblock of the inode it - * refers to. - */ -void file_sb_list_add(struct file *file, struct super_block *sb) -{ - if (likely(!(file->f_mode & FMODE_WRITE))) - return; - if (!S_ISREG(file_inode(file)->i_mode)) - return; - lg_local_lock(&files_lglock); - __file_sb_list_add(file, sb); - lg_local_unlock(&files_lglock); -} - -/** - * file_sb_list_del - remove a file from the sb's file list - * @file: file to remove - * @sb: sb to remove it from - * - * Use this function to remove a file from its superblock. - */ -void file_sb_list_del(struct file *file) -{ - if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); - list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); - } -} - -#ifdef CONFIG_SMP - -/* - * These macros iterate all files on all CPUs for a given superblock. - * files_lglock must be held globally. - */ -#define do_file_list_for_each_entry(__sb, __file) \ -{ \ - int i; \ - for_each_possible_cpu(i) { \ - struct list_head *list; \ - list = per_cpu_ptr((__sb)->s_files, i); \ - list_for_each_entry((__file), list, f_u.fu_list) - -#define while_file_list_for_each_entry \ - } \ -} - -#else - -#define do_file_list_for_each_entry(__sb, __file) \ -{ \ - struct list_head *list; \ - list = &(sb)->s_files; \ - list_for_each_entry((__file), list, f_u.fu_list) - -#define while_file_list_for_each_entry \ -} - -#endif - -/** - * mark_files_ro - mark all files read-only - * @sb: superblock in question - * - * All files are marked read-only. We don't care about pending - * delete files so this should be used in 'force' mode only. - */ -void mark_files_ro(struct super_block *sb) -{ - struct file *f; - - lg_global_lock(&files_lglock); - do_file_list_for_each_entry(sb, f) { - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - spin_lock(&f->f_lock); - f->f_mode &= ~FMODE_WRITE; - spin_unlock(&f->f_lock); - if (file_check_writeable(f) != 0) - continue; - __mnt_drop_write(f->f_path.mnt); - file_release_write(f); - } while_file_list_for_each_entry; - lg_global_unlock(&files_lglock); -} - void __init files_init(unsigned long mempages) { unsigned long n; @@ -483,6 +359,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9f4935b8f208..1f4a10ece2f1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -26,6 +26,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/tracepoint.h> +#include <linux/device.h> #include "internal.h" /* @@ -39,13 +40,18 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - unsigned long *older_than_this; + /* + * Write only inodes dirtied before this time. Don't forget to set + * older_than_this_is_set when you set this. + */ + unsigned long older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ + unsigned int older_than_this_is_set:1; enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -246,10 +252,10 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; + WARN_ON_ONCE(!work->older_than_this_is_set); while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (work->older_than_this && - inode_dirtied_after(inode, *work->older_than_this)) + if (inode_dirtied_after(inode, work->older_than_this)) break; list_move(&inode->i_wb_list, &tmp); moved++; @@ -733,6 +739,8 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, + .older_than_this = jiffies, + .older_than_this_is_set = 1, }; spin_lock(&wb->list_lock); @@ -791,12 +799,13 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; - unsigned long oldest_jif; struct inode *inode; long progress; - oldest_jif = jiffies; - work->older_than_this = &oldest_jif; + if (!work->older_than_this_is_set) { + work->older_than_this = jiffies; + work->older_than_this_is_set = 1; + } spin_lock(&wb->list_lock); for (;;) { @@ -830,10 +839,10 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - oldest_jif = jiffies - + work->older_than_this = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - oldest_jif = jiffies; + work->older_than_this = jiffies; trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) @@ -1345,18 +1354,21 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages - * @sb: the superblock + * @sb: the superblock + * @older_than_this: timestamp * * This function writes and waits on any dirty inode belonging to this - * super_block. + * superblock that has been dirtied before given timestamp. */ -void sync_inodes_sb(struct super_block *sb) +void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, + .older_than_this = older_than_this, + .older_than_this_is_set = 1, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, diff --git a/fs/fscache/object.c b/fs/fscache/object.c index dcb821617774..53d35c504240 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -799,7 +799,7 @@ void fscache_enqueue_object(struct fscache_object *object) */ bool fscache_object_sleep_till_congested(signed long *timeoutp) { - wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait); + wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait); DEFINE_WAIT(wait); if (fscache_object_congested()) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index adbfd66b380f..b96a49b37d66 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -473,7 +473,7 @@ err: static void cuse_fc_release(struct fuse_conn *fc) { struct cuse_conn *cc = fc_to_cc(fc); - kfree(cc); + kfree_rcu(cc, fc.rcu); } /** @@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = { ATTRIBUTE_GROUPS(cuse_class_dev); static struct miscdevice cuse_miscdev = { - .minor = MISC_DYNAMIC_MINOR, + .minor = CUSE_MINOR, .name = "cuse", .fops = &cuse_channel_fops, }; +MODULE_ALIAS_MISCDEV(CUSE_MINOR); +MODULE_ALIAS("devname:cuse"); + static int __init cuse_init(void) { int i, rc; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7989f2ab4c4..c3eb2c46c8f1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -342,24 +342,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, return err; } -static struct dentry *fuse_materialise_dentry(struct dentry *dentry, - struct inode *inode) -{ - struct dentry *newent; - - if (inode && S_ISDIR(inode->i_mode)) { - struct fuse_conn *fc = get_fuse_conn(inode); - - mutex_lock(&fc->inst_mutex); - newent = d_materialise_unique(dentry, inode); - mutex_unlock(&fc->inst_mutex); - } else { - newent = d_materialise_unique(dentry, inode); - } - - return newent; -} - static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) { @@ -382,7 +364,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, if (inode && get_node_id(inode) == FUSE_ROOT_ID) goto out_iput; - newent = fuse_materialise_dentry(entry, inode); + newent = d_materialise_unique(entry, inode); err = PTR_ERR(newent); if (IS_ERR(newent)) goto out_err; @@ -601,21 +583,9 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, } kfree(forget); - if (S_ISDIR(inode->i_mode)) { - struct dentry *alias; - mutex_lock(&fc->inst_mutex); - alias = d_find_alias(inode); - if (alias) { - /* New directory must have moved since mkdir */ - mutex_unlock(&fc->inst_mutex); - dput(alias); - iput(inode); - return -EBUSY; - } - d_instantiate(entry, inode); - mutex_unlock(&fc->inst_mutex); - } else - d_instantiate(entry, inode); + err = d_instantiate_no_diralias(entry, inode); + if (err) + return err; fuse_change_entry_timeout(entry, &outarg); fuse_invalidate_attr(dir); @@ -1284,7 +1254,7 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = fuse_materialise_dentry(dentry, inode); + alias = d_materialise_unique(dentry, inode); err = PTR_ERR(alias); if (IS_ERR(alias)) goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4598345ab87d..7e70506297bc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) BUG_ON(req->inode != inode); curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; - if (curr_index == index) { + if (curr_index <= index && + index < curr_index + req->num_pages) { found = true; break; } @@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { - __free_page(req->pages[0]); - fuse_file_put(req->ff, false); + int i; + + for (i = 0; i < req->num_pages; i++) + __free_page(req->pages[i]); + + if (req->ff) + fuse_file_put(req->ff, false); } static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) @@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) struct inode *inode = req->inode; struct fuse_inode *fi = get_fuse_inode(inode); struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + int i; list_del(&req->writepages_entry); - dec_bdi_stat(bdi, BDI_WRITEBACK); - dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); - bdi_writeout_inc(bdi); + for (i = 0; i < req->num_pages; i++) { + dec_bdi_stat(bdi, BDI_WRITEBACK); + dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); + } wake_up(&fi->page_waitq); } /* Called under fc->lock, may release and reacquire it */ -static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, + loff_t size) __releases(fc->lock) __acquires(fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); - loff_t size = i_size_read(req->inode); struct fuse_write_in *inarg = &req->misc.write.in; + __u64 data_size = req->num_pages * PAGE_CACHE_SIZE; if (!fc->connected) goto out_free; - if (inarg->offset + PAGE_CACHE_SIZE <= size) { - inarg->size = PAGE_CACHE_SIZE; + if (inarg->offset + data_size <= size) { + inarg->size = data_size; } else if (inarg->offset < size) { - inarg->size = size & (PAGE_CACHE_SIZE - 1); + inarg->size = size - inarg->offset; } else { /* Got truncated off completely */ goto out_free; @@ -1472,12 +1482,13 @@ __acquires(fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + size_t crop = i_size_read(inode); struct fuse_req *req; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { req = list_entry(fi->queued_writes.next, struct fuse_req, list); list_del_init(&req->list); - fuse_send_writepage(fc, req); + fuse_send_writepage(fc, req, crop); } } @@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) mapping_set_error(inode->i_mapping, req->out.h.error); spin_lock(&fc->lock); + while (req->misc.write.next) { + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_write_in *inarg = &req->misc.write.in; + struct fuse_req *next = req->misc.write.next; + req->misc.write.next = next->misc.write.next; + next->misc.write.next = NULL; + next->ff = fuse_file_get(req->ff); + list_add(&next->writepages_entry, &fi->writepages); + + /* + * Skip fuse_flush_writepages() to make it easy to crop requests + * based on primary request size. + * + * 1st case (trivial): there are no concurrent activities using + * fuse_set/release_nowrite. Then we're on safe side because + * fuse_flush_writepages() would call fuse_send_writepage() + * anyway. + * + * 2nd case: someone called fuse_set_nowrite and it is waiting + * now for completion of all in-flight requests. This happens + * rarely and no more than once per page, so this should be + * okay. + * + * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle + * of fuse_set_nowrite..fuse_release_nowrite section. The fact + * that fuse_set_nowrite returned implies that all in-flight + * requests were completed along with all of their secondary + * requests. Further primary requests are blocked by negative + * writectr. Hence there cannot be any in-flight requests and + * no invocations of fuse_writepage_end() while we're in + * fuse_set_nowrite..fuse_release_nowrite section. + */ + fuse_send_writepage(fc, next, inarg->offset + inarg->size); + } fi->writectr--; fuse_writepage_finish(fc, req); spin_unlock(&fc->lock); fuse_writepage_free(fc, req); } +static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, + struct fuse_inode *fi) +{ + struct fuse_file *ff = NULL; + + spin_lock(&fc->lock); + if (!WARN_ON(list_empty(&fi->write_files))) { + ff = list_entry(fi->write_files.next, struct fuse_file, + write_entry); + fuse_file_get(ff); + } + spin_unlock(&fc->lock); + + return ff; +} + static int fuse_writepage_locked(struct page *page) { struct address_space *mapping = page->mapping; @@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page) struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; - struct fuse_file *ff; struct page *tmp_page; + int error = -ENOMEM; set_page_writeback(page); @@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page) if (!tmp_page) goto err_free; - spin_lock(&fc->lock); - BUG_ON(list_empty(&fi->write_files)); - ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); - req->ff = fuse_file_get(ff); - spin_unlock(&fc->lock); + error = -EIO; + req->ff = fuse_write_file_get(fc, fi); + if (!req->ff) + goto err_free; - fuse_write_fill(req, ff, page_offset(page), 0); + fuse_write_fill(req, req->ff, page_offset(page), 0); copy_highpage(tmp_page, page); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; + req->misc.write.next = NULL; req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; @@ -1550,19 +1611,263 @@ err_free: fuse_request_free(req); err: end_page_writeback(page); - return -ENOMEM; + return error; } static int fuse_writepage(struct page *page, struct writeback_control *wbc) { int err; + if (fuse_page_is_writeback(page->mapping->host, page->index)) { + /* + * ->writepages() should be called for sync() and friends. We + * should only get here on direct reclaim and then we are + * allowed to skip a page which is already in flight + */ + WARN_ON(wbc->sync_mode == WB_SYNC_ALL); + + redirty_page_for_writepage(wbc, page); + return 0; + } + err = fuse_writepage_locked(page); unlock_page(page); return err; } +struct fuse_fill_wb_data { + struct fuse_req *req; + struct fuse_file *ff; + struct inode *inode; + struct page **orig_pages; +}; + +static void fuse_writepages_send(struct fuse_fill_wb_data *data) +{ + struct fuse_req *req = data->req; + struct inode *inode = data->inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + int num_pages = req->num_pages; + int i; + + req->ff = fuse_file_get(data->ff); + spin_lock(&fc->lock); + list_add_tail(&req->list, &fi->queued_writes); + fuse_flush_writepages(inode); + spin_unlock(&fc->lock); + + for (i = 0; i < num_pages; i++) + end_page_writeback(data->orig_pages[i]); +} + +static bool fuse_writepage_in_flight(struct fuse_req *new_req, + struct page *page) +{ + struct fuse_conn *fc = get_fuse_conn(new_req->inode); + struct fuse_inode *fi = get_fuse_inode(new_req->inode); + struct fuse_req *tmp; + struct fuse_req *old_req; + bool found = false; + pgoff_t curr_index; + + BUG_ON(new_req->num_pages != 0); + + spin_lock(&fc->lock); + list_del(&new_req->writepages_entry); + list_for_each_entry(old_req, &fi->writepages, writepages_entry) { + BUG_ON(old_req->inode != new_req->inode); + curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT; + if (curr_index <= page->index && + page->index < curr_index + old_req->num_pages) { + found = true; + break; + } + } + if (!found) { + list_add(&new_req->writepages_entry, &fi->writepages); + goto out_unlock; + } + + new_req->num_pages = 1; + for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { + BUG_ON(tmp->inode != new_req->inode); + curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT; + if (tmp->num_pages == 1 && + curr_index == page->index) { + old_req = tmp; + } + } + + if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || + old_req->state == FUSE_REQ_PENDING)) { + struct backing_dev_info *bdi = page->mapping->backing_dev_info; + + copy_highpage(old_req->pages[0], page); + spin_unlock(&fc->lock); + + dec_bdi_stat(bdi, BDI_WRITEBACK); + dec_zone_page_state(page, NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); + fuse_writepage_free(fc, new_req); + fuse_request_free(new_req); + goto out; + } else { + new_req->misc.write.next = old_req->misc.write.next; + old_req->misc.write.next = new_req; + } +out_unlock: + spin_unlock(&fc->lock); +out: + return found; +} + +static int fuse_writepages_fill(struct page *page, + struct writeback_control *wbc, void *_data) +{ + struct fuse_fill_wb_data *data = _data; + struct fuse_req *req = data->req; + struct inode *inode = data->inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct page *tmp_page; + bool is_writeback; + int err; + + if (!data->ff) { + err = -EIO; + data->ff = fuse_write_file_get(fc, get_fuse_inode(inode)); + if (!data->ff) + goto out_unlock; + } + + /* + * Being under writeback is unlikely but possible. For example direct + * read to an mmaped fuse file will set the page dirty twice; once when + * the pages are faulted with get_user_pages(), and then after the read + * completed. + */ + is_writeback = fuse_page_is_writeback(inode, page->index); + + if (req && req->num_pages && + (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ || + (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || + data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { + fuse_writepages_send(data); + data->req = NULL; + } + err = -ENOMEM; + tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!tmp_page) + goto out_unlock; + + /* + * The page must not be redirtied until the writeout is completed + * (i.e. userspace has sent a reply to the write request). Otherwise + * there could be more than one temporary page instance for each real + * page. + * + * This is ensured by holding the page lock in page_mkwrite() while + * checking fuse_page_is_writeback(). We already hold the page lock + * since clear_page_dirty_for_io() and keep it held until we add the + * request to the fi->writepages list and increment req->num_pages. + * After this fuse_page_is_writeback() will indicate that the page is + * under writeback, so we can release the page lock. + */ + if (data->req == NULL) { + struct fuse_inode *fi = get_fuse_inode(inode); + + err = -ENOMEM; + req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ); + if (!req) { + __free_page(tmp_page); + goto out_unlock; + } + + fuse_write_fill(req, data->ff, page_offset(page), 0); + req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; + req->misc.write.next = NULL; + req->in.argpages = 1; + req->background = 1; + req->num_pages = 0; + req->end = fuse_writepage_end; + req->inode = inode; + + spin_lock(&fc->lock); + list_add(&req->writepages_entry, &fi->writepages); + spin_unlock(&fc->lock); + + data->req = req; + } + set_page_writeback(page); + + copy_highpage(tmp_page, page); + req->pages[req->num_pages] = tmp_page; + req->page_descs[req->num_pages].offset = 0; + req->page_descs[req->num_pages].length = PAGE_SIZE; + + inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); + inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); + + err = 0; + if (is_writeback && fuse_writepage_in_flight(req, page)) { + end_page_writeback(page); + data->req = NULL; + goto out_unlock; + } + data->orig_pages[req->num_pages] = page; + + /* + * Protected by fc->lock against concurrent access by + * fuse_page_is_writeback(). + */ + spin_lock(&fc->lock); + req->num_pages++; + spin_unlock(&fc->lock); + +out_unlock: + unlock_page(page); + + return err; +} + +static int fuse_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct fuse_fill_wb_data data; + int err; + + err = -EIO; + if (is_bad_inode(inode)) + goto out; + + data.inode = inode; + data.req = NULL; + data.ff = NULL; + + err = -ENOMEM; + data.orig_pages = kzalloc(sizeof(struct page *) * + FUSE_MAX_PAGES_PER_REQ, + GFP_NOFS); + if (!data.orig_pages) + goto out; + + err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); + if (data.req) { + /* Ignore errors if we can write at least one page */ + BUG_ON(!data.req->num_pages); + fuse_writepages_send(&data); + err = 0; + } + if (data.ff) + fuse_file_put(data.ff, false); + + kfree(data.orig_pages); +out: + return err; +} + static int fuse_launder_page(struct page *page) { int err = 0; @@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma) static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - /* - * Don't use page->mapping as it may become NULL from a - * concurrent truncate. - */ - struct inode *inode = vma->vm_file->f_mapping->host; + struct inode *inode = file_inode(vma->vm_file); + + file_update_time(vma->vm_file); + lock_page(page); + if (page->mapping != inode->i_mapping) { + unlock_page(page); + return VM_FAULT_NOPAGE; + } fuse_wait_on_page_writeback(inode, page->index); - return 0; + return VM_FAULT_LOCKED; } static const struct vm_operations_struct fuse_file_vm_ops = { @@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .writepage = fuse_writepage, + .writepages = fuse_writepages, .launder_page = fuse_launder_page, .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5b9e6f3b6aef..7d2730912667 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -321,6 +321,7 @@ struct fuse_req { struct { struct fuse_write_in in; struct fuse_write_out out; + struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; struct fuse_lk_in lk_in; @@ -374,12 +375,11 @@ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; - /** Mutex protecting against directory alias creation */ - struct mutex inst_mutex; - /** Refcount */ atomic_t count; + struct rcu_head rcu; + /** The user id for this mount */ kuid_t user_id; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a8ce6dab60a0..d468643a68b2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -565,7 +565,6 @@ void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); - mutex_init(&fc->inst_mutex); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); @@ -596,7 +595,6 @@ void fuse_conn_put(struct fuse_conn *fc) if (atomic_dec_and_test(&fc->count)) { if (fc->destroy_req) fuse_request_free(fc->destroy_req); - mutex_destroy(&fc->inst_mutex); fc->release(fc); } } @@ -920,7 +918,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) static void fuse_free_conn(struct fuse_conn *fc) { - kfree(fc); + kfree_rcu(fc, rcu); } static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 109ce9325b76..1615df16cf4e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1514,13 +1514,6 @@ out: return NULL; } -static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - /** * gfs2_permission - * @inode: The inode @@ -1872,7 +1865,7 @@ const struct inode_operations gfs2_dir_iops = { const struct inode_operations gfs2_symlink_iops = { .readlink = generic_readlink, .follow_link = gfs2_follow_link, - .put_link = gfs2_put_link, + .put_link = kfree_put_link, .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index 2a1d712f85dc..f6bd266d70b5 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -153,11 +153,6 @@ struct hfs_btree_header_rec { u32 reserved3[16]; } __packed; -#define HFS_NODE_INDEX 0x00 /* An internal (index) node */ -#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */ -#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */ -#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */ - #define BTREE_ATTR_BADCLOSE 0x00000001 /* b-tree not closed properly. not used by hfsplus. */ #define HFS_TREE_BIGKEYS 0x00000002 /* key length is u16 instead of u8. diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 0c6540c91167..0fcec8b2a90b 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -15,6 +15,118 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" +/* + * Initial source code of clump size calculation is gotten + * from http://opensource.apple.com/tarballs/diskdev_cmds/ + */ +#define CLUMP_ENTRIES 15 + +static short clumptbl[CLUMP_ENTRIES * 3] = { +/* + * Volume Attributes Catalog Extents + * Size Clump (MB) Clump (MB) Clump (MB) + */ + /* 1GB */ 4, 4, 4, + /* 2GB */ 6, 6, 4, + /* 4GB */ 8, 8, 4, + /* 8GB */ 11, 11, 5, + /* + * For volumes 16GB and larger, we want to make sure that a full OS + * install won't require fragmentation of the Catalog or Attributes + * B-trees. We do this by making the clump sizes sufficiently large, + * and by leaving a gap after the B-trees for them to grow into. + * + * For SnowLeopard 10A298, a FullNetInstall with all packages selected + * results in: + * Catalog B-tree Header + * nodeSize: 8192 + * totalNodes: 31616 + * freeNodes: 1978 + * (used = 231.55 MB) + * Attributes B-tree Header + * nodeSize: 8192 + * totalNodes: 63232 + * freeNodes: 958 + * (used = 486.52 MB) + * + * We also want Time Machine backup volumes to have a sufficiently + * large clump size to reduce fragmentation. + * + * The series of numbers for Catalog and Attribute form a geometric + * series. For Catalog (16GB to 512GB), each term is 8**(1/5) times + * the previous term. For Attributes (16GB to 512GB), each term is + * 4**(1/5) times the previous term. For 1TB to 16TB, each term is + * 2**(1/5) times the previous term. + */ + /* 16GB */ 64, 32, 5, + /* 32GB */ 84, 49, 6, + /* 64GB */ 111, 74, 7, + /* 128GB */ 147, 111, 8, + /* 256GB */ 194, 169, 9, + /* 512GB */ 256, 256, 11, + /* 1TB */ 294, 294, 14, + /* 2TB */ 338, 338, 16, + /* 4TB */ 388, 388, 20, + /* 8TB */ 446, 446, 25, + /* 16TB */ 512, 512, 32 +}; + +u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size, + u64 sectors, int file_id) +{ + u32 mod = max(node_size, block_size); + u32 clump_size; + int column; + int i; + + /* Figure out which column of the above table to use for this file. */ + switch (file_id) { + case HFSPLUS_ATTR_CNID: + column = 0; + break; + case HFSPLUS_CAT_CNID: + column = 1; + break; + default: + column = 2; + break; + } + + /* + * The default clump size is 0.8% of the volume size. And + * it must also be a multiple of the node and block size. + */ + if (sectors < 0x200000) { + clump_size = sectors << 2; /* 0.8 % */ + if (clump_size < (8 * node_size)) + clump_size = 8 * node_size; + } else { + /* turn exponent into table index... */ + for (i = 0, sectors = sectors >> 22; + sectors && (i < CLUMP_ENTRIES - 1); + ++i, sectors = sectors >> 1) { + /* empty body */ + } + + clump_size = clumptbl[column + (i) * 3] * 1024 * 1024; + } + + /* + * Round the clump size to a multiple of node and block size. + * NOTE: This rounds down. + */ + clump_size /= mod; + clump_size *= mod; + + /* + * Rounding down could have rounded down to 0 if the block size was + * greater than the clump size. If so, just use one block or node. + */ + if (clump_size == 0) + clump_size = mod; + + return clump_size; +} /* Get a reference to a B*Tree and do some initial checks */ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 2b9cd01696e2..08846425b67f 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -127,6 +127,14 @@ struct hfs_bnode { #define HFS_BNODE_DELETED 4 /* + * Attributes file states + */ +#define HFSPLUS_EMPTY_ATTR_TREE 0 +#define HFSPLUS_CREATING_ATTR_TREE 1 +#define HFSPLUS_VALID_ATTR_TREE 2 +#define HFSPLUS_FAILED_ATTR_TREE 3 + +/* * HFS+ superblock info (built from Volume Header on disk) */ @@ -141,6 +149,7 @@ struct hfsplus_sb_info { struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; struct hfs_btree *attr_tree; + atomic_t attr_tree_state; struct inode *alloc_file; struct inode *hidden_dir; struct nls_table *nls; @@ -380,6 +389,7 @@ int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); int hfsplus_block_free(struct super_block *, u32, u32); /* btree.c */ +u32 hfsplus_calc_btree_clump_size(u32, u32, u64, int); struct hfs_btree *hfs_btree_open(struct super_block *, u32); void hfs_btree_close(struct hfs_btree *); int hfs_btree_write(struct hfs_btree *); diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 452ede01b036..8ffb3a8ffe75 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -156,10 +156,10 @@ struct hfs_bnode_desc { } __packed; /* HFS+ BTree node types */ -#define HFS_NODE_INDEX 0x00 -#define HFS_NODE_HEADER 0x01 -#define HFS_NODE_MAP 0x02 -#define HFS_NODE_LEAF 0xFF +#define HFS_NODE_INDEX 0x00 /* An internal (index) node */ +#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */ +#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */ +#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */ /* HFS+ BTree header */ struct hfs_btree_header_rec { @@ -187,6 +187,9 @@ struct hfs_btree_header_rec { /* HFS+ BTree misc info */ #define HFSPLUS_TREE_HEAD 0 #define HFSPLUS_NODE_MXSZ 32768 +#define HFSPLUS_ATTR_TREE_NODE_SIZE 8192 +#define HFSPLUS_BTREE_HDR_NODE_RECS_COUNT 3 +#define HFSPLUS_BTREE_HDR_USER_BYTES 128 /* Some special File ID numbers (stolen from hfs.h) */ #define HFSPLUS_POR_CNID 1 /* Parent Of the Root */ diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4c4d142cf890..80875aa640ef 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -474,12 +474,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) pr_err("failed to load catalog file\n"); goto out_close_ext_tree; } + atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE); if (vhdr->attr_file.total_blocks != 0) { sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); if (!sbi->attr_tree) { pr_err("failed to load attributes file\n"); goto out_close_cat_tree; } + atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE); } sb->s_xattr = hfsplus_xattr_handlers; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index bd8471fb9a6a..efc85b1377cc 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -127,6 +127,208 @@ static int can_set_xattr(struct inode *inode, const char *name, return 0; } +static void hfsplus_init_header_node(struct inode *attr_file, + u32 clump_size, + char *buf, size_t node_size) +{ + struct hfs_bnode_desc *desc; + struct hfs_btree_header_rec *head; + u16 offset; + __be16 *rec_offsets; + u32 hdr_node_map_rec_bits; + char *bmp; + u32 used_nodes; + u32 used_bmp_bytes; + + hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %zu\n", + clump_size, node_size); + + /* The end of the node contains list of record offsets */ + rec_offsets = (__be16 *)(buf + node_size); + + desc = (struct hfs_bnode_desc *)buf; + desc->type = HFS_NODE_HEADER; + desc->num_recs = cpu_to_be16(HFSPLUS_BTREE_HDR_NODE_RECS_COUNT); + offset = sizeof(struct hfs_bnode_desc); + *--rec_offsets = cpu_to_be16(offset); + + head = (struct hfs_btree_header_rec *)(buf + offset); + head->node_size = cpu_to_be16(node_size); + head->node_count = cpu_to_be32(i_size_read(attr_file) / node_size); + head->free_nodes = cpu_to_be32(be32_to_cpu(head->node_count) - 1); + head->clump_size = cpu_to_be32(clump_size); + head->attributes |= cpu_to_be32(HFS_TREE_BIGKEYS | HFS_TREE_VARIDXKEYS); + head->max_key_len = cpu_to_be16(HFSPLUS_ATTR_KEYLEN - sizeof(u16)); + offset += sizeof(struct hfs_btree_header_rec); + *--rec_offsets = cpu_to_be16(offset); + offset += HFSPLUS_BTREE_HDR_USER_BYTES; + *--rec_offsets = cpu_to_be16(offset); + + hdr_node_map_rec_bits = 8 * (node_size - offset - (4 * sizeof(u16))); + if (be32_to_cpu(head->node_count) > hdr_node_map_rec_bits) { + u32 map_node_bits; + u32 map_nodes; + + desc->next = cpu_to_be32(be32_to_cpu(head->leaf_tail) + 1); + map_node_bits = 8 * (node_size - sizeof(struct hfs_bnode_desc) - + (2 * sizeof(u16)) - 2); + map_nodes = (be32_to_cpu(head->node_count) - + hdr_node_map_rec_bits + + (map_node_bits - 1)) / map_node_bits; + be32_add_cpu(&head->free_nodes, 0 - map_nodes); + } + + bmp = buf + offset; + used_nodes = + be32_to_cpu(head->node_count) - be32_to_cpu(head->free_nodes); + used_bmp_bytes = used_nodes / 8; + if (used_bmp_bytes) { + memset(bmp, 0xFF, used_bmp_bytes); + bmp += used_bmp_bytes; + used_nodes %= 8; + } + *bmp = ~(0xFF >> used_nodes); + offset += hdr_node_map_rec_bits / 8; + *--rec_offsets = cpu_to_be16(offset); +} + +static int hfsplus_create_attributes_file(struct super_block *sb) +{ + int err = 0; + struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); + struct inode *attr_file; + struct hfsplus_inode_info *hip; + u32 clump_size; + u16 node_size = HFSPLUS_ATTR_TREE_NODE_SIZE; + char *buf; + int index, written; + struct address_space *mapping; + struct page *page; + int old_state = HFSPLUS_EMPTY_ATTR_TREE; + + hfs_dbg(ATTR_MOD, "create_attr_file: ino %d\n", HFSPLUS_ATTR_CNID); + +check_attr_tree_state_again: + switch (atomic_read(&sbi->attr_tree_state)) { + case HFSPLUS_EMPTY_ATTR_TREE: + if (old_state != atomic_cmpxchg(&sbi->attr_tree_state, + old_state, + HFSPLUS_CREATING_ATTR_TREE)) + goto check_attr_tree_state_again; + break; + case HFSPLUS_CREATING_ATTR_TREE: + /* + * This state means that another thread is in process + * of AttributesFile creation. Theoretically, it is + * possible to be here. But really __setxattr() method + * first of all calls hfs_find_init() for lookup in + * B-tree of CatalogFile. This method locks mutex of + * CatalogFile's B-tree. As a result, if some thread + * is inside AttributedFile creation operation then + * another threads will be waiting unlocking of + * CatalogFile's B-tree's mutex. However, if code will + * change then we will return error code (-EAGAIN) from + * here. Really, it means that first try to set of xattr + * fails with error but second attempt will have success. + */ + return -EAGAIN; + case HFSPLUS_VALID_ATTR_TREE: + return 0; + case HFSPLUS_FAILED_ATTR_TREE: + return -EOPNOTSUPP; + default: + BUG(); + } + + attr_file = hfsplus_iget(sb, HFSPLUS_ATTR_CNID); + if (IS_ERR(attr_file)) { + pr_err("failed to load attributes file\n"); + return PTR_ERR(attr_file); + } + + BUG_ON(i_size_read(attr_file) != 0); + + hip = HFSPLUS_I(attr_file); + + clump_size = hfsplus_calc_btree_clump_size(sb->s_blocksize, + node_size, + sbi->sect_count, + HFSPLUS_ATTR_CNID); + + mutex_lock(&hip->extents_lock); + hip->clump_blocks = clump_size >> sbi->alloc_blksz_shift; + mutex_unlock(&hip->extents_lock); + + if (sbi->free_blocks <= (hip->clump_blocks << 1)) { + err = -ENOSPC; + goto end_attr_file_creation; + } + + while (hip->alloc_blocks < hip->clump_blocks) { + err = hfsplus_file_extend(attr_file); + if (unlikely(err)) { + pr_err("failed to extend attributes file\n"); + goto end_attr_file_creation; + } + hip->phys_size = attr_file->i_size = + (loff_t)hip->alloc_blocks << sbi->alloc_blksz_shift; + hip->fs_blocks = hip->alloc_blocks << sbi->fs_shift; + inode_set_bytes(attr_file, attr_file->i_size); + } + + buf = kzalloc(node_size, GFP_NOFS); + if (!buf) { + pr_err("failed to allocate memory for header node\n"); + err = -ENOMEM; + goto end_attr_file_creation; + } + + hfsplus_init_header_node(attr_file, clump_size, buf, node_size); + + mapping = attr_file->i_mapping; + + index = 0; + written = 0; + for (; written < node_size; index++, written += PAGE_CACHE_SIZE) { + void *kaddr; + + page = read_mapping_page(mapping, index, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed_header_node_init; + } + + kaddr = kmap_atomic(page); + memcpy(kaddr, buf + written, + min_t(size_t, PAGE_CACHE_SIZE, node_size - written)); + kunmap_atomic(kaddr); + + set_page_dirty(page); + page_cache_release(page); + } + + hfsplus_mark_inode_dirty(attr_file, HFSPLUS_I_ATTR_DIRTY); + + sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); + if (!sbi->attr_tree) + pr_err("failed to load attributes file\n"); + +failed_header_node_init: + kfree(buf); + +end_attr_file_creation: + iput(attr_file); + + if (!err) + atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE); + else if (err == -ENOSPC) + atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE); + else + atomic_set(&sbi->attr_tree_state, HFSPLUS_FAILED_ATTR_TREE); + + return err; +} + int __hfsplus_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { @@ -211,8 +413,9 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, } if (!HFSPLUS_SB(inode->i_sb)->attr_tree) { - err = -EOPNOTSUPP; - goto end_setxattr; + err = hfsplus_create_attributes_file(inode->i_sb); + if (unlikely(err)) + goto end_setxattr; } if (hfsplus_attr_exists(inode, name)) { diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 1b398636e990..6797bf80f6e2 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -80,6 +80,7 @@ struct hpfs_sb_info { unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ int sb_timeshift; + struct rcu_head rcu; }; /* Four 512-byte buffers and the 2k block obtained by concatenating them */ diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 345713d2f8f3..1b39afdd86fd 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -407,7 +407,7 @@ again: /*printk("HPFS: truncating file before delete.\n");*/ newattrs.ia_size = 0; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - err = notify_change(dentry, &newattrs); + err = notify_change(dentry, &newattrs, NULL); put_write_access(inode); if (!err) goto again; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 4334cda8dba1..b8d01ef6f531 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -101,18 +101,24 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, return 0; } -static void hpfs_put_super(struct super_block *s) +static void free_sbi(struct hpfs_sb_info *sbi) { - struct hpfs_sb_info *sbi = hpfs_sb(s); + kfree(sbi->sb_cp_table); + kfree(sbi->sb_bmp_dir); + kfree(sbi); +} +static void lazy_free_sbi(struct rcu_head *rcu) +{ + free_sbi(container_of(rcu, struct hpfs_sb_info, rcu)); +} + +static void hpfs_put_super(struct super_block *s) +{ hpfs_lock(s); unmark_dirty(s); hpfs_unlock(s); - - kfree(sbi->sb_cp_table); - kfree(sbi->sb_bmp_dir); - s->s_fs_info = NULL; - kfree(sbi); + call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) @@ -485,9 +491,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) } s->s_fs_info = sbi; - sbi->sb_bmp_dir = NULL; - sbi->sb_cp_table = NULL; - mutex_init(&sbi->hpfs_mutex); hpfs_lock(s); @@ -679,10 +682,7 @@ bail2: brelse(bh0); bail1: bail0: hpfs_unlock(s); - kfree(sbi->sb_bmp_dir); - kfree(sbi->sb_cp_table); - s->s_fs_info = NULL; - kfree(sbi); + free_sbi(sbi); return -EINVAL; } diff --git a/fs/inode.c b/fs/inode.c index b33ba8e021cc..4bcdad3c9361 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -773,15 +773,11 @@ static struct inode *find_inode(struct super_block *sb, repeat: hlist_for_each_entry(inode, head, i_hash) { - spin_lock(&inode->i_lock); - if (inode->i_sb != sb) { - spin_unlock(&inode->i_lock); + if (inode->i_sb != sb) continue; - } - if (!test(inode, data)) { - spin_unlock(&inode->i_lock); + if (!test(inode, data)) continue; - } + spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -804,15 +800,11 @@ static struct inode *find_inode_fast(struct super_block *sb, repeat: hlist_for_each_entry(inode, head, i_hash) { - spin_lock(&inode->i_lock); - if (inode->i_ino != ino) { - spin_unlock(&inode->i_lock); + if (inode->i_ino != ino) continue; - } - if (inode->i_sb != sb) { - spin_unlock(&inode->i_lock); + if (inode->i_sb != sb) continue; - } + spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -951,6 +943,42 @@ void unlock_new_inode(struct inode *inode) EXPORT_SYMBOL(unlock_new_inode); /** + * lock_two_nondirectories - take two i_mutexes on non-directory objects + * @inode1: first inode to lock + * @inode2: second inode to lock + */ +void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) +{ + WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); + if (inode1 == inode2 || !inode2) { + mutex_lock(&inode1->i_mutex); + return; + } + WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); + if (inode1 < inode2) { + mutex_lock(&inode1->i_mutex); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); + } else { + mutex_lock(&inode2->i_mutex); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2); + } +} +EXPORT_SYMBOL(lock_two_nondirectories); + +/** + * unlock_two_nondirectories - release locks from lock_two_nondirectories() + * @inode1: first inode to unlock + * @inode2: second inode to unlock + */ +void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&inode1->i_mutex); + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(unlock_two_nondirectories); + +/** * iget5_locked - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get @@ -1575,7 +1603,11 @@ static int __remove_suid(struct dentry *dentry, int kill) struct iattr newattrs; newattrs.ia_valid = ATTR_FORCE | kill; - return notify_change(dentry, &newattrs); + /* + * Note we call this on write, so notify_change will not + * encounter any conflicting delegations: + */ + return notify_change(dentry, &newattrs, NULL); } int file_remove_suid(struct file *file) diff --git a/fs/internal.h b/fs/internal.h index 513e0d859a6c..465742407466 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,8 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/lglock.h> - struct super_block; struct file_system_type; struct linux_binprm; @@ -62,8 +60,6 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -extern struct lglock vfsmount_lock; - extern int __mnt_want_write(struct vfsmount *); extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write(struct vfsmount *); @@ -77,9 +73,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); -extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); /* diff --git a/fs/ioctl.c b/fs/ioctl.c index fd507fb460f8..8ac3fad36192 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd, { int error = -ENOTTY; - if (!filp->f_op || !filp->f_op->unlocked_ioctl) + if (!filp->f_op->unlocked_ioctl) goto out; error = filp->f_op->unlocked_ioctl(filp, cmd, arg); @@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) + if (filp->f_op->fasync) /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index e5d408a7ea4a..4a9e10ea13f2 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -181,7 +181,7 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hash_common(struct qstr *qstr, int ms) { const char *name; int len; @@ -202,7 +202,7 @@ isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hashi_common(struct qstr *qstr, int ms) { const char *name; int len; @@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common( static int isofs_hash(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hash_common(dentry, qstr, 0); + return isofs_hash_common(qstr, 0); } static int isofs_hashi(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hashi_common(dentry, qstr, 0); + return isofs_hashi_common(qstr, 0); } static int @@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hash_common(dentry, qstr, 1); + return isofs_hash_common(qstr, 1); } static int isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hashi_common(dentry, qstr, 1); + return isofs_hashi_common(qstr, 1); } static int diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index be0c39b66fe0..aa603e017d22 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -26,7 +26,6 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hrtimer.h> -#include <linux/backing-dev.h> static void __journal_temp_unlink_buffer(struct journal_head *jh); @@ -100,10 +99,11 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); + new_transaction = kzalloc(sizeof(*new_transaction), + GFP_NOFS|__GFP_NOFAIL); if (!new_transaction) { - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto alloc_transaction; + ret = -ENOMEM; + goto out; } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index fe3c0527545f..09b3ed455724 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -515,6 +515,10 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) c = JFFS2_SB_INFO(sb); + /* Do not support the MLC nand */ + if (c->mtd->type == MTD_MLCNANDFLASH) + return -EINVAL; + #ifndef CONFIG_JFFS2_FS_WRITEBUFFER if (c->mtd->type == MTD_NANDFLASH) { pr_err("Cannot operate on NAND flash unless jffs2 NAND support is compiled in\n"); diff --git a/fs/libfs.c b/fs/libfs.c index 3a3a9b53bf5a..5de06947ba5e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -10,6 +10,7 @@ #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/mutex.h> +#include <linux/namei.h> #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/buffer_head.h> /* sync_mapping_buffers */ @@ -31,6 +32,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); return 0; } +EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -39,6 +41,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; return 0; } +EXPORT_SYMBOL(simple_statfs); /* * Retaining negative dentries for an in-memory filesystem just wastes @@ -66,6 +69,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned d_add(dentry, NULL); return NULL; } +EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { @@ -75,12 +79,14 @@ int dcache_dir_open(struct inode *inode, struct file *file) return file->private_data ? 0 : -ENOMEM; } +EXPORT_SYMBOL(dcache_dir_open); int dcache_dir_close(struct inode *inode, struct file *file) { dput(file->private_data); return 0; } +EXPORT_SYMBOL(dcache_dir_close); loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { @@ -123,6 +129,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) mutex_unlock(&dentry->d_inode->i_mutex); return offset; } +EXPORT_SYMBOL(dcache_dir_lseek); /* Relationship between i_mode and the DT_xxx types */ static inline unsigned char dt_type(struct inode *inode) @@ -172,11 +179,13 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) spin_unlock(&dentry->d_lock); return 0; } +EXPORT_SYMBOL(dcache_readdir); ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; } +EXPORT_SYMBOL(generic_read_dir); const struct file_operations simple_dir_operations = { .open = dcache_dir_open, @@ -186,10 +195,12 @@ const struct file_operations simple_dir_operations = { .iterate = dcache_readdir, .fsync = noop_fsync, }; +EXPORT_SYMBOL(simple_dir_operations); const struct inode_operations simple_dir_inode_operations = { .lookup = simple_lookup, }; +EXPORT_SYMBOL(simple_dir_inode_operations); static const struct super_operations simple_super_operations = { .statfs = simple_statfs, @@ -244,6 +255,7 @@ Enomem: deactivate_locked_super(s); return ERR_PTR(-ENOMEM); } +EXPORT_SYMBOL(mount_pseudo); int simple_open(struct inode *inode, struct file *file) { @@ -251,6 +263,7 @@ int simple_open(struct inode *inode, struct file *file) file->private_data = inode->i_private; return 0; } +EXPORT_SYMBOL(simple_open); int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -263,6 +276,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den d_instantiate(dentry, inode); return 0; } +EXPORT_SYMBOL(simple_link); int simple_empty(struct dentry *dentry) { @@ -283,6 +297,7 @@ out: spin_unlock(&dentry->d_lock); return ret; } +EXPORT_SYMBOL(simple_empty); int simple_unlink(struct inode *dir, struct dentry *dentry) { @@ -293,6 +308,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) dput(dentry); return 0; } +EXPORT_SYMBOL(simple_unlink); int simple_rmdir(struct inode *dir, struct dentry *dentry) { @@ -304,6 +320,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) drop_nlink(dir); return 0; } +EXPORT_SYMBOL(simple_rmdir); int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) @@ -330,6 +347,7 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; } +EXPORT_SYMBOL(simple_rename); /** * simple_setattr - setattr for simple filesystem @@ -370,6 +388,7 @@ int simple_readpage(struct file *file, struct page *page) unlock_page(page); return 0; } +EXPORT_SYMBOL(simple_readpage); int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -393,6 +412,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping, } return 0; } +EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes @@ -444,6 +464,7 @@ int simple_write_end(struct file *file, struct address_space *mapping, return copied; } +EXPORT_SYMBOL(simple_write_end); /* * the inodes created here are not hashed. If you use iunique to generate @@ -512,6 +533,7 @@ out: dput(root); return -ENOMEM; } +EXPORT_SYMBOL(simple_fill_super); static DEFINE_SPINLOCK(pin_fs_lock); @@ -534,6 +556,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c mntput(mnt); return 0; } +EXPORT_SYMBOL(simple_pin_fs); void simple_release_fs(struct vfsmount **mount, int *count) { @@ -545,6 +568,7 @@ void simple_release_fs(struct vfsmount **mount, int *count) spin_unlock(&pin_fs_lock); mntput(mnt); } +EXPORT_SYMBOL(simple_release_fs); /** * simple_read_from_buffer - copy data from the buffer to user space @@ -579,6 +603,7 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, *ppos = pos + count; return count; } +EXPORT_SYMBOL(simple_read_from_buffer); /** * simple_write_to_buffer - copy data from user space to the buffer @@ -613,6 +638,7 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, *ppos = pos + count; return count; } +EXPORT_SYMBOL(simple_write_to_buffer); /** * memory_read_from_buffer - copy data from the buffer @@ -644,6 +670,7 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, return count; } +EXPORT_SYMBOL(memory_read_from_buffer); /* * Transaction based IO. @@ -665,6 +692,7 @@ void simple_transaction_set(struct file *file, size_t n) smp_mb(); ar->size = n; } +EXPORT_SYMBOL(simple_transaction_set); char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) { @@ -696,6 +724,7 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s return ar->data; } +EXPORT_SYMBOL(simple_transaction_get); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { @@ -705,12 +734,14 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size return 0; return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); } +EXPORT_SYMBOL(simple_transaction_read); int simple_transaction_release(struct inode *inode, struct file *file) { free_page((unsigned long)file->private_data); return 0; } +EXPORT_SYMBOL(simple_transaction_release); /* Simple attribute files */ @@ -746,12 +777,14 @@ int simple_attr_open(struct inode *inode, struct file *file, return nonseekable_open(inode, file); } +EXPORT_SYMBOL_GPL(simple_attr_open); int simple_attr_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } +EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ /* read from the buffer that is filled with the get function */ ssize_t simple_attr_read(struct file *file, char __user *buf, @@ -787,6 +820,7 @@ out: mutex_unlock(&attr->mutex); return ret; } +EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ ssize_t simple_attr_write(struct file *file, const char __user *buf, @@ -819,6 +853,7 @@ out: mutex_unlock(&attr->mutex); return ret; } +EXPORT_SYMBOL_GPL(simple_attr_write); /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation @@ -957,39 +992,56 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return 0; } - -EXPORT_SYMBOL(dcache_dir_close); -EXPORT_SYMBOL(dcache_dir_lseek); -EXPORT_SYMBOL(dcache_dir_open); -EXPORT_SYMBOL(dcache_readdir); -EXPORT_SYMBOL(generic_read_dir); -EXPORT_SYMBOL(mount_pseudo); -EXPORT_SYMBOL(simple_write_begin); -EXPORT_SYMBOL(simple_write_end); -EXPORT_SYMBOL(simple_dir_inode_operations); -EXPORT_SYMBOL(simple_dir_operations); -EXPORT_SYMBOL(simple_empty); -EXPORT_SYMBOL(simple_fill_super); -EXPORT_SYMBOL(simple_getattr); -EXPORT_SYMBOL(simple_open); -EXPORT_SYMBOL(simple_link); -EXPORT_SYMBOL(simple_lookup); -EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_readpage); -EXPORT_SYMBOL(simple_release_fs); -EXPORT_SYMBOL(simple_rename); -EXPORT_SYMBOL(simple_rmdir); -EXPORT_SYMBOL(simple_statfs); EXPORT_SYMBOL(noop_fsync); -EXPORT_SYMBOL(simple_unlink); -EXPORT_SYMBOL(simple_read_from_buffer); -EXPORT_SYMBOL(simple_write_to_buffer); -EXPORT_SYMBOL(memory_read_from_buffer); -EXPORT_SYMBOL(simple_transaction_set); -EXPORT_SYMBOL(simple_transaction_get); -EXPORT_SYMBOL(simple_transaction_read); -EXPORT_SYMBOL(simple_transaction_release); -EXPORT_SYMBOL_GPL(simple_attr_open); -EXPORT_SYMBOL_GPL(simple_attr_release); -EXPORT_SYMBOL_GPL(simple_attr_read); -EXPORT_SYMBOL_GPL(simple_attr_write); + +void kfree_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} +EXPORT_SYMBOL(kfree_put_link); + +/* + * nop .set_page_dirty method so that people can use .page_mkwrite on + * anon inodes. + */ +static int anon_set_page_dirty(struct page *page) +{ + return 0; +}; + +/* + * A single inode exists for all anon_inode files. Contrary to pipes, + * anon_inode inodes have no associated per-instance data, so we need + * only allocate one of them. + */ +struct inode *alloc_anon_inode(struct super_block *s) +{ + static const struct address_space_operations anon_aops = { + .set_page_dirty = anon_set_page_dirty, + }; + struct inode *inode = new_inode_pseudo(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_ino = get_next_ino(); + inode->i_mapping->a_ops = &anon_aops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. + */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_flags |= S_PRIVATE; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; +} +EXPORT_SYMBOL(alloc_anon_inode); diff --git a/fs/locks.c b/fs/locks.c index b27a3005d78d..92a0f0a52b06 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -134,7 +134,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) -#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) +#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) static bool lease_breaking(struct file_lock *fl) { @@ -1292,28 +1292,40 @@ static void time_out_leases(struct inode *inode) } } +static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) +{ + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) + return false; + return locks_conflict(breaker, lease); +} + /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return - * @mode: the open mode (read or write) + * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: + * break all leases + * @type: FL_LEASE: break leases and delegations; FL_DELEG: break + * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ -int __break_lease(struct inode *inode, unsigned int mode) +int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock *new_fl, *flock; struct file_lock *fl; unsigned long break_time; int i_have_this_lease = 0; + bool lease_conflict = false; int want_write = (mode & O_ACCMODE) != O_RDONLY; new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); + new_fl->fl_flags = type; spin_lock(&inode->i_lock); @@ -1323,13 +1335,16 @@ int __break_lease(struct inode *inode, unsigned int mode) if ((flock == NULL) || !IS_LEASE(flock)) goto out; - if (!locks_conflict(flock, new_fl)) + for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { + if (leases_conflict(fl, new_fl)) { + lease_conflict = true; + if (fl->fl_owner == current->files) + i_have_this_lease = 1; + } + } + if (!lease_conflict) goto out; - for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) - if (fl->fl_owner == current->files) - i_have_this_lease = 1; - break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; @@ -1338,6 +1353,8 @@ int __break_lease(struct inode *inode, unsigned int mode) } for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { + if (!leases_conflict(fl, new_fl)) + continue; if (want_write) { if (fl->fl_flags & FL_UNLOCK_PENDING) continue; @@ -1379,7 +1396,7 @@ restart: */ for (flock = inode->i_flock; flock && IS_LEASE(flock); flock = flock->fl_next) { - if (locks_conflict(new_fl, flock)) + if (leases_conflict(new_fl, flock)) goto restart; } error = 0; @@ -1460,9 +1477,27 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; lease = *flp; + /* + * In the delegation case we need mutual exclusion with + * a number of operations that take the i_mutex. We trylock + * because delegations are an optional optimization, and if + * there's some chance of a conflict--we'd rather not + * bother, maybe that's a sign this just isn't a good file to + * hand out a delegation on. + */ + if (is_deleg && !mutex_trylock(&inode->i_mutex)) + return -EAGAIN; + + if (is_deleg && arg == F_WRLCK) { + /* Write delegations are not currently supported: */ + mutex_unlock(&inode->i_mutex); + WARN_ON_ONCE(1); + return -EINVAL; + } error = -EAGAIN; if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) @@ -1514,9 +1549,10 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp goto out; locks_insert_lock(before, lease); - return 0; - + error = 0; out: + if (is_deleg) + mutex_unlock(&inode->i_mutex); return error; } @@ -1579,7 +1615,7 @@ EXPORT_SYMBOL(generic_setlease); static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) { - if (filp->f_op && filp->f_op->setlease) + if (filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease); else return generic_setlease(filp, arg, lease); @@ -1771,7 +1807,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (error) goto out_free; - if (f.file->f_op && f.file->f_op->flock) + if (f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); @@ -1797,7 +1833,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; @@ -1909,7 +1945,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); @@ -2182,7 +2218,7 @@ void locks_remove_flock(struct file *filp) if (!inode->i_flock) return; - if (filp->f_op && filp->f_op->flock) { + if (filp->f_op->flock) { struct file_lock fl = { .fl_pid = current->tgid, .fl_file = filp, @@ -2246,7 +2282,7 @@ EXPORT_SYMBOL(posix_unblock_lock); */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op && filp->f_op->lock) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig index 6624684dd5de..f2a0cfcef11d 100644 --- a/fs/minix/Kconfig +++ b/fs/minix/Kconfig @@ -18,7 +18,7 @@ config MINIX_FS config MINIX_FS_NATIVE_ENDIAN def_bool MINIX_FS - depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU) + depends on M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU) config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED def_bool MINIX_FS diff --git a/fs/mount.h b/fs/mount.h index 64a858143ff9..d64c594be6c4 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -29,6 +29,7 @@ struct mount { struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; + struct rcu_head mnt_rcu; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else @@ -55,7 +56,7 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; - int mnt_ghosts; + struct path mnt_ex_mountpoint; }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ @@ -76,13 +77,28 @@ static inline int is_mounted(struct vfsmount *mnt) return !IS_ERR_OR_NULL(real_mount(mnt)); } -extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); +extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); +extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); + +extern bool legitimize_mnt(struct vfsmount *, unsigned); static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); } +extern seqlock_t mount_lock; + +static inline void lock_mount_hash(void) +{ + write_seqlock(&mount_lock); +} + +static inline void unlock_mount_hash(void) +{ + write_sequnlock(&mount_lock); +} + struct proc_mounts { struct seq_file m; struct mnt_namespace *ns; diff --git a/fs/namei.c b/fs/namei.c index caa28051e197..e029a4cbff7d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put); * to restart the path walk from the beginning in ref-walk mode. */ -static inline void lock_rcu_walk(void) -{ - br_read_lock(&vfsmount_lock); - rcu_read_lock(); -} - -static inline void unlock_rcu_walk(void) -{ - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); -} - /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data @@ -512,26 +500,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) BUG_ON(!(nd->flags & LOOKUP_RCU)); /* - * Get a reference to the parent first: we're - * going to make "path_put(nd->path)" valid in - * non-RCU context for "terminate_walk()". - * - * If this doesn't work, return immediately with - * RCU walking still active (and then we will do - * the RCU walk cleanup in terminate_walk()). + * After legitimizing the bastards, terminate_walk() + * will do the right thing for non-RCU mode, and all our + * subsequent exit cases should rcu_read_unlock() + * before returning. Do vfsmount first; if dentry + * can't be legitimized, just set nd->path.dentry to NULL + * and rely on dput(NULL) being a no-op. */ - if (!lockref_get_not_dead(&parent->d_lockref)) + if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) return -ECHILD; - - /* - * After the mntget(), we terminate_walk() will do - * the right thing for non-RCU mode, and all our - * subsequent exit cases should unlock_rcu_walk() - * before returning. - */ - mntget(nd->path.mnt); nd->flags &= ~LOOKUP_RCU; + if (!lockref_get_not_dead(&parent->d_lockref)) { + nd->path.dentry = NULL; + rcu_read_unlock(); + return -ECHILD; + } + /* * For a negative lookup, the lookup sequence point is the parents * sequence point, and it only needs to revalidate the parent dentry. @@ -566,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) spin_unlock(&fs->lock); } - unlock_rcu_walk(); + rcu_read_unlock(); return 0; unlock_and_drop_dentry: spin_unlock(&fs->lock); drop_dentry: - unlock_rcu_walk(); + rcu_read_unlock(); dput(dentry); goto drop_root_mnt; out: - unlock_rcu_walk(); + rcu_read_unlock(); drop_root_mnt: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; @@ -608,17 +593,22 @@ static int complete_walk(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; + if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) { + rcu_read_unlock(); + return -ECHILD; + } if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { - unlock_rcu_walk(); + rcu_read_unlock(); + mntput(nd->path.mnt); return -ECHILD; } if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { - unlock_rcu_walk(); + rcu_read_unlock(); dput(dentry); + mntput(nd->path.mnt); return -ECHILD; } - mntget(nd->path.mnt); - unlock_rcu_walk(); + rcu_read_unlock(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -909,15 +899,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(&vfsmount_lock); + read_seqlock_excl(&mount_lock); parent = mnt->mnt_parent; if (parent == mnt) { - br_read_unlock(&vfsmount_lock); + read_sequnlock_excl(&mount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(&vfsmount_lock); + read_sequnlock_excl(&mount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -1048,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags) /* Something is mounted on this dentry in another * namespace and/or whatever was mounted there in this - * namespace got unmounted before we managed to get the - * vfsmount_lock */ + * namespace got unmounted before lookup_mnt() could + * get it */ } /* Handle an automount point */ @@ -1111,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, if (!d_mountpoint(path->dentry)) break; - mounted = __lookup_mnt(path->mnt, path->dentry, 1); + mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) break; path->mnt = &mounted->mnt; @@ -1132,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd) { while (d_mountpoint(nd->path.dentry)) { struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); if (!mounted) break; nd->path.mnt = &mounted->mnt; @@ -1174,7 +1164,7 @@ failed: nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - unlock_rcu_walk(); + rcu_read_unlock(); return -ECHILD; } @@ -1308,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, } /* - * Call i_op->lookup on the dentry. The dentry must be negative but may be - * hashed if it was pouplated with DCACHE_NEED_LOOKUP. + * Call i_op->lookup on the dentry. The dentry must be negative and + * unhashed. * * dir->d_inode->i_mutex must be held */ @@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd) nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - unlock_rcu_walk(); + rcu_read_unlock(); } } @@ -1511,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd) * so we keep a cache of "no, this doesn't need follow_link" * for the common case. */ -static inline int should_follow_link(struct inode *inode, int follow) +static inline int should_follow_link(struct dentry *dentry, int follow) { - if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { - if (likely(inode->i_op->follow_link)) - return follow; - - /* This gets set once for the inode lifetime */ - spin_lock(&inode->i_lock); - inode->i_opflags |= IOP_NOFOLLOW; - spin_unlock(&inode->i_lock); - } - return 0; + return unlikely(d_is_symlink(dentry)) ? follow : 0; } static inline int walk_component(struct nameidata *nd, struct path *path, @@ -1552,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, if (!inode) goto out_path_put; - if (should_follow_link(inode, follow)) { + if (should_follow_link(path->dentry, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { err = -ECHILD; @@ -1611,26 +1592,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) } /* - * We really don't want to look at inode->i_op->lookup - * when we don't have to. So we keep a cache bit in - * the inode ->i_opflags field that says "yes, we can - * do lookup on this inode". - */ -static inline int can_lookup(struct inode *inode) -{ - if (likely(inode->i_opflags & IOP_LOOKUP)) - return 1; - if (likely(!inode->i_op->lookup)) - return 0; - - /* We do this once for the lifetime of the inode */ - spin_lock(&inode->i_lock); - inode->i_opflags |= IOP_LOOKUP; - spin_unlock(&inode->i_lock); - return 1; -} - -/* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * @@ -1833,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } - if (!can_lookup(nd->inode)) { + if (!d_is_directory(nd->path.dentry)) { err = -ENOTDIR; break; } @@ -1851,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->flags = flags | LOOKUP_JUMPED; nd->depth = 0; if (flags & LOOKUP_ROOT) { - struct inode *inode = nd->root.dentry->d_inode; + struct dentry *root = nd->root.dentry; + struct inode *inode = root->d_inode; if (*name) { - if (!can_lookup(inode)) + if (!d_is_directory(root)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); if (retval) @@ -1862,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - lock_rcu_walk(); + rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + nd->m_seq = read_seqbegin(&mount_lock); } else { path_get(&nd->path); } @@ -1872,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->root.mnt = NULL; + nd->m_seq = read_seqbegin(&mount_lock); if (*name=='/') { if (flags & LOOKUP_RCU) { - lock_rcu_walk(); + rcu_read_lock(); set_root_rcu(nd); } else { set_root(nd); @@ -1886,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - lock_rcu_walk(); + rcu_read_lock(); do { seq = read_seqcount_begin(&fs->seq); @@ -1907,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; if (*name) { - if (!can_lookup(dentry->d_inode)) { + if (!d_is_directory(dentry)) { fdput(f); return -ENOTDIR; } @@ -1918,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (f.need_put) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - lock_rcu_walk(); + rcu_read_lock(); } else { path_get(&nd->path); fdput(f); @@ -1989,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name, err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) { - if (!can_lookup(nd->inode)) { + if (!d_is_directory(nd->path.dentry)) { path_put(&nd->path); err = -ENOTDIR; } @@ -2281,7 +2245,7 @@ done: } path->dentry = dentry; path->mnt = mntget(nd->path.mnt); - if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) + if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) return 1; follow_mount(path); error = 0; @@ -2426,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) { + struct inode *inode = victim->d_inode; int error; - if (!victim->d_inode) + if (d_is_negative(victim)) return -ENOENT; + BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); @@ -2441,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return error; if (IS_APPEND(dir)) return -EPERM; - if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) + + if (check_sticky(dir, inode) || IS_APPEND(inode) || + IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) return -EPERM; if (isdir) { - if (!S_ISDIR(victim->d_inode->i_mode)) + if (!d_is_directory(victim) && !d_is_autodir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - } else if (S_ISDIR(victim->d_inode->i_mode)) + } else if (d_is_directory(victim) || d_is_autodir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; @@ -2983,7 +2950,7 @@ retry_lookup: /* * create/update audit record if it already exists. */ - if (path->dentry->d_inode) + if (d_is_positive(path->dentry)) audit_inode(name, path->dentry, 0); /* @@ -3012,12 +2979,12 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (!inode) { + if (d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } - if (should_follow_link(inode, !symlink_ok)) { + if (should_follow_link(path->dentry, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; @@ -3046,10 +3013,11 @@ finish_open: } audit_inode(name, nd->path.dentry, 0); error = -EISDIR; - if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) + if ((open_flag & O_CREAT) && + (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) goto out; error = -ENOTDIR; - if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) + if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) goto out; if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; @@ -3275,7 +3243,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, nd.root.mnt = mnt; nd.root.dentry = dentry; - if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) + if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); @@ -3325,8 +3293,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname, goto unlock; error = -EEXIST; - if (dentry->d_inode) + if (d_is_positive(dentry)) goto fail; + /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - @@ -3647,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) return do_rmdir(AT_FDCWD, pathname); } -int vfs_unlink(struct inode *dir, struct dentry *dentry) +/** + * vfs_unlink - unlink a filesystem object + * @dir: parent directory + * @dentry: victim + * @delegated_inode: returns victim inode, if the inode is delegated. + * + * The caller must hold dir->i_mutex. + * + * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and + * return a reference to the inode in delegated_inode. The caller + * should then break the delegation on that inode and retry. Because + * breaking a delegation may take a long time, the caller should drop + * dir->i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ +int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { + struct inode *target = dentry->d_inode; int error = may_delete(dir, dentry, 0); if (error) @@ -3657,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!dir->i_op->unlink) return -EPERM; - mutex_lock(&dentry->d_inode->i_mutex); + mutex_lock(&target->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); if (!error) { + error = try_break_deleg(target, delegated_inode); + if (error) + goto out; error = dir->i_op->unlink(dir, dentry); if (!error) dont_mount(dentry); } } - mutex_unlock(&dentry->d_inode->i_mutex); +out: + mutex_unlock(&target->i_mutex); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { - fsnotify_link_count(dentry->d_inode); + fsnotify_link_count(target); d_delete(dentry); } @@ -3692,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; + struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: name = user_path_parent(dfd, pathname, &nd, lookup_flags); @@ -3706,7 +3699,7 @@ retry: error = mnt_want_write(nd.path.mnt); if (error) goto exit1; - +retry_deleg: mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); @@ -3715,19 +3708,25 @@ retry: if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (!inode) + if (d_is_negative(dentry)) goto slashes; ihold(inode); error = security_path_unlink(&nd.path, dentry); if (error) goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, dentry); + error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ + inode = NULL; + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); @@ -3740,8 +3739,12 @@ exit1: return error; slashes: - error = !dentry->d_inode ? -ENOENT : - S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; + if (d_is_negative(dentry)) + error = -ENOENT; + else if (d_is_directory(dentry) || d_is_autodir(dentry)) + error = -EISDIR; + else + error = -ENOTDIR; goto exit2; } @@ -3817,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn return sys_symlinkat(oldname, AT_FDCWD, newname); } -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +/** + * vfs_link - create a new link + * @old_dentry: object to be linked + * @dir: new parent + * @new_dentry: where to create the new link + * @delegated_inode: returns inode needing a delegation break + * + * The caller must hold dir->i_mutex + * + * If vfs_link discovers a delegation on the to-be-linked file in need + * of breaking, it will return -EWOULDBLOCK and return a reference to the + * inode in delegated_inode. The caller should then break the delegation + * and retry. Because breaking a delegation may take a long time, the + * caller should drop the i_mutex before doing so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -3853,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; - else - error = dir->i_op->link(old_dentry, dir, new_dentry); + else { + error = try_break_deleg(inode, delegated_inode); + if (!error) + error = dir->i_op->link(old_dentry, dir, new_dentry); + } if (!error && (inode->i_state & I_LINKABLE)) { spin_lock(&inode->i_lock); @@ -3881,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, { struct dentry *new_dentry; struct path old_path, new_path; + struct inode *delegated_inode = NULL; int how = 0; int error; @@ -3919,9 +3945,14 @@ retry: error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); + error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, how)) { how |= LOOKUP_REVAL; goto retry; @@ -3946,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * That's where 4.4 screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. - * c) we have to lock _three_ objects - parents and victim (if it exists). + * c) we have to lock _four_ objects - parents and victim (if it exists), + * and source (if it is not a directory). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4019,9 +4051,11 @@ out: } static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode) { struct inode *target = new_dentry->d_inode; + struct inode *source = old_dentry->d_inode; int error; error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -4029,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, return error; dget(new_dentry); - if (target) - mutex_lock(&target->i_mutex); + lock_two_nondirectories(source, target); error = -EBUSY; if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) goto out; + error = try_break_deleg(source, delegated_inode); + if (error) + goto out; + if (target) { + error = try_break_deleg(target, delegated_inode); + if (error) + goto out; + } error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; @@ -4045,17 +4086,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry, new_dentry); out: - if (target) - mutex_unlock(&target->i_mutex); + unlock_two_nondirectories(source, target); dput(new_dentry); return error; } +/** + * vfs_rename - rename a filesystem object + * @old_dir: parent of source + * @old_dentry: source + * @new_dir: parent of destination + * @new_dentry: destination + * @delegated_inode: returns an inode needing a delegation break + * + * The caller must hold multiple mutexes--see lock_rename()). + * + * If vfs_rename discovers a delegation in need of breaking at either + * the source or destination, it will return -EWOULDBLOCK and return a + * reference to the inode in delegated_inode. The caller should then + * break the delegation and retry. Because breaking a delegation may + * take a long time, the caller should drop all locks before doing + * so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + */ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode) { int error; - int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); + int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry); const unsigned char *old_name; if (old_dentry->d_inode == new_dentry->d_inode) @@ -4080,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (is_dir) error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); else - error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); + error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); if (!error) fsnotify_move(old_dir, new_dir, old_name, is_dir, new_dentry->d_inode, old_dentry); @@ -4096,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct nameidata oldnd, newnd; + struct inode *delegated_inode = NULL; struct filename *from; struct filename *to; unsigned int lookup_flags = 0; @@ -4135,6 +4198,7 @@ retry: newnd.flags &= ~LOOKUP_PARENT; newnd.flags |= LOOKUP_RENAME_TARGET; +retry_deleg: trap = lock_rename(new_dir, old_dir); old_dentry = lookup_hash(&oldnd); @@ -4143,10 +4207,10 @@ retry: goto exit3; /* source must exist */ error = -ENOENT; - if (!old_dentry->d_inode) + if (d_is_negative(old_dentry)) goto exit4; /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) { error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) goto exit4; @@ -4171,13 +4235,19 @@ retry: if (error) goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry); + new_dir->d_inode, new_dentry, + &delegated_inode); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_dir, old_dir); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write(oldnd.path.mnt); exit2: if (retry_estale(error, lookup_flags)) diff --git a/fs/namespace.c b/fs/namespace.c index da5c49483430..ac2ce8a766e1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -39,7 +39,7 @@ static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; static struct list_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; -static struct rw_semaphore namespace_sem; +static DECLARE_RWSEM(namespace_sem); /* /sys/fs */ struct kobject *fs_kobj; @@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); * It should be taken for write in all cases where the vfsmount * tree or hash is modified or when a vfsmount structure is modified. */ -DEFINE_BRLOCK(vfsmount_lock); +__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { @@ -63,8 +63,6 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) return tmp & (HASH_SIZE - 1); } -#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) - /* * allocation is serialized by namespace_sem, but we need the spinlock to * serialize with freeing. @@ -458,7 +456,7 @@ static int mnt_make_readonly(struct mount *mnt) { int ret = 0; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -492,15 +490,15 @@ static int mnt_make_readonly(struct mount *mnt) */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -512,7 +510,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -534,7 +532,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return err; } @@ -549,30 +547,56 @@ static void free_vfsmnt(struct mount *mnt) kmem_cache_free(mnt_cache, mnt); } +/* call under rcu_read_lock */ +bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) +{ + struct mount *mnt; + if (read_seqretry(&mount_lock, seq)) + return false; + if (bastard == NULL) + return true; + mnt = real_mount(bastard); + mnt_add_count(mnt, 1); + if (likely(!read_seqretry(&mount_lock, seq))) + return true; + if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { + mnt_add_count(mnt, -1); + return false; + } + rcu_read_unlock(); + mntput(bastard); + rcu_read_lock(); + return false; +} + /* - * find the first or last mount at @dentry on vfsmount @mnt depending on - * @dir. If @dir is set return the first mount else return the last mount. - * vfsmount_lock must be held for read or write. + * find the first mount at @dentry on vfsmount @mnt. + * call under rcu_read_lock() */ -struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, - int dir) +struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { struct list_head *head = mount_hashtable + hash(mnt, dentry); - struct list_head *tmp = head; - struct mount *p, *found = NULL; + struct mount *p; - for (;;) { - tmp = dir ? tmp->next : tmp->prev; - p = NULL; - if (tmp == head) - break; - p = list_entry(tmp, struct mount, mnt_hash); - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) { - found = p; - break; - } - } - return found; + list_for_each_entry_rcu(p, head, mnt_hash) + if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) + return p; + return NULL; +} + +/* + * find the last mount at @dentry on vfsmount @mnt. + * mount_lock must be held. + */ +struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) +{ + struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct mount *p; + + list_for_each_entry_reverse(p, head, mnt_hash) + if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) + return p; + return NULL; } /* @@ -594,17 +618,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct mount *child_mnt; + struct vfsmount *m; + unsigned seq; - br_read_lock(&vfsmount_lock); - child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); - if (child_mnt) { - mnt_add_count(child_mnt, 1); - br_read_unlock(&vfsmount_lock); - return &child_mnt->mnt; - } else { - br_read_unlock(&vfsmount_lock); - return NULL; - } + rcu_read_lock(); + do { + seq = read_seqbegin(&mount_lock); + child_mnt = __lookup_mnt(path->mnt, path->dentry); + m = child_mnt ? &child_mnt->mnt : NULL; + } while (!legitimize_mnt(m, seq)); + rcu_read_unlock(); + return m; } static struct mountpoint *new_mountpoint(struct dentry *dentry) @@ -796,9 +820,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -839,9 +863,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); if ((flag & CL_SLAVE) || ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { @@ -872,64 +896,66 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return ERR_PTR(err); } -static inline void mntfree(struct mount *mnt) +static void delayed_free(struct rcu_head *head) { - struct vfsmount *m = &mnt->mnt; - struct super_block *sb = m->mnt_sb; - - /* - * This probably indicates that somebody messed - * up a mnt_want/drop_write() pair. If this - * happens, the filesystem was probably unable - * to make r/w->r/o transitions. - */ - /* - * The locking used to deal with mnt_count decrement provides barriers, - * so mnt_get_writers() below is safe. - */ - WARN_ON(mnt_get_writers(mnt)); - fsnotify_vfsmount_delete(m); - dput(m->mnt_root); - free_vfsmnt(mnt); - deactivate_super(sb); + struct mount *mnt = container_of(head, struct mount, mnt_rcu); + kfree(mnt->mnt_devname); +#ifdef CONFIG_SMP + free_percpu(mnt->mnt_pcp); +#endif + kmem_cache_free(mnt_cache, mnt); } static void mntput_no_expire(struct mount *mnt) { put_again: -#ifdef CONFIG_SMP - br_read_lock(&vfsmount_lock); - if (likely(mnt->mnt_ns)) { - /* shouldn't be the last one */ - mnt_add_count(mnt, -1); - br_read_unlock(&vfsmount_lock); + rcu_read_lock(); + mnt_add_count(mnt, -1); + if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + rcu_read_unlock(); return; } - br_read_unlock(&vfsmount_lock); - - br_write_lock(&vfsmount_lock); - mnt_add_count(mnt, -1); + lock_mount_hash(); if (mnt_get_count(mnt)) { - br_write_unlock(&vfsmount_lock); + rcu_read_unlock(); + unlock_mount_hash(); return; } -#else - mnt_add_count(mnt, -1); - if (likely(mnt_get_count(mnt))) - return; - br_write_lock(&vfsmount_lock); -#endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(&vfsmount_lock); + rcu_read_unlock(); + unlock_mount_hash(); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { + rcu_read_unlock(); + unlock_mount_hash(); + return; + } + mnt->mnt.mnt_flags |= MNT_DOOMED; + rcu_read_unlock(); list_del(&mnt->mnt_instance); - br_write_unlock(&vfsmount_lock); - mntfree(mnt); + unlock_mount_hash(); + + /* + * This probably indicates that somebody messed + * up a mnt_want/drop_write() pair. If this + * happens, the filesystem was probably unable + * to make r/w->r/o transitions. + */ + /* + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. + */ + WARN_ON(mnt_get_writers(mnt)); + fsnotify_vfsmount_delete(&mnt->mnt); + dput(mnt->mnt.mnt_root); + deactivate_super(mnt->mnt.mnt_sb); + mnt_free_id(mnt); + call_rcu(&mnt->mnt_rcu, delayed_free); } void mntput(struct vfsmount *mnt) @@ -954,21 +980,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); real_mount(mnt)->mnt_pinned++; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } EXPORT_SYMBOL(mnt_unpin); @@ -1085,12 +1111,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock needed for mnt_get_count */ - br_write_lock(&vfsmount_lock); + lock_mount_hash(); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); if (actual_refs > minimum_refs) return 0; @@ -1117,10 +1143,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); up_read(&namespace_sem); return ret; } @@ -1142,23 +1168,13 @@ static void namespace_unlock(void) list_splice_init(&unmounted, &head); up_write(&namespace_sem); + synchronize_rcu(); + while (!list_empty(&head)) { mnt = list_first_entry(&head, struct mount, mnt_hash); list_del_init(&mnt->mnt_hash); - if (mnt_has_parent(mnt)) { - struct dentry *dentry; - struct mount *m; - - br_write_lock(&vfsmount_lock); - dentry = mnt->mnt_mountpoint; - m = mnt->mnt_parent; - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - m->mnt_ghosts--; - br_write_unlock(&vfsmount_lock); - dput(dentry); - mntput(&m->mnt); - } + if (mnt->mnt_ex_mountpoint.mnt) + path_put(&mnt->mnt_ex_mountpoint); mntput(&mnt->mnt); } } @@ -1169,10 +1185,13 @@ static inline void namespace_lock(void) } /* - * vfsmount lock must be held for write + * mount_lock must be held * namespace_sem must be held for write + * how = 0 => just this tree, don't propagate + * how = 1 => propagate; we know that nobody else has reference to any victims + * how = 2 => lazy umount */ -void umount_tree(struct mount *mnt, int propagate) +void umount_tree(struct mount *mnt, int how) { LIST_HEAD(tmp_list); struct mount *p; @@ -1180,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate) for (p = mnt; p; p = next_mnt(p, mnt)) list_move(&p->mnt_hash, &tmp_list); - if (propagate) + if (how) propagate_umount(&tmp_list); list_for_each_entry(p, &tmp_list, mnt_hash) { @@ -1188,10 +1207,16 @@ void umount_tree(struct mount *mnt, int propagate) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; + if (how < 2) + p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { - p->mnt_parent->mnt_ghosts++; put_mountpoint(p->mnt_mp); + /* move the reference to mountpoint into ->mnt_ex_mountpoint */ + p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; + p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; + p->mnt_mountpoint = p->mnt.mnt_root; + p->mnt_parent = p; p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); @@ -1225,12 +1250,12 @@ static int do_umount(struct mount *mnt, int flags) * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. */ - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (mnt_get_count(mnt) != 2) { - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return -EBUSY; } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1272,19 +1297,23 @@ static int do_umount(struct mount *mnt, int flags) } namespace_lock(); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); event++; - if (!(flags & MNT_DETACH)) - shrink_submounts(mnt); - - retval = -EBUSY; - if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { + if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 1); + umount_tree(mnt, 2); retval = 0; + } else { + shrink_submounts(mnt); + retval = -EBUSY; + if (!propagate_mount_busy(mnt, 2)) { + if (!list_empty(&mnt->mnt_list)) + umount_tree(mnt, 1); + retval = 0; + } } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); namespace_unlock(); return retval; } @@ -1427,18 +1456,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt.mnt_root, flag); if (IS_ERR(q)) goto out; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, parent, p->mnt_mp); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } } return res; out: if (res) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); umount_tree(res, 0); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } return q; } @@ -1460,9 +1489,9 @@ struct vfsmount *collect_mounts(struct path *path) void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); umount_tree(real_mount(mnt), 0); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); namespace_unlock(); } @@ -1589,7 +1618,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out_cleanup_ids; - br_write_lock(&vfsmount_lock); + lock_mount_hash(); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1608,7 +1637,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return 0; @@ -1710,10 +1739,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - br_write_lock(&vfsmount_lock); + lock_mount_hash(); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); out_unlock: namespace_unlock(); @@ -1785,9 +1814,9 @@ static int do_loopback(struct path *path, const char *old_name, err = graft_tree(mnt, parent, mp); if (err) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); umount_tree(mnt, 0); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } out2: unlock_mount(mp); @@ -1846,17 +1875,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(&vfsmount_lock); + lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(&vfsmount_lock); - } - up_write(&sb->s_umount); - if (!err) { - br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); } + up_write(&sb->s_umount); return err; } @@ -1972,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) struct mount *parent; int err; - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); + mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); mp = lock_mount(path); if (IS_ERR(mp)) @@ -2077,9 +2102,7 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { namespace_lock(); - br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(&vfsmount_lock); namespace_unlock(); } mntput(m); @@ -2095,11 +2118,9 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { namespace_lock(); - br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - br_write_unlock(&vfsmount_lock); namespace_unlock(); } EXPORT_SYMBOL(mnt_set_expiry); @@ -2118,7 +2139,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; namespace_lock(); - br_write_lock(&vfsmount_lock); + lock_mount_hash(); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -2137,7 +2158,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1); } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); namespace_unlock(); } @@ -2193,7 +2214,7 @@ resume: * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint * - * vfsmount_lock must be held for write + * mount_lock must be held for write */ static void shrink_submounts(struct mount *mnt) { @@ -2414,20 +2435,25 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) return new_ns; } -/* - * Allocate a new namespace structure and populate it with contents - * copied from the namespace of the passed in task structure. - */ -static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, - struct user_namespace *user_ns, struct fs_struct *fs) +struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, + struct user_namespace *user_ns, struct fs_struct *new_fs) { struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct mount *p, *q; - struct mount *old = mnt_ns->root; + struct mount *old; struct mount *new; int copy_flags; + BUG_ON(!ns); + + if (likely(!(flags & CLONE_NEWNS))) { + get_mnt_ns(ns); + return ns; + } + + old = ns->root; + new_ns = alloc_mnt_ns(user_ns); if (IS_ERR(new_ns)) return new_ns; @@ -2435,7 +2461,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, namespace_lock(); /* First pass: copy the tree topology */ copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; - if (user_ns != mnt_ns->user_ns) + if (user_ns != ns->user_ns) copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { @@ -2444,9 +2470,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return ERR_CAST(new); } new_ns->root = new; - br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2457,13 +2481,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new; while (p) { q->mnt_ns = new_ns; - if (fs) { - if (&p->mnt == fs->root.mnt) { - fs->root.mnt = mntget(&q->mnt); + if (new_fs) { + if (&p->mnt == new_fs->root.mnt) { + new_fs->root.mnt = mntget(&q->mnt); rootmnt = &p->mnt; } - if (&p->mnt == fs->pwd.mnt) { - fs->pwd.mnt = mntget(&q->mnt); + if (&p->mnt == new_fs->pwd.mnt) { + new_fs->pwd.mnt = mntget(&q->mnt); pwdmnt = &p->mnt; } } @@ -2484,23 +2508,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return new_ns; } -struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, - struct user_namespace *user_ns, struct fs_struct *new_fs) -{ - struct mnt_namespace *new_ns; - - BUG_ON(!ns); - get_mnt_ns(ns); - - if (!(flags & CLONE_NEWNS)) - return ns; - - new_ns = dup_mnt_ns(ns, user_ns, new_fs); - - put_mnt_ns(ns); - return new_ns; -} - /** * create_mnt_ns - creates a private namespace and adds a root filesystem * @mnt: pointer to the new root filesystem mountpoint @@ -2593,7 +2600,7 @@ out_type: /* * Return true if path is reachable from root * - * namespace_sem or vfsmount_lock is held + * namespace_sem or mount_lock is held */ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, const struct path *root) @@ -2608,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(&vfsmount_lock); + read_seqlock_excl(&mount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(&vfsmount_lock); + read_sequnlock_excl(&mount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2701,7 +2708,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, if (!is_path_reachable(old_mnt, old.dentry, &new)) goto out4; root_mp->m_count++; /* pin it so it won't go away */ - br_write_lock(&vfsmount_lock); + lock_mount_hash(); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { @@ -2713,7 +2720,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); chroot_fs_refs(&root, &new); put_mountpoint(root_mp); error = 0; @@ -2767,8 +2774,6 @@ void __init mnt_init(void) unsigned u; int err; - init_rwsem(&namespace_sem); - mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -2785,8 +2790,6 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mountpoint_hashtable[u]); - br_lock_init(&vfsmount_lock); - err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2802,11 +2805,7 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!atomic_dec_and_test(&ns->count)) return; - namespace_lock(); - br_write_lock(&vfsmount_lock); - umount_tree(ns->root, 0); - br_write_unlock(&vfsmount_lock); - namespace_unlock(); + drop_collected_mounts(&ns->root->mnt); free_mnt_ns(ns); } @@ -2829,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR_OR_NULL(mnt)) { - br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_ns = NULL; - br_write_unlock(&vfsmount_lock); + synchronize_rcu(); /* yecchhh... */ mntput(mnt); } } @@ -2875,7 +2873,7 @@ bool fs_fully_visible(struct file_system_type *type) if (unlikely(!ns)) return false; - namespace_lock(); + down_read(&namespace_sem); list_for_each_entry(mnt, &ns->list, mnt_list) { struct mount *child; if (mnt->mnt.mnt_sb->s_type != type) @@ -2896,7 +2894,7 @@ bool fs_fully_visible(struct file_system_type *type) next: ; } found: - namespace_unlock(); + up_read(&namespace_sem); return visible; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 3be047474bfc..c320ac52353e 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -339,9 +339,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags) if (val) goto finished; - DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - NCP_GET_AGE(dentry)); + DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n", + dentry, NCP_GET_AGE(dentry)); len = sizeof(__name); if (ncp_is_server_root(dir)) { @@ -359,8 +358,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags) res = ncp_obtain_info(server, dir, __name, &(finfo.i)); } finfo.volume = finfo.i.volNumber; - DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n", - dentry->d_parent->d_name.name, __name, res); + DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n", + dentry->d_parent, __name, res); /* * If we didn't find it, or if it has a different dirEntNum to * what we remember, it's not valid any more. @@ -454,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) ctl.page = NULL; ctl.cache = NULL; - DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file, (int) ctx->pos); result = -EIO; @@ -740,12 +738,10 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx, int more; size_t bufsize; - DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file, (unsigned long) ctx->pos); - PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", - dentry->d_name.name, NCP_FINFO(dir)->volNumber, - NCP_FINFO(dir)->dirEntNum); + PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n", + file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum); err = ncp_initialize_search(server, dir, &seq); if (err) { @@ -850,8 +846,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig if (!ncp_conn_valid(server)) goto finished; - PPRINTK("ncp_lookup: server lookup for %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry); len = sizeof(__name); if (ncp_is_server_root(dir)) { @@ -867,8 +862,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig if (!res) res = ncp_obtain_info(server, dir, __name, &(finfo.i)); } - PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n", - dentry->d_parent->d_name.name, __name, res); + PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res); /* * If we didn't find an entry, make a negative dentry. */ @@ -915,8 +909,7 @@ out: return error; out_close: - PPRINTK("ncp_instantiate: %s/%s failed, closing file\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry); ncp_close_file(NCP_SERVER(dir), finfo->file_handle); goto out; } @@ -930,8 +923,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, int opmode; __u8 __name[NCP_MAXPATHLEN + 1]; - PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n", - dentry->d_parent->d_name.name, dentry->d_name.name, mode); + PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode); ncp_age_dentry(server, dentry); len = sizeof(__name); @@ -960,8 +952,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, error = -ENAMETOOLONG; else if (result < 0) error = result; - DPRINTK("ncp_create: %s/%s failed\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_create: %pd2 failed\n", dentry); goto out; } opmode = O_WRONLY; @@ -994,8 +985,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int error, len; __u8 __name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_mkdir: making %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_mkdir: making %pd2\n", dentry); ncp_age_dentry(server, dentry); len = sizeof(__name); @@ -1032,8 +1022,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) int error, result, len; __u8 __name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_rmdir: removing %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_rmdir: removing %pd2\n", dentry); len = sizeof(__name); error = ncp_io2vol(server, __name, &len, dentry->d_name.name, @@ -1078,8 +1067,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) int error; server = NCP_SERVER(dir); - DPRINTK("ncp_unlink: unlinking %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_unlink: unlinking %pd2\n", dentry); /* * Check whether to close the file ... @@ -1099,8 +1087,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) #endif switch (error) { case 0x00: - DPRINTK("ncp: removed %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp: removed %pd2\n", dentry); break; case 0x85: case 0x8A: @@ -1133,9 +1120,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, int old_len, new_len; __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_rename: %s/%s to %s/%s\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - new_dentry->d_parent->d_name.name, new_dentry->d_name.name); + DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry); ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); @@ -1165,8 +1150,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, #endif switch (error) { case 0x00: - DPRINTK("ncp renamed %s -> %s.\n", - old_dentry->d_name.name,new_dentry->d_name.name); + DPRINTK("ncp renamed %pd -> %pd.\n", + old_dentry, new_dentry); break; case 0x9E: error = -ENAMETOOLONG; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 122e260247f5..8f5074e1ecb9 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -107,8 +107,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) void* freepage; size_t freelen; - DPRINTK("ncp_file_read: enter %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_read: enter %pd2\n", dentry); pos = *ppos; @@ -166,8 +165,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) file_accessed(file); - DPRINTK("ncp_file_read: exit %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_read: exit %pd2\n", dentry); outrel: ncp_inode_close(inode); return already_read ? already_read : error; @@ -184,8 +182,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * int errno; void* bouncebuffer; - DPRINTK("ncp_file_write: enter %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_write: enter %pd2\n", dentry); if ((ssize_t) count < 0) return -EINVAL; pos = *ppos; @@ -264,8 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * i_size_write(inode, pos); mutex_unlock(&inode->i_mutex); } - DPRINTK("ncp_file_write: exit %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + DPRINTK("ncp_file_write: exit %pd2\n", dentry); outrel: ncp_inode_close(inode); return already_written ? already_written : errno; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 4659da67e7f6..2cf2ebecb55f 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -782,6 +782,17 @@ out: return error; } +static void delayed_free(struct rcu_head *p) +{ + struct ncp_server *server = container_of(p, struct ncp_server, rcu); +#ifdef CONFIG_NCPFS_NLS + /* unload the NLS charsets */ + unload_nls(server->nls_vol); + unload_nls(server->nls_io); +#endif /* CONFIG_NCPFS_NLS */ + kfree(server); +} + static void ncp_put_super(struct super_block *sb) { struct ncp_server *server = NCP_SBP(sb); @@ -792,11 +803,6 @@ static void ncp_put_super(struct super_block *sb) ncp_stop_tasks(server); -#ifdef CONFIG_NCPFS_NLS - /* unload the NLS charsets */ - unload_nls(server->nls_vol); - unload_nls(server->nls_io); -#endif /* CONFIG_NCPFS_NLS */ mutex_destroy(&server->rcv.creq_mutex); mutex_destroy(&server->root_setup_lock); mutex_destroy(&server->mutex); @@ -813,8 +819,7 @@ static void ncp_put_super(struct super_block *sb) vfree(server->rxbuf); vfree(server->txbuf); vfree(server->packet); - sb->s_fs_info = NULL; - kfree(server); + call_rcu(&server->rcu, delayed_free); } static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index c51b2c543539..b81e97adc5a9 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -38,7 +38,7 @@ struct ncp_mount_data_kernel { }; struct ncp_server { - + struct rcu_head rcu; struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of interest for us later, so we store it completely. */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9a8676f33350..812154aff981 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -98,9 +98,7 @@ nfs_opendir(struct inode *inode, struct file *filp) struct nfs_open_dir_context *ctx; struct rpc_cred *cred; - dfprintk(FILE, "NFS: open dir(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dfprintk(FILE, "NFS: open dir(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); @@ -297,11 +295,10 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (ctx->duped > 0 && ctx->dup_cookie == *desc->dir_cookie) { if (printk_ratelimit()) { - pr_notice("NFS: directory %s/%s contains a readdir loop." + pr_notice("NFS: directory %pD2 contains a readdir loop." "Please contact your server vendor. " "The file: %s has duplicate cookie %llu\n", - desc->file->f_dentry->d_parent->d_name.name, - desc->file->f_dentry->d_name.name, + desc->file, array->array[i].string.name, *desc->dir_cookie); } @@ -822,9 +819,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct nfs_open_dir_context *dir_ctx = file->private_data; int res = 0; - dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (long long)ctx->pos); + dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", + file, (long long)ctx->pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); /* @@ -880,22 +876,17 @@ out: nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; - dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - res); + dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; } static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; - dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, - offset, whence); + dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", + filp, offset, whence); mutex_lock(&inode->i_mutex); switch (whence) { @@ -925,15 +916,12 @@ out: static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); - dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); mutex_lock(&inode->i_mutex); - nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); mutex_unlock(&inode->i_mutex); return 0; } @@ -1073,9 +1061,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", + __func__, dentry); goto out_bad; } @@ -1125,9 +1112,8 @@ out_set_verifier: nfs_advise_use_readdirplus(dir); out_valid_noent: dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", + __func__, dentry); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -1153,18 +1139,16 @@ out_zap_parent: goto out_valid; dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", + __func__, dentry); return 0; out_error: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name, error); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", + __func__, dentry, error); return error; } @@ -1188,16 +1172,14 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) * eventually need to do something more here. */ if (!inode) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n", + __func__, dentry); return 1; } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", + __func__, dentry); return 0; } @@ -1212,9 +1194,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) */ static int nfs_dentry_delete(const struct dentry *dentry) { - dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - dentry->d_flags); + dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n", + dentry, dentry->d_flags); /* Unhash any dentry with a stale inode */ if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) @@ -1292,8 +1273,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in struct nfs4_label *label = NULL; int error; - dfprintk(VFS, "NFS: lookup(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); @@ -1424,8 +1404,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Expect a negative dentry */ BUG_ON(dentry->d_inode); - dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); err = nfs_check_flags(open_flags); if (err) @@ -1614,8 +1594,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry, int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1641,8 +1621,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1670,8 +1650,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1698,8 +1678,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); trace_nfs_rmdir_enter(dir, dentry); if (dentry->d_inode) { @@ -1734,8 +1714,7 @@ static int nfs_safe_remove(struct dentry *dentry) struct inode *inode = dentry->d_inode; int error = -EBUSY; - dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry); /* If the dentry was sillyrenamed, we simply call d_delete() */ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { @@ -1768,8 +1747,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) int error; int need_rehash = 0; - dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id, + dir->i_ino, dentry); trace_nfs_unlink_enter(dir, dentry); spin_lock(&dentry->d_lock); @@ -1819,8 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) unsigned int pathlen = strlen(symname); int error; - dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name, symname); + dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry, symname); if (pathlen > PAGE_SIZE) return -ENAMETOOLONG; @@ -1842,9 +1821,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { - dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", + dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, - dentry->d_name.name, symname, error); + dentry, symname, error); d_drop(dentry); __free_page(page); return error; @@ -1871,9 +1850,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) struct inode *inode = old_dentry->d_inode; int error; - dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n", + old_dentry, dentry); trace_nfs_link_enter(inode, dir, dentry); NFS_PROTO(inode)->return_delegation(inode); @@ -1921,9 +1899,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *rehash = NULL; int error = -EBUSY; - dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - new_dentry->d_parent->d_name.name, new_dentry->d_name.name, + dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", + old_dentry, new_dentry, d_count(new_dentry)); trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 91ff089d3412..d71d66c9e0a1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -124,9 +124,8 @@ static inline int put_dreq(struct nfs_direct_req *dreq) ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { #ifndef CONFIG_NFS_SWAP - dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp->f_path.dentry->d_name.name, - (long long) pos, nr_segs); + dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", + iocb->ki_filp, (long long) pos, nr_segs); return -EINVAL; #else @@ -909,10 +908,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - count, (long long) pos); + dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); retval = 0; if (!count) @@ -965,10 +962,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); - dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - count, (long long) pos); + dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); retval = generic_write_checks(file, &pos, &count, 0); if (retval) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1e6bfdbc1aff..e2fcacf07de3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -65,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; - dprintk("NFS: open file(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: open file(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); @@ -81,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp) int nfs_file_release(struct inode *inode, struct file *filp) { - dprintk("NFS: release(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); @@ -123,10 +119,8 @@ force_reval: loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { - dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - offset, whence); + dprintk("NFS: llseek file(%pD2, %lld, %d)\n", + filp, offset, whence); /* * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate @@ -150,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek); int nfs_file_flush(struct file *file, fl_owner_t id) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); - dprintk("NFS: flush(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: flush(%pD2)\n", file); nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) @@ -177,15 +168,14 @@ ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); - dprintk("NFS: read(%s/%s, %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, + dprintk("NFS: read(%pD2, %lu@%lu)\n", + iocb->ki_filp, (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); @@ -203,13 +193,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); ssize_t res; - dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", + filp, (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) { @@ -224,12 +212,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); int status; - dprintk("NFS: mmap(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("NFS: mmap(%pD2)\n", file); /* Note: generic_file_mmap() returns ENOSYS on nommu systems * so we call that before revalidating the mapping @@ -258,15 +244,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); int nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); int have_error, do_resend, status; int ret = 0; - dprintk("NFS: fsync file(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); @@ -371,10 +354,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page *page; int once_thru = 0; - dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); start: /* @@ -414,10 +395,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, struct nfs_open_context *ctx = nfs_file_open_context(file); int status; - dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); /* * Zero any uninitialised parts of the page, and then mark the page @@ -601,22 +580,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct file *filp = vma->vm_file; - struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = file_inode(filp); unsigned pagelen; int ret = VM_FAULT_NOPAGE; struct address_space *mapping; - dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - filp->f_mapping->host->i_ino, + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n", + filp, filp->f_mapping->host->i_ino, (long long)page_offset(page)); /* make sure the cache has finished storing the page */ - nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); + nfs_fscache_wait_on_page_write(NFS_I(inode), page); lock_page(page); mapping = page_file_mapping(page); - if (mapping != dentry->d_inode->i_mapping) + if (mapping != inode->i_mapping) goto out_unlock; wait_on_page_writeback(page); @@ -659,22 +637,21 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); unsigned long written = 0; ssize_t result; size_t count = iov_length(iov, nr_segs); - result = nfs_key_timeout_notify(iocb->ki_filp, inode); + result = nfs_key_timeout_notify(file, inode); if (result) return result; - if (iocb->ki_filp->f_flags & O_DIRECT) + if (file->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); - dprintk("NFS: write(%s/%s, %lu@%Ld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (long long) pos); + dprintk("NFS: write(%pD2, %lu@%Ld)\n", + file, (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -682,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, /* * O_APPEND implies that we must revalidate the file length. */ - if (iocb->ki_filp->f_flags & O_APPEND) { - result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (file->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, file); if (result) goto out; } @@ -697,8 +674,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, written = result; /* Return error values for O_DSYNC and IS_SYNC() */ - if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { - int err = vfs_fsync(iocb->ki_filp, 0); + if (result >= 0 && nfs_need_sync_write(file, inode)) { + int err = vfs_fsync(file, 0); if (err < 0) result = err; } @@ -717,14 +694,12 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, struct file *filp, loff_t *ppos, size_t count, unsigned int flags) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned long written = 0; ssize_t ret; - dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS splice_write(%pD2, %lu@%llu)\n", + filp, (unsigned long) count, (unsigned long long) *ppos); /* * The combination of splice and an O_APPEND destination is disallowed. @@ -883,10 +858,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) int ret = -ENOLCK; int is_local = 0; - dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags, + dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n", + filp, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); nfs_inc_stats(inode, NFSIOS_VFSLOCK); @@ -923,10 +896,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = filp->f_mapping->host; int is_local = 0; - dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags); + dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n", + filp, fl->fl_type, fl->fl_flags); if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -960,9 +931,7 @@ EXPORT_SYMBOL_GPL(nfs_flock); */ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { - dprintk("NFS: setlease(%s/%s, arg=%ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, arg); + dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); return -EINVAL; } EXPORT_SYMBOL_GPL(nfs_setlease); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 348b535cd786..b5a0afc3ee10 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -253,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, dprintk("--> nfs_do_submount()\n"); - dprintk("%s: submounting on %s/%s\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("%s: submounting on %pd2\n", __func__, + dentry); if (page == NULL) goto out; devname = nfs_devname(dentry, page, PAGE_SIZE); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 90cb10d7b693..01b6f6a49d16 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call create %s\n", dentry->d_name.name); + dprintk("NFS call create %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) @@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s\n", dentry->d_name.name); + dprintk("NFS call symlink %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) @@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mkdir %s\n", dentry->d_name.name); + dprintk("NFS call mkdir %pd\n", dentry); sattr->ia_mode &= ~current_umask(); @@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, + dprintk("NFS call mknod %pd %u:%u\n", dentry, MAJOR(rdev), MINOR(rdev)); sattr->ia_mode &= ~current_umask(); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 1f01b55692ee..8de3407e0360 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -31,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) * -EOPENSTALE. The VFS will retry the lookup/create/open. */ - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: open file(%pd2)\n", dentry); if ((openflags & O_ACCMODE) == 3) openflags--; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index c08cbf40c59e..4e7f05d3e9db 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -292,8 +292,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, if (locations == NULL || locations->nlocations <= 0) goto out; - dprintk("%s: referral at %s/%s\n", __func__, - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("%s: referral at %pd2\n", __func__, dentry); page = (char *) __get_free_page(GFP_USER); if (!page) @@ -357,8 +356,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry * mnt = ERR_PTR(-ENOENT); parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", - __func__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %pd2\n", + __func__, dentry); err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ab33c0792df..659990c0109e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3771,9 +3771,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; - dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name, + dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, + dentry, (unsigned long long)cookie); nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index a8f57c728df5..fddbba2d9eff 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, }; int status = -ENOMEM; - dprintk("NFS call create %s\n", dentry->d_name.name); + dprintk("NFS call create %pd\n", dentry); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; @@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode; int status = -ENOMEM; - dprintk("NFS call mknod %s\n", dentry->d_name.name); + dprintk("NFS call mknod %pd\n", dentry); mode = sattr->ia_mode; if (S_ISFIFO(mode)) { @@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, }; int status = -ENAMETOOLONG; - dprintk("NFS call symlink %s\n", dentry->d_name.name); + dprintk("NFS call symlink %pd\n", dentry); if (len > NFS2_MAXPATHLEN) goto out; @@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) }; int status = -ENOMEM; - dprintk("NFS call mkdir %s\n", dentry->d_name.name); + dprintk("NFS call mkdir %pd\n", dentry); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 0c29b1bb3936..11d78944de79 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -495,9 +495,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) struct rpc_task *task; int error = -EBUSY; - dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - d_count(dentry)); + dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n", + dentry, d_count(dentry)); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* @@ -521,8 +520,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) SILLYNAME_FILEID_LEN, fileid, SILLYNAME_COUNTER_LEN, sillycounter); - dfprintk(VFS, "NFS: trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %pd to %s\n", + dentry, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ac1dc331ba31..c1d548211c31 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -954,10 +954,8 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, count, - (long long)(page_file_offset(page) + offset)); + dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n", + file, count, (long long)(page_file_offset(page) + offset)); if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e0a65a9e37e9..9c271f42604a 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) status = vfs_rmdir(parent->d_inode, child); if (status) - printk("failed to remove client recovery directory %s\n", - child->d_name.name); + printk("failed to remove client recovery directory %pd\n", + child); /* Keep trying, success or failure: */ return 0; } @@ -410,15 +410,15 @@ out: nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); } static int load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %s in recovery directory\n", - child->d_name.name); + printk("nfsd4: illegal name %pd in recovery directory\n", + child); /* Keep trying; maybe the others are OK: */ return 0; } @@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) { status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0874998a49cd..f36a30a9f2d1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3008,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return NULL; locks_init_lock(fl); fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_LEASE; + fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; fl->fl_owner = (fl_owner_t)(dp->dl_file); @@ -3843,9 +3843,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_confirm on file %pd\n", + cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) @@ -3922,9 +3921,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", + cstate->current_fh.fh_dentry); /* We don't yet support WANT bits: */ if (od->od_deleg_want) @@ -3980,9 +3978,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("NFSD: nfsd4_close on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_close on file %pd\n", + cstate->current_fh.fh_dentry); nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 814afaa4458a..3d0e15ae6f72 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) tdentry = parent; } if (tdentry != exp->ex_path.dentry) - dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry); rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; @@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", + dentry); } fhp->fh_dentry = dentry; @@ -361,10 +361,9 @@ skip_pseudoflavor_check: error = nfsd_permission(rqstp, exp, dentry, access); if (error) { - dprintk("fh_verify: %s/%s permission failure, " + dprintk("fh_verify: %pd2 permission failure, " "acc=%x, error=%d\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, + dentry, access, ntohl(error)); } out: @@ -514,14 +513,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), (long) exp->ex_path.dentry->d_inode->i_ino, - parent->d_name.name, dentry->d_name.name, + dentry, (inode ? inode->i_ino : 0)); /* Choose filehandle version and fsid type based on @@ -534,13 +532,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { - printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", - parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", + dentry); } if (fhp->fh_maxsize < NFS_FHSIZE) - printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n", fhp->fh_maxsize, - parent->d_name.name, dentry->d_name.name); + dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; @@ -613,8 +611,8 @@ out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); goto out; out_negative: - printk(KERN_ERR "fh_update: %s/%s still negative!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_update: %pd2 still negative!\n", + dentry); goto out; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index e5e6707ba687..4775bc4896c8 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) BUG_ON(!dentry); if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", + dentry); return; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c827acb0e943..94b5f5d2bfed 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -427,7 +427,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, goto out_nfserr; fh_lock(fhp); - host_err = notify_change(dentry, iap); + host_err = notify_change(dentry, iap, NULL); err = nfserrno(host_err); fh_unlock(fhp); } @@ -988,7 +988,11 @@ static void kill_suid(struct dentry *dentry) ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &ia); + /* + * Note we call this on write, so notify_change will not + * encounter any conflicting delegations: + */ + notify_change(dentry, &ia, NULL); mutex_unlock(&dentry->d_inode->i_mutex); } @@ -1317,9 +1321,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!fhp->fh_locked) { /* not actually possible */ printk(KERN_ERR - "nfsd_create: parent %s/%s not locked!\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + "nfsd_create: parent %pd2 not locked!\n", + dentry); err = nfserr_io; goto out; } @@ -1329,8 +1332,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ err = nfserr_exist; if (dchild->d_inode) { - dprintk("nfsd_create: dentry %s/%s not negative!\n", - dentry->d_name.name, dchild->d_name.name); + dprintk("nfsd_create: dentry %pd/%pd not negative!\n", + dentry, dchild); goto out; } @@ -1737,7 +1740,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserrno(host_err); goto out_dput; } - host_err = vfs_link(dold, dirp, dnew); + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1838,7 +1841,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (host_err) goto out_dput_new; } - host_err = vfs_rename(fdir, odentry, tdir, ndentry); + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1911,7 +1914,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_put; if (type != S_IFDIR) - host_err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 2778b0255dc6..ffb9b3675736 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -55,7 +55,7 @@ * * Return 1 if the attributes match and 0 if not. * - * NOTE: This function runs with the inode->i_lock spin lock held so it is not + * NOTE: This function runs with the inode_hash_lock spin lock held so it is not * allowed to sleep. */ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 17e6bdde96c5..dc7411fe185d 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1025,7 +1025,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, for(i = count; i < (num_got + count); i++) { bhs[i] = sb_getblk(osb->sb, first_blkno); if (bhs[i] == NULL) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f37d3c0e2053..aeb44e879c51 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -80,6 +80,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, le32_to_cpu(fe->i_clusters))) { + err = -ENOMEM; mlog(ML_ERROR, "block offset is outside the allocated size: " "%llu\n", (unsigned long long)iblock); goto bail; @@ -92,6 +93,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, iblock; buffer_cache_bh = sb_getblk(osb->sb, blkno); if (!buffer_cache_bh) { + err = -ENOMEM; mlog(ML_ERROR, "couldn't getblock for symlink!\n"); goto bail; } @@ -592,26 +594,11 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, ocfs2_rw_unlock(inode, level); } -/* - * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen - * from ext3. PageChecked() bits have been removed as OCFS2 does not - * do journalled data. - */ -static void ocfs2_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) -{ - journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; - - jbd2_journal_invalidatepage(journal, page, offset, length); -} - static int ocfs2_releasepage(struct page *page, gfp_t wait) { - journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; - if (!page_has_buffers(page)) return 0; - return jbd2_journal_try_to_free_buffers(journal, page, wait); + return try_to_free_buffers(page); } static ssize_t ocfs2_direct_IO(int rw, @@ -1802,8 +1789,7 @@ try_again: data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; credits = ocfs2_calc_extend_credits(inode->i_sb, - &di->id2.i_list, - clusters_to_alloc); + &di->id2.i_list); } @@ -1897,10 +1883,14 @@ out_commit: out: ocfs2_free_write_ctxt(wc); - if (data_ac) + if (data_ac) { ocfs2_free_alloc_context(data_ac); - if (meta_ac) + data_ac = NULL; + } + if (meta_ac) { ocfs2_free_alloc_context(meta_ac); + meta_ac = NULL; + } if (ret == -ENOSPC && try_free) { /* @@ -2087,7 +2077,7 @@ const struct address_space_operations ocfs2_aops = { .write_end = ocfs2_write_end, .bmap = ocfs2_bmap, .direct_IO = ocfs2_direct_IO, - .invalidatepage = ocfs2_invalidatepage, + .invalidatepage = block_invalidatepage, .releasepage = ocfs2_releasepage, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5d18ad10c27f..5b704c63a103 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -115,7 +115,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, if (bhs[i] == NULL) { bhs[i] = sb_getblk(osb->sb, block++); if (bhs[i] == NULL) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } @@ -214,7 +214,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, bhs[i] = sb_getblk(sb, block++); if (bhs[i] == NULL) { ocfs2_metadata_cache_io_unlock(ci); - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 363f0dcc924f..73920ffda05b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -35,6 +35,7 @@ #include <linux/time.h> #include <linux/debugfs.h> #include <linux/slab.h> +#include <linux/bitmap.h> #include "heartbeat.h" #include "tcp.h" @@ -282,15 +283,6 @@ struct o2hb_bio_wait_ctxt { int wc_error; }; -static int o2hb_pop_count(void *map, int count) -{ - int i = -1, pop = 0; - - while ((i = find_next_bit(map, count, i + 1)) < count) - pop++; - return pop; -} - static void o2hb_write_timeout(struct work_struct *work) { int failed, quorum; @@ -307,9 +299,9 @@ static void o2hb_write_timeout(struct work_struct *work) spin_lock_irqsave(&o2hb_live_lock, flags); if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); - failed = o2hb_pop_count(&o2hb_failed_region_bitmap, + failed = bitmap_weight(o2hb_failed_region_bitmap, O2NM_MAX_REGIONS); - quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, + quorum = bitmap_weight(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS); spin_unlock_irqrestore(&o2hb_live_lock, flags); @@ -765,7 +757,7 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg) * If global heartbeat active, unpin all regions if the * region count > CUT_OFF */ - if (o2hb_pop_count(&o2hb_quorum_region_bitmap, + if (bitmap_weight(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) o2hb_region_unpin(NULL); unlock: @@ -954,23 +946,9 @@ out: return changed; } -/* This could be faster if we just implmented a find_last_bit, but I - * don't think the circumstances warrant it. */ -static int o2hb_highest_node(unsigned long *nodes, - int numbits) +static int o2hb_highest_node(unsigned long *nodes, int numbits) { - int highest, node; - - highest = numbits; - node = -1; - while ((node = find_next_bit(nodes, numbits, node + 1)) != -1) { - if (node >= numbits) - break; - - highest = node; - } - - return highest; + return find_last_bit(nodes, numbits); } static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) @@ -1829,7 +1807,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, live_threshold = O2HB_LIVE_THRESHOLD; if (o2hb_global_heartbeat_active()) { spin_lock(&o2hb_live_lock); - if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) + if (bitmap_weight(o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) live_threshold <<= 1; spin_unlock(&o2hb_live_lock); } @@ -2180,7 +2158,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, if (!o2hb_dependent_users) goto unlock; - if (o2hb_pop_count(&o2hb_quorum_region_bitmap, + if (bitmap_weight(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) o2hb_region_pin(NULL); @@ -2480,7 +2458,7 @@ static int o2hb_region_inc_user(const char *region_uuid) if (o2hb_dependent_users > 1) goto unlock; - if (o2hb_pop_count(&o2hb_quorum_region_bitmap, + if (bitmap_weight(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) ret = o2hb_region_pin(NULL); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index baa2b9ef7eef..2260fb9e6508 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -199,7 +199,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; #define mlog_errno(st) do { \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ - _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC) \ + _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ + _st != -EDQUOT) \ mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ } while (0) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 30544ce8e9f7..91a7e85ac8fd 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2349,7 +2349,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, dx_root_bh = sb_getblk(osb->sb, dr_blkno); if (dx_root_bh == NULL) { - ret = -EIO; + ret = -ENOMEM; goto out; } ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh); @@ -2422,7 +2422,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb, for (i = 0; i < num_dx_leaves; i++) { bh = sb_getblk(osb->sb, start_blk + i); if (bh == NULL) { - ret = -EIO; + ret = -ENOMEM; goto out; } dx_leaves[i] = bh; @@ -2929,7 +2929,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); dirdata_bh = sb_getblk(sb, blkno); if (!dirdata_bh) { - ret = -EIO; + ret = -ENOMEM; mlog_errno(ret); goto out_commit; } @@ -3159,7 +3159,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, *new_bh = sb_getblk(sb, p_blkno); if (!*new_bh) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } @@ -3284,7 +3284,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, if (ocfs2_dir_resv_allowed(osb)) data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv; - credits = ocfs2_calc_extend_credits(sb, el, 1); + credits = ocfs2_calc_extend_credits(sb, el); } else { spin_unlock(&OCFS2_I(dir)->ip_lock); credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; @@ -3716,7 +3716,7 @@ static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb, { int credits = ocfs2_clusters_to_blocks(osb->sb, 2); - credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1); + credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list); credits += ocfs2_quota_trans_credits(osb->sb); return credits; } diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index cf0f103963b1..af3f7aa73e13 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1885,8 +1885,10 @@ ok: * up nodes that this node contacted */ while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, nn+1)) < O2NM_MAX_NODES) { - if (nn != dlm->node_num && nn != assert->node_idx) + if (nn != dlm->node_num && nn != assert->node_idx) { master_request = 1; + break; + } } } mle->master = assert->node_idx; @@ -2354,6 +2356,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, assert_spin_locked(&res->spinlock); + /* delay migration when the lockres is in MIGRATING state */ + if (res->state & DLM_LOCK_RES_MIGRATING) + return 0; + if (res->owner != dlm->node_num) return 0; diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 0b5adca1b178..7035af09cc03 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1886,6 +1886,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, if (ml->type == LKM_NLMODE) goto skip_lvb; + /* + * If the lock is in the blocked list it can't have a valid lvb, + * so skip it + */ + if (ml->list == DLM_BLOCKED_LIST) + goto skip_lvb; + if (!dlm_lvb_is_empty(mres->lvb)) { if (lksb->flags & DLM_LKSB_PUT_LVB) { /* other node was trying to update diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d71903c6068b..6fff128cad16 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -580,7 +580,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, int did_quota = 0; /* - * This function only exists for file systems which don't + * Unwritten extent only exists for file systems which * support holes. */ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); @@ -603,8 +603,7 @@ restart_all: goto leave; } - credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, - clusters_to_add); + credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index f87f9bd1edff..f29a90fde619 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -386,19 +386,9 @@ static int ocfs2_read_locked_inode(struct inode *inode, u32 generation = 0; status = -EINVAL; - if (inode == NULL || inode->i_sb == NULL) { - mlog(ML_ERROR, "bad inode\n"); - return status; - } sb = inode->i_sb; osb = OCFS2_SB(sb); - if (!args) { - mlog(ML_ERROR, "bad inode args\n"); - make_bad_inode(inode); - return status; - } - /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 0b479bab3671..9ff4e8cf9d97 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -524,8 +524,7 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb) * the result may be wrong. */ static inline int ocfs2_calc_extend_credits(struct super_block *sb, - struct ocfs2_extent_list *root_el, - u32 bits_wanted) + struct ocfs2_extent_list *root_el) { int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 3d3f3c83065c..631a98213474 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -201,8 +201,7 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, } } - *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, - clusters_to_move + 2); + *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", extra_blocks, clusters_to_move, *credits); @@ -1067,8 +1066,10 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) if (status) return status; - if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) + if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) { + status = -EPERM; goto out_drop; + } if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { status = -EPERM; @@ -1090,8 +1091,10 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) goto out_free; } - if (range.me_start > i_size_read(inode)) + if (range.me_start > i_size_read(inode)) { + status = -EINVAL; goto out_free; + } if (range.me_start + range.me_len > i_size_read(inode)) range.me_len = i_size_read(inode) - range.me_start; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index be3f8676a438..4f791f6d27d0 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -489,7 +489,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, *new_fe_bh = sb_getblk(osb->sb, fe_blkno); if (!*new_fe_bh) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto leave; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index bf4dfc14bb2c..55767e1ba724 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -612,6 +612,11 @@ static int ocfs2_create_refcount_tree(struct inode *inode, } new_bh = sb_getblk(inode->i_sb, first_blkno); + if (!new_bh) { + ret = -ENOMEM; + mlog_errno(ret); + goto out_commit; + } ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, @@ -1310,7 +1315,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) { - ret = -EIO; + ret = -ENOMEM; mlog_errno(ret); goto out; } @@ -1561,7 +1566,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) { - ret = -EIO; + ret = -ENOMEM; mlog_errno(ret); goto out; } @@ -2502,8 +2507,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); *meta_add += ocfs2_extend_meta_needed(et.et_root_el); *credits += ocfs2_calc_extend_credits(sb, - et.et_root_el, - ref_blocks); + et.et_root_el); } else { *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; *meta_add += 1; @@ -2874,8 +2878,7 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb, meta_add = ocfs2_extend_meta_needed(et->et_root_el); - *credits += ocfs2_calc_extend_credits(sb, et->et_root_el, - num_clusters + 2); + *credits += ocfs2_calc_extend_credits(sb, et->et_root_el); ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh, p_cluster, num_clusters, @@ -3031,7 +3034,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, for (i = 0; i < blocks; i++, old_block++, new_block++) { new_bh = sb_getblk(osb->sb, new_block); if (new_bh == NULL) { - ret = -EIO; + ret = -ENOMEM; mlog_errno(ret); break; } @@ -3625,8 +3628,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode, ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh); *credits += ocfs2_calc_extend_credits(inode->i_sb, - et.et_root_el, - ref_blocks); + et.et_root_el); } out: diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index ec55add7604a..822ebc10f281 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -469,6 +469,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) struct ocfs2_chain_list *cl; struct ocfs2_chain_rec *cr; u16 cl_bpc; + u64 bg_ptr; if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; @@ -513,7 +514,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); if (ret) { mlog_errno(ret); - goto out_unlock; + goto out_free_group_bh; } trace_ocfs2_group_add((unsigned long long)input->group, @@ -523,7 +524,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); ret = -EINVAL; - goto out_unlock; + goto out_free_group_bh; } cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); @@ -538,12 +539,14 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) } group = (struct ocfs2_group_desc *)group_bh->b_data; + bg_ptr = le64_to_cpu(group->bg_next_group); group->bg_next_group = cr->c_blkno; ocfs2_journal_dirty(handle, group_bh); ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { + group->bg_next_group = cpu_to_le64(bg_ptr); mlog_errno(ret); goto out_commit; } @@ -574,8 +577,11 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) out_commit: ocfs2_commit_trans(osb, handle); -out_unlock: + +out_free_group_bh: brelse(group_bh); + +out_unlock: brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 39abf89697ed..cb7ec0b63ddc 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -643,7 +643,7 @@ error: #define FS_OCFS2_NM 1 -static ctl_table ocfs2_nm_table[] = { +static struct ctl_table ocfs2_nm_table[] = { { .procname = "hb_ctl_path", .data = ocfs2_hb_ctl_path, @@ -654,7 +654,7 @@ static ctl_table ocfs2_nm_table[] = { { } }; -static ctl_table ocfs2_mod_table[] = { +static struct ctl_table ocfs2_mod_table[] = { { .procname = "nm", .data = NULL, @@ -665,7 +665,7 @@ static ctl_table ocfs2_mod_table[] = { { } }; -static ctl_table ocfs2_kern_table[] = { +static struct ctl_table ocfs2_kern_table[] = { { .procname = "ocfs2", .data = NULL, @@ -676,7 +676,7 @@ static ctl_table ocfs2_kern_table[] = { { } }; -static ctl_table ocfs2_root_table[] = { +static struct ctl_table ocfs2_root_table[] = { { .procname = "fs", .data = NULL, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 5397c07ce608..2c91452c4047 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -481,7 +481,7 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle, bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } @@ -661,7 +661,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { - status = -EIO; + status = -ENOMEM; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d4e81e4a9b04..c41492957aa5 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1848,8 +1848,8 @@ static int ocfs2_get_sector(struct super_block *sb, *bh = sb_getblk(sb, block); if (!*bh) { - mlog_errno(-EIO); - return -EIO; + mlog_errno(-ENOMEM); + return -ENOMEM; } lock_buffer(*bh); if (!buffer_dirty(*bh)) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 6ce0686eab72..f0a1326d9bba 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -377,7 +377,7 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, xb_blkno + i); if (!bucket->bu_bhs[i]) { - rc = -EIO; + rc = -ENOMEM; mlog_errno(rc); break; } @@ -754,8 +754,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, BUG_ON(why == RESTART_META); credits = ocfs2_calc_extend_credits(inode->i_sb, - &vb->vb_xv->xr_list, - clusters_to_add); + &vb->vb_xv->xr_list); status = ocfs2_extend_trans(handle, credits); if (status < 0) { status = -ENOMEM; @@ -2865,6 +2864,12 @@ static int ocfs2_create_xattr_block(struct inode *inode, } new_bh = sb_getblk(inode->i_sb, first_blkno); + if (!new_bh) { + ret = -ENOMEM; + mlog_errno(ret); + goto end; + } + ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), @@ -3040,8 +3045,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { clusters_add += new_clusters; credits += ocfs2_calc_extend_credits(inode->i_sb, - &def_xv.xv.xr_list, - new_clusters); + &def_xv.xv.xr_list); } goto meta_guess; @@ -3106,8 +3110,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, if (!ocfs2_xattr_is_local(xe)) credits += ocfs2_calc_extend_credits( inode->i_sb, - &def_xv.xv.xr_list, - new_clusters); + &def_xv.xv.xr_list); goto out; } } @@ -3132,9 +3135,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, meta_add += ocfs2_extend_meta_needed(&xv->xr_list); clusters_add += new_clusters - old_clusters; credits += ocfs2_calc_extend_credits(inode->i_sb, - &xv->xr_list, - new_clusters - - old_clusters); + &xv->xr_list); if (value_size >= OCFS2_XATTR_ROOT_SIZE) goto out; } @@ -3180,7 +3181,7 @@ meta_guess: &xb->xb_attrs.xb_root.xt_list; meta_add += ocfs2_extend_meta_needed(el); credits += ocfs2_calc_extend_credits(inode->i_sb, - el, 1); + el); } else credits += OCFS2_SUBALLOC_ALLOC + 1; @@ -6216,8 +6217,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, le16_to_cpu(xv->xr_list.l_next_free_rec); *credits += ocfs2_calc_extend_credits(sb, - &def_xv.xv.xr_list, - le32_to_cpu(xv->xr_clusters)); + &def_xv.xv.xr_list); /* * If the value is a tree with depth > 1, We don't go deep @@ -6782,7 +6782,7 @@ static int ocfs2_lock_reflink_xattr_rec_allocators( metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); *credits += ocfs2_calc_extend_credits(osb->sb, - xt_et->et_root_el, len); + xt_et->et_root_el); if (metas.num_metas) { ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, diff --git a/fs/open.c b/fs/open.c index d420331ca32a..4b3e1edf2fe4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -57,7 +57,8 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ret | ATTR_FORCE; mutex_lock(&dentry->d_inode->i_mutex); - ret = notify_change(dentry, &newattrs); + /* Note any delegations or leases have already been broken: */ + ret = notify_change(dentry, &newattrs, NULL); mutex_unlock(&dentry->d_inode->i_mutex); return ret; } @@ -464,21 +465,28 @@ out: static int chmod_common(struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; struct iattr newattrs; int error; error = mnt_want_write(path->mnt); if (error) return error; +retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chmod(path, mode); if (error) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(path->dentry, &newattrs); + error = notify_change(path->dentry, &newattrs, &delegated_inode); out_unlock: mutex_unlock(&inode->i_mutex); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write(path->mnt); return error; } @@ -522,6 +530,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) static int chown_common(struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; @@ -546,12 +555,17 @@ static int chown_common(struct path *path, uid_t user, gid_t group) if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; +retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change(path->dentry, &newattrs); + error = notify_change(path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); - + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } return error; } @@ -685,7 +699,6 @@ static int do_dentry_open(struct file *f, } f->f_mapping = inode->i_mapping; - file_sb_list_add(f, inode->i_sb); if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; @@ -693,6 +706,10 @@ static int do_dentry_open(struct file *f, } f->f_op = fops_get(inode->i_fop); + if (unlikely(WARN_ON(!f->f_op))) { + error = -ENODEV; + goto cleanup_all; + } error = security_file_open(f, cred); if (error) @@ -702,7 +719,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; - if (!open && f->f_op) + if (!open) open = f->f_op->open; if (open) { error = open(inode, f); @@ -720,7 +737,6 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { put_write_access(inode); if (!special_file(inode->i_mode)) { @@ -1023,7 +1039,7 @@ int filp_close(struct file *filp, fl_owner_t id) return 0; } - if (filp->f_op && filp->f_op->flush) + if (filp->f_op->flush) retval = filp->f_op->flush(filp, id); if (likely(!(filp->f_mode & FMODE_PATH))) { diff --git a/fs/pnode.c b/fs/pnode.c index 9af0df15256e..c7221bb19801 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, prev_src_mnt = child; } out: - br_write_lock(&vfsmount_lock); + lock_mount_hash(); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0); } - br_write_unlock(&vfsmount_lock); + unlock_mount_hash(); return ret; } @@ -278,8 +278,7 @@ out: */ static inline int do_refcount_check(struct mount *mnt, int count) { - int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; - return (mycount > count); + return mnt_get_count(mnt) > count; } /* @@ -311,7 +310,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0); + child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); if (child && list_empty(&child->mnt_mounts) && (ret = do_refcount_check(child, 1))) break; @@ -333,8 +332,8 @@ static void __propagate_umount(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt(&m->mnt, - mnt->mnt_mountpoint, 0); + struct mount *child = __lookup_mnt_last(&m->mnt, + mnt->mnt_mountpoint); /* * umount the child only if the child has no * other children diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 15af6222f8a4..2183fcf41d59 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -31,6 +31,10 @@ config PROC_FS config PROC_KCORE bool "/proc/kcore support" if !ARM depends on PROC_FS && MMU + help + Provides a virtual ELF core file of the live kernel. This can + be read with gdb and other ELF tools. No modifications can be + made using this mechanism. config PROC_VMCORE bool "/proc/vmcore support" diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8eaa1ba793fc..28955d4b7218 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -285,19 +285,23 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) return rv; } -static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) +static unsigned long +proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned long rv = -EIO; - unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL; + unsigned long (*get_area)(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long) = NULL; if (use_pde(pde)) { #ifdef CONFIG_MMU - get_unmapped_area = current->mm->get_unmapped_area; + get_area = current->mm->get_unmapped_area; #endif if (pde->proc_fops->get_unmapped_area) - get_unmapped_area = pde->proc_fops->get_unmapped_area; - if (get_unmapped_area) - rv = get_unmapped_area(file, orig_addr, len, pgoff, flags); + get_area = pde->proc_fops->get_unmapped_area; + if (get_area) + rv = get_area(file, orig_addr, len, pgoff, flags); unuse_pde(pde); } return rv; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 06ea155e1a59..5ed0e52d6aa0 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -255,8 +255,7 @@ static int kcore_update_ram(void) end_pfn = 0; for_each_node_state(nid, N_MEMORY) { unsigned long node_end; - node_end = NODE_DATA(nid)->node_start_pfn + - NODE_DATA(nid)->node_spanned_pages; + node_end = node_end_pfn(nid); if (end_pfn < node_end) end_pfn = node_end; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 59d85d608898..c805d5b69ba1 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -24,7 +24,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) { struct sysinfo i; unsigned long committed; - unsigned long allowed; struct vmalloc_info vmi; long cached; unsigned long pages[NR_LRU_LISTS]; @@ -37,8 +36,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) si_meminfo(&i); si_swapinfo(&i); committed = percpu_counter_read_positive(&vm_committed_as); - allowed = ((totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100) + total_swap_pages; cached = global_page_state(NR_FILE_PAGES) - total_swapcache_pages() - i.bufferram; @@ -147,7 +144,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), K(global_page_state(NR_WRITEBACK_TEMP)), - K(allowed), + K(vm_commit_limit()), K(committed), (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 106a83570630..70779b2fc209 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -14,16 +14,13 @@ #include <linux/of.h> #include <linux/export.h> #include <linux/slab.h> -#include <asm/prom.h> #include <asm/uaccess.h> #include "internal.h" static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de) { -#ifdef HAVE_ARCH_DEVTREE_FIXUPS np->pde = de; -#endif } static struct proc_dir_entry *proc_device_tree; diff --git a/fs/proc/self.c b/fs/proc/self.c index 6b6a993b5c25..ffeb202ec942 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, - .put_link = proc_self_put_link, + .put_link = kfree_put_link, }; static unsigned self_inum; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 390bdab01c3c..abbe825d20ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -561,6 +561,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_NONLINEAR)] = "nl", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_DONTDUMP)] = "dd", +#ifdef CONFIG_MEM_SOFT_DIRTY + [ilog2(VM_SOFTDIRTY)] = "sd", +#endif [ilog2(VM_MIXEDMAP)] = "mm", [ilog2(VM_HUGEPAGE)] = "hg", [ilog2(VM_NOHUGEPAGE)] = "nh", @@ -1387,8 +1390,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; - int n; - char buffer[50]; + char buffer[64]; + int nid; if (!mm) return 0; @@ -1404,10 +1407,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.mm = mm; pol = get_vma_policy(task, vma, vma->vm_start); - n = mpol_to_str(buffer, sizeof(buffer), pol); + mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); - if (n < 0) - return n; seq_printf(m, "%08lx %s", vma->vm_start, buffer); @@ -1460,9 +1461,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (md->writeback) seq_printf(m, " writeback=%lu", md->writeback); - for_each_node_state(n, N_MEMORY) - if (md->node[n]) - seq_printf(m, " N%d=%lu", n, md->node[n]); + for_each_node_state(nid, N_MEMORY) + if (md->node[nid]) + seq_printf(m, " N%d=%lu", nid, md->node[nid]); out: seq_putc(m, '\n'); diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 5fe34c355e85..439406e081af 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) struct proc_mounts *p = proc_mounts(file->private_data); struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; + int event; poll_wait(file, &p->ns->poll, wait); - br_read_lock(&vfsmount_lock); - if (p->m.poll_event != ns->event) { - p->m.poll_event = ns->event; + event = ACCESS_ONCE(ns->event); + if (p->m.poll_event != event) { + p->m.poll_event = event; res |= POLLERR | POLLPRI; } - br_read_unlock(&vfsmount_lock); return res; } diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index d024505ba007..e62c8183777a 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -60,10 +60,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, struct buffer_head *bh; *res_dir = NULL; - if (!dir->i_sb) { - printk(KERN_WARNING "qnx4: no superblock on dir.\n"); - return NULL; - } bh = NULL; block = offset = blkofs = 0; while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { diff --git a/fs/quota/quota.c b/fs/quota/quota.c index dea86e8967ee..2b363e23f36e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -117,6 +117,7 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) { + memset(dst, 0, sizeof(*dst)); dst->dqb_bhardlimit = src->d_blk_hardlimit; dst->dqb_bsoftlimit = src->d_blk_softlimit; dst->dqb_curspace = src->d_bcount; diff --git a/fs/read_write.c b/fs/read_write.c index e3cd280b158c..58e440df1bc6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) fn = no_llseek; if (file->f_mode & FMODE_LSEEK) { - if (file->f_op && file->f_op->llseek) + if (file->f_op->llseek) fn = file->f_op->llseek; } return fn(file, offset, whence); @@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) + if (!file->f_op->read && !file->f_op->aio_read) return -EINVAL; if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) return -EFAULT; @@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t const char __user *p; ssize_t ret; - if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) + if (!file->f_op->write && !file->f_op->aio_write) return -EINVAL; old_fs = get_fs(); @@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) + if (!file->f_op->write && !file->f_op->aio_write) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; @@ -727,11 +727,6 @@ static ssize_t do_readv_writev(int type, struct file *file, io_fn_t fn; iov_fn_t fnv; - if (!file->f_op) { - ret = -EINVAL; - goto out; - } - ret = rw_copy_check_uvector(type, uvector, nr_segs, ARRAY_SIZE(iovstack), iovstack, &iov); if (ret <= 0) @@ -778,7 +773,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) + if (!file->f_op->aio_read && !file->f_op->read) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -791,7 +786,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) + if (!file->f_op->aio_write && !file->f_op->write) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); @@ -906,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, io_fn_t fn; iov_fn_t fnv; - ret = -EINVAL; - if (!file->f_op) - goto out; - ret = -EFAULT; if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) goto out; @@ -965,7 +956,7 @@ static size_t compat_readv(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) + if (!file->f_op->aio_read && !file->f_op->read) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, pos); @@ -1032,7 +1023,7 @@ static size_t compat_writev(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) + if (!file->f_op->aio_write && !file->f_op->write) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); diff --git a/fs/readdir.c b/fs/readdir.c index 93d71e574310..5b53d995cae6 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; - if (!file->f_op || !file->f_op->iterate) + if (!file->f_op->iterate) goto out; res = security_file_permission(file, MAY_READ); diff --git a/fs/select.c b/fs/select.c index dfd5cb18c012..467bb1cb3ea5 100644 --- a/fs/select.c +++ b/fs/select.c @@ -454,7 +454,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) const struct file_operations *f_op; f_op = f.file->f_op; mask = DEFAULT_POLLMASK; - if (f_op && f_op->poll) { + if (f_op->poll) { wait_key_set(wait, in, out, bit, busy_flag); mask = (*f_op->poll)(f.file, wait); @@ -761,7 +761,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, mask = POLLNVAL; if (f.file) { mask = DEFAULT_POLLMASK; - if (f.file->f_op && f.file->f_op->poll) { + if (f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; pwait->_key |= busy_flag; mask = f.file->f_op->poll(f.file, pwait); diff --git a/fs/splice.c b/fs/splice.c index 3b7ee656f3aa..46a08f772d7d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, loff_t pos = sd->pos; int more; - if (!likely(file->f_op && file->f_op->sendpage)) + if (!likely(file->f_op->sendpage)) return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; @@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); - if (out->f_op && out->f_op->splice_write) + if (out->f_op->splice_write) splice_write = out->f_op->splice_write; else splice_write = default_file_splice_write; @@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - if (in->f_op && in->f_op->splice_read) + if (in->f_op->splice_read) splice_read = in->f_op->splice_read; else splice_read = default_file_splice_read; diff --git a/fs/stat.c b/fs/stat.c index d0ea7ef75e26..ae0c3cef9927 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -37,14 +37,21 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) EXPORT_SYMBOL(generic_fillattr); -int vfs_getattr(struct path *path, struct kstat *stat) +/** + * vfs_getattr_nosec - getattr without security checks + * @path: file to get attributes from + * @stat: structure to return attributes in + * + * Get attributes without calling security_inode_getattr. + * + * Currently the only caller other than vfs_getattr is internal to the + * filehandle lookup code, which uses only the inode number and returns + * no attributes to any user. Any other code probably wants + * vfs_getattr. + */ +int vfs_getattr_nosec(struct path *path, struct kstat *stat) { struct inode *inode = path->dentry->d_inode; - int retval; - - retval = security_inode_getattr(path->mnt, path->dentry); - if (retval) - return retval; if (inode->i_op->getattr) return inode->i_op->getattr(path->mnt, path->dentry, stat); @@ -53,6 +60,18 @@ int vfs_getattr(struct path *path, struct kstat *stat) return 0; } +EXPORT_SYMBOL(vfs_getattr_nosec); + +int vfs_getattr(struct path *path, struct kstat *stat) +{ + int retval; + + retval = security_inode_getattr(path->mnt, path->dentry); + if (retval) + return retval; + return vfs_getattr_nosec(path, stat); +} + EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) diff --git a/fs/super.c b/fs/super.c index 0225c20f8770..e5f6c2cfac38 100644 --- a/fs/super.c +++ b/fs/super.c @@ -129,33 +129,24 @@ static unsigned long super_cache_count(struct shrinker *shrink, return total_objects; } -static int init_sb_writers(struct super_block *s, struct file_system_type *type) -{ - int err; - int i; - - for (i = 0; i < SB_FREEZE_LEVELS; i++) { - err = percpu_counter_init(&s->s_writers.counter[i], 0); - if (err < 0) - goto err_out; - lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], - &type->s_writers_key[i], 0); - } - init_waitqueue_head(&s->s_writers.wait); - init_waitqueue_head(&s->s_writers.wait_unfrozen); - return 0; -err_out: - while (--i >= 0) - percpu_counter_destroy(&s->s_writers.counter[i]); - return err; -} - -static void destroy_sb_writers(struct super_block *s) +/** + * destroy_super - frees a superblock + * @s: superblock to free + * + * Frees a superblock. + */ +static void destroy_super(struct super_block *s) { int i; - + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); for (i = 0; i < SB_FREEZE_LEVELS; i++) percpu_counter_destroy(&s->s_writers.counter[i]); + security_sb_free(s); + WARN_ON(!list_empty(&s->s_mounts)); + kfree(s->s_subtype); + kfree(s->s_options); + kfree_rcu(s, rcu); } /** @@ -170,111 +161,74 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; + int i; - if (s) { - if (security_sb_alloc(s)) - goto out_free_sb; + if (!s) + return NULL; -#ifdef CONFIG_SMP - s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) - goto err_out; - else { - int i; + if (security_sb_alloc(s)) + goto fail; - for_each_possible_cpu(i) - INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); - } -#else - INIT_LIST_HEAD(&s->s_files); -#endif - if (init_sb_writers(s, type)) - goto err_out; - s->s_flags = flags; - s->s_bdi = &default_backing_dev_info; - INIT_HLIST_NODE(&s->s_instances); - INIT_HLIST_BL_HEAD(&s->s_anon); - INIT_LIST_HEAD(&s->s_inodes); - - if (list_lru_init(&s->s_dentry_lru)) - goto err_out; - if (list_lru_init(&s->s_inode_lru)) - goto err_out_dentry_lru; - - INIT_LIST_HEAD(&s->s_mounts); - init_rwsem(&s->s_umount); - lockdep_set_class(&s->s_umount, &type->s_umount_key); - /* - * sget() can have s_umount recursion. - * - * When it cannot find a suitable sb, it allocates a new - * one (this one), and tries again to find a suitable old - * one. - * - * In case that succeeds, it will acquire the s_umount - * lock of the old one. Since these are clearly distrinct - * locks, and this object isn't exposed yet, there's no - * risk of deadlocks. - * - * Annotate this by putting this lock in a different - * subclass. - */ - down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); - s->s_count = 1; - atomic_set(&s->s_active, 1); - mutex_init(&s->s_vfs_rename_mutex); - lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); - mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); - s->s_maxbytes = MAX_NON_LFS; - s->s_op = &default_op; - s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; - - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.scan_objects = super_cache_scan; - s->s_shrink.count_objects = super_cache_count; - s->s_shrink.batch = 1024; - s->s_shrink.flags = SHRINKER_NUMA_AWARE; + for (i = 0; i < SB_FREEZE_LEVELS; i++) { + if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + goto fail; + lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], + &type->s_writers_key[i], 0); } -out: + init_waitqueue_head(&s->s_writers.wait); + init_waitqueue_head(&s->s_writers.wait_unfrozen); + s->s_flags = flags; + s->s_bdi = &default_backing_dev_info; + INIT_HLIST_NODE(&s->s_instances); + INIT_HLIST_BL_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); + + if (list_lru_init(&s->s_dentry_lru)) + goto fail; + if (list_lru_init(&s->s_inode_lru)) + goto fail; + + INIT_LIST_HEAD(&s->s_mounts); + init_rwsem(&s->s_umount); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * sget() can have s_umount recursion. + * + * When it cannot find a suitable sb, it allocates a new + * one (this one), and tries again to find a suitable old + * one. + * + * In case that succeeds, it will acquire the s_umount + * lock of the old one. Since these are clearly distrinct + * locks, and this object isn't exposed yet, there's no + * risk of deadlocks. + * + * Annotate this by putting this lock in a different + * subclass. + */ + down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); + s->s_count = 1; + atomic_set(&s->s_active, 1); + mutex_init(&s->s_vfs_rename_mutex); + lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); + mutex_init(&s->s_dquot.dqio_mutex); + mutex_init(&s->s_dquot.dqonoff_mutex); + init_rwsem(&s->s_dquot.dqptr_sem); + s->s_maxbytes = MAX_NON_LFS; + s->s_op = &default_op; + s->s_time_gran = 1000000000; + s->cleancache_poolid = -1; + + s->s_shrink.seeks = DEFAULT_SEEKS; + s->s_shrink.scan_objects = super_cache_scan; + s->s_shrink.count_objects = super_cache_count; + s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE; return s; -err_out_dentry_lru: - list_lru_destroy(&s->s_dentry_lru); -err_out: - security_sb_free(s); -#ifdef CONFIG_SMP - if (s->s_files) - free_percpu(s->s_files); -#endif - destroy_sb_writers(s); -out_free_sb: - kfree(s); - s = NULL; - goto out; -} - -/** - * destroy_super - frees a superblock - * @s: superblock to free - * - * Frees a superblock. - */ -static inline void destroy_super(struct super_block *s) -{ - list_lru_destroy(&s->s_dentry_lru); - list_lru_destroy(&s->s_inode_lru); -#ifdef CONFIG_SMP - free_percpu(s->s_files); -#endif - destroy_sb_writers(s); - security_sb_free(s); - WARN_ON(!list_empty(&s->s_mounts)); - kfree(s->s_subtype); - kfree(s->s_options); - kfree(s); +fail: + destroy_super(s); + return NULL; } /* Superblock refcounting */ @@ -756,7 +710,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) make sure there are no rw files opened */ if (remount_ro) { if (force) { - mark_files_ro(sb); + sb->s_readonly_remount = 1; + smp_wmb(); } else { retval = sb_prepare_remount_readonly(sb); if (retval) diff --git a/fs/sync.c b/fs/sync.c index 905f3f6b3d85..f15537452231 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,10 +27,11 @@ * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. */ -static int __sync_filesystem(struct super_block *sb, int wait) +static int __sync_filesystem(struct super_block *sb, int wait, + unsigned long start) { if (wait) - sync_inodes_sb(sb); + sync_inodes_sb(sb, start); else writeback_inodes_sb(sb, WB_REASON_SYNC); @@ -47,6 +48,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) int sync_filesystem(struct super_block *sb) { int ret; + unsigned long start = jiffies; /* * We need to be protected against the filesystem going from @@ -60,17 +62,17 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - ret = __sync_filesystem(sb, 0); + ret = __sync_filesystem(sb, 0, start); if (ret < 0) return ret; - return __sync_filesystem(sb, 1); + return __sync_filesystem(sb, 1, start); } EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - sync_inodes_sb(sb); + sync_inodes_sb(sb, *((unsigned long *)arg)); } static void sync_fs_one_sb(struct super_block *sb, void *arg) @@ -102,9 +104,10 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; + unsigned long start = jiffies; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_inodes_one_sb, &start); iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); @@ -177,7 +180,7 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { - if (!file->f_op || !file->f_op->fsync) + if (!file->f_op->fsync) return -EINVAL; return file->f_op->fsync(file, start, end, datasync); } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 6e025e02ffde..cc1febd8fadf 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2563,9 +2563,9 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf, unsigned int from, to, ffs = chance(1, 2); unsigned char *p = (void *)buf; - from = prandom_u32() % (len + 1); - /* Corruption may only span one max. write unit */ - to = min(len, ALIGN(from, c->max_write_size)); + from = prandom_u32() % len; + /* Corruption span max to end of write unit */ + to = min(len, ALIGN(from + 1, c->max_write_size)); ubifs_warn("filled bytes %u-%u with %s", from, to - 1, ffs ? "0xFFs" : "random data"); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 6b4947f75af7..ea41649e4ca5 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -192,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, struct ubifs_dent_node *dent; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("'%.*s' in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, dir->i_ino); + dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); if (dentry->d_name.len > UBIFS_MAX_NLEN) return ERR_PTR(-ENAMETOOLONG); @@ -225,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, * checking. */ err = PTR_ERR(inode); - ubifs_err("dead directory entry '%.*s', error %d", - dentry->d_name.len, dentry->d_name.name, err); + ubifs_err("dead directory entry '%pd', error %d", + dentry, err); ubifs_ro_mode(c, err); goto out; } @@ -260,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, * parent directory inode. */ - dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -509,8 +508,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, * changing the parent inode. */ - dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, inode->i_ino, + dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu", + dentry, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); @@ -566,8 +565,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) * deletions. */ - dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, inode->i_ino, + dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu", + dentry, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); @@ -656,8 +655,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) * because we have extra space reserved for deletions. */ - dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, - dentry->d_name.name, inode->i_ino, dir->i_ino); + dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry, + inode->i_ino, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); @@ -716,8 +715,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) * directory inode. */ - dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -778,8 +777,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%.*s' in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, dir->i_ino); + dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); if (!new_valid_dev(rdev)) return -EINVAL; @@ -853,8 +851,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, - dentry->d_name.name, symname, dir->i_ino); + dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, + symname, dir->i_ino); if (len > UBIFS_MAX_INO_DATA) return -ENAMETOOLONG; @@ -979,10 +977,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, * separately. */ - dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu", - old_dentry->d_name.len, old_dentry->d_name.name, - old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, - new_dentry->d_name.name, new_dir->i_ino); + dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu", + old_dentry, old_inode->i_ino, old_dir->i_ino, + new_dentry, new_dir->i_ino); ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); if (unlink) diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 76ca53cd3eee..9718da86ad01 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -668,8 +668,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) ubifs_assert(!wbuf->used); for (i = 0; ; i++) { - int space_before = c->leb_size - wbuf->offs - wbuf->used; - int space_after; + int space_before, space_after; cond_resched(); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index afaad07f3b29..0e045e75abd8 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -933,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, int move = (old_dir != new_dir); struct ubifs_inode *uninitialized_var(new_ui); - dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", - old_dentry->d_name.len, old_dentry->d_name.name, - old_dir->i_ino, new_dentry->d_name.len, - new_dentry->d_name.name, new_dir->i_ino); + dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu", + old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino); ubifs_assert(ubifs_inode(old_dir)->data_len == 0); ubifs_assert(ubifs_inode(new_dir)->data_len == 0); ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3e4aa7281e04..f69daa514a57 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1630,8 +1630,10 @@ static int ubifs_remount_rw(struct ubifs_info *c) } c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); - if (!c->write_reserve_buf) + if (!c->write_reserve_buf) { + err = -ENOMEM; goto out; + } err = ubifs_lpt_init(c, 0, 1); if (err) @@ -2064,8 +2066,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) } sb->s_root = d_make_root(root); - if (!sb->s_root) + if (!sb->s_root) { + err = -ENOMEM; goto out_umount; + } mutex_unlock(&c->umount_mutex); return 0; diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 0f7139bdb2c2..5e0a63b1b0d5 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, union ubifs_key key; int err, type; - dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, + host->i_ino, dentry, size); ubifs_assert(mutex_is_locked(&host->i_mutex)); if (size > UBIFS_MAX_INO_DATA) @@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, + host->i_ino, dentry, size); err = check_namespace(&nm); if (err < 0) @@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) int err, len, written = 0; struct qstr nm = { .name = NULL }; - dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, - dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino, + dentry, size); len = host_ui->xattr_names + host_ui->xattr_cnt; if (!buffer) @@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%.*s')", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name); + dbg_gen("xattr '%s', ino %lu ('%pd')", name, + host->i_ino, dentry); ubifs_assert(mutex_is_locked(&host->i_mutex)); err = check_namespace(&nm); diff --git a/fs/udf/super.c b/fs/udf/super.c index 91219385691d..3306b9f69bed 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -76,6 +76,9 @@ #define UDF_DEFAULT_BLOCKSIZE 2048 +#define VSD_FIRST_SECTOR_OFFSET 32768 +#define VSD_MAX_SECTOR_OFFSET 0x800000 + enum { UDF_MAX_LINKS = 0xffff }; /* These are the "meat" - everything else is stuffing */ @@ -685,7 +688,7 @@ out_unlock: static loff_t udf_check_vsd(struct super_block *sb) { struct volStructDesc *vsd = NULL; - loff_t sector = 32768; + loff_t sector = VSD_FIRST_SECTOR_OFFSET; int sectorsize; struct buffer_head *bh = NULL; int nsr02 = 0; @@ -703,8 +706,18 @@ static loff_t udf_check_vsd(struct super_block *sb) udf_debug("Starting at sector %u (%ld byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), sb->s_blocksize); - /* Process the sequence (if applicable) */ - for (; !nsr02 && !nsr03; sector += sectorsize) { + /* Process the sequence (if applicable). The hard limit on the sector + * offset is arbitrary, hopefully large enough so that all valid UDF + * filesystems will be recognised. There is no mention of an upper + * bound to the size of the volume recognition area in the standard. + * The limit will prevent the code to read all the sectors of a + * specially crafted image (like a bluray disc full of CD001 sectors), + * potentially causing minutes or even hours of uninterruptible I/O + * activity. This actually happened with uninitialised SSD partitions + * (all 0xFF) before the check for the limit and all valid IDs were + * added */ + for (; !nsr02 && !nsr03 && sector < VSD_MAX_SECTOR_OFFSET; + sector += sectorsize) { /* Read a block */ bh = udf_tread(sb, sector >> sb->s_blocksize_bits); if (!bh) @@ -714,10 +727,7 @@ static loff_t udf_check_vsd(struct super_block *sb) vsd = (struct volStructDesc *)(bh->b_data + (sector & (sb->s_blocksize - 1))); - if (vsd->stdIdent[0] == 0) { - brelse(bh); - break; - } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, + if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) { switch (vsd->structType) { case 0: @@ -753,6 +763,17 @@ static loff_t udf_check_vsd(struct super_block *sb) else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN)) nsr03 = sector; + else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BOOT2, + VSD_STD_ID_LEN)) + ; /* nothing */ + else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CDW02, + VSD_STD_ID_LEN)) + ; /* nothing */ + else { + /* invalid id : end of volume recognition area */ + brelse(bh); + break; + } brelse(bh); } @@ -760,7 +781,8 @@ static loff_t udf_check_vsd(struct super_block *sb) return nsr03; else if (nsr02) return nsr02; - else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768) + else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) == + VSD_FIRST_SECTOR_OFFSET) return -1; else return 0; @@ -1270,6 +1292,9 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) * PHYSICAL partitions are already set up */ type1_idx = i; +#ifdef UDFFS_DEBUG + map = NULL; /* supress 'maybe used uninitialized' warning */ +#endif for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; @@ -1891,7 +1916,9 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, return 0; } if (nsr_off == -1) - udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n"); + udf_debug("Failed to read sector at offset %d. " + "Assuming open disc. Skipping validity " + "check\n", VSD_FIRST_SECTOR_OFFSET); if (!sbi->s_last_block) sbi->s_last_block = udf_get_last_block(sb); } else { diff --git a/fs/utimes.c b/fs/utimes.c index f4fb7eca10e8..aa138d64560a 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -53,6 +53,7 @@ static int utimes_common(struct path *path, struct timespec *times) int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; error = mnt_want_write(path->mnt); if (error) @@ -101,9 +102,15 @@ static int utimes_common(struct path *path, struct timespec *times) goto mnt_drop_write_and_out; } } +retry_deleg: mutex_lock(&inode->i_mutex); - error = notify_change(path->dentry, &newattrs); + error = notify_change(path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } mnt_drop_write_and_out: mnt_drop_write(path->mnt); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 15188cc99449..8968f5036fa1 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -918,7 +918,7 @@ xfs_flush_inodes( struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb); + sync_inodes_sb(sb, jiffies); up_read(&sb->s_umount); } } |