diff options
-rw-r--r-- | fs/nfsd/filecache.c | 14 | ||||
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 13 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 24 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 12 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 104 | ||||
-rw-r--r-- | fs/notify/fdinfo.c | 21 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 89 | ||||
-rw-r--r-- | fs/notify/group.c | 32 | ||||
-rw-r--r-- | fs/notify/inotify/inotify.h | 19 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 2 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 47 | ||||
-rw-r--r-- | fs/notify/mark.c | 112 | ||||
-rw-r--r-- | include/linux/fanotify.h | 1 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 98 | ||||
-rw-r--r-- | include/uapi/linux/fanotify.h | 1 | ||||
-rw-r--r-- | kernel/audit_fsnotify.c | 5 | ||||
-rw-r--r-- | kernel/audit_tree.c | 34 | ||||
-rw-r--r-- | kernel/audit_watch.c | 2 |
18 files changed, 396 insertions, 234 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 2c1b027774d4..489c9c1d8f31 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -119,14 +119,14 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) struct inode *inode = nf->nf_inode; do { - mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); + fsnotify_group_lock(nfsd_file_fsnotify_group); mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, nfm_mark)); - mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); + fsnotify_group_unlock(nfsd_file_fsnotify_group); if (nfm) { fsnotify_put_mark(mark); break; @@ -134,8 +134,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) /* Avoid soft lockup race with nfsd_file_mark_put() */ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); fsnotify_put_mark(mark); - } else - mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); + } else { + fsnotify_group_unlock(nfsd_file_fsnotify_group); + } /* allocate a new nfm */ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); @@ -678,7 +679,8 @@ nfsd_file_cache_init(void) goto out_shrinker; } - nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); + nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, + FSNOTIFY_GROUP_NOFS); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 829dd4a61b66..190aa717fa32 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -168,7 +168,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); - mutex_lock(&dnotify_group->mark_mutex); + fsnotify_group_lock(dnotify_group); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; @@ -191,7 +191,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) free = true; } - mutex_unlock(&dnotify_group->mark_mutex); + fsnotify_group_unlock(dnotify_group); if (free) fsnotify_free_mark(fsn_mark); @@ -324,7 +324,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) new_dn_mark->dn = NULL; /* this is needed to prevent the fcntl/close race described below */ - mutex_lock(&dnotify_group->mark_mutex); + fsnotify_group_lock(dnotify_group); /* add the new_fsn_mark or find an old one. */ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); @@ -334,7 +334,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) } else { error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); if (error) { - mutex_unlock(&dnotify_group->mark_mutex); + fsnotify_group_unlock(dnotify_group); goto out_err; } spin_lock(&new_fsn_mark->lock); @@ -383,7 +383,7 @@ out: if (destroy) fsnotify_detach_mark(fsn_mark); - mutex_unlock(&dnotify_group->mark_mutex); + fsnotify_group_unlock(dnotify_group); if (destroy) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); @@ -401,7 +401,8 @@ static int __init dnotify_init(void) SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); + dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, + FSNOTIFY_GROUP_NOFS); if (IS_ERR(dnotify_group)) panic("unable to allocate fsnotify group for dnotify\n"); dnotify_sysctl_init(); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 985e995d2a39..4f897e109547 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -319,12 +319,8 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, return 0; } - fsnotify_foreach_iter_type(type) { - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - mark = iter_info->marks[type]; - - /* Apply ignore mask regardless of ISDIR and ON_CHILD flags */ + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { + /* Apply ignore mask regardless of mark's ISDIR flag */ marks_ignored_mask |= mark->ignored_mask; /* @@ -334,14 +330,6 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR)) continue; - /* - * If the event is on a child and this mark is on a parent not - * watching children, don't send it! - */ - if (type == FSNOTIFY_ITER_TYPE_PARENT && - !(mark->mask & FS_EVENT_ON_CHILD)) - continue; - marks_mask |= mark->mask; /* Record the mark types of this group that matched the event */ @@ -849,16 +837,14 @@ out: */ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *mark; int type; __kernel_fsid_t fsid = {}; - fsnotify_foreach_iter_type(type) { + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { struct fsnotify_mark_connector *conn; - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - - conn = READ_ONCE(iter_info->marks[type]->connector); + conn = READ_ONCE(mark->connector); /* Mark is just getting destroyed or created? */ if (!conn) continue; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index a3d5b751cac5..80e0ec95b113 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -490,3 +490,15 @@ static inline unsigned int fanotify_event_hash_bucket( { return event->hash & FANOTIFY_HTABLE_MASK; } + +static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) +{ + unsigned int mflags = 0; + + if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) + mflags |= FAN_MARK_IGNORED_SURV_MODIFY; + if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF) + mflags |= FAN_MARK_EVICTABLE; + + return mflags; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a792e21c5309..c2255b440df9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -264,7 +264,7 @@ static int create_fd(struct fsnotify_group *group, struct path *path, * originally opened O_WRONLY. */ new_file = dentry_open(path, - group->fanotify_data.f_flags | FMODE_NONOTIFY, + group->fanotify_data.f_flags | __FMODE_NONOTIFY, current_cred()); if (IS_ERR(new_file)) { /* @@ -1035,10 +1035,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group, __u32 removed; int destroy_mark; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return -ENOENT; } @@ -1048,7 +1048,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, fsnotify_recalc_mask(fsn_mark->connector); if (destroy_mark) fsnotify_detach_mark(fsn_mark); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); if (destroy_mark) fsnotify_free_mark(fsn_mark); @@ -1081,47 +1081,63 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, flags, umask); } -static void fanotify_mark_add_ignored_mask(struct fsnotify_mark *fsn_mark, - __u32 mask, unsigned int flags, - __u32 *removed) +static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, + unsigned int fan_flags) { - fsn_mark->ignored_mask |= mask; + bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE); + bool recalc = false; /* * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to * the removal of the FS_MODIFY bit in calculated mask if it was set * because of an ignored mask that is now going to survive FS_MODIFY. */ - if ((flags & FAN_MARK_IGNORED_SURV_MODIFY) && + if ((fan_flags & FAN_MARK_IGNORED_MASK) && + (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; if (!(fsn_mark->mask & FS_MODIFY)) - *removed = FS_MODIFY; + recalc = true; } + + if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE || + want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) + return recalc; + + /* + * NO_IREF may be removed from a mark, but not added. + * When removed, fsnotify_recalc_mask() will take the inode ref. + */ + WARN_ON_ONCE(!want_iref); + fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF; + + return true; } -static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, - __u32 mask, unsigned int flags, - __u32 *removed) +static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, + __u32 mask, unsigned int fan_flags) { - __u32 oldmask, newmask; + bool recalc; spin_lock(&fsn_mark->lock); - oldmask = fsnotify_calc_mask(fsn_mark); - if (!(flags & FAN_MARK_IGNORED_MASK)) { + if (!(fan_flags & FAN_MARK_IGNORED_MASK)) fsn_mark->mask |= mask; - } else { - fanotify_mark_add_ignored_mask(fsn_mark, mask, flags, removed); - } - newmask = fsnotify_calc_mask(fsn_mark); + else + fsn_mark->ignored_mask |= mask; + + recalc = fsnotify_calc_mask(fsn_mark) & + ~fsnotify_conn_mask(fsn_mark->connector); + + recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags); spin_unlock(&fsn_mark->lock); - return newmask & ~oldmask; + return recalc; } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int obj_type, + unsigned int fan_flags, __kernel_fsid_t *fsid) { struct ucounts *ucounts = group->fanotify_data.ucounts; @@ -1144,6 +1160,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, } fsnotify_init_mark(mark, group); + if (fan_flags & FAN_MARK_EVICTABLE) + mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; + ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid); if (ret) { fsnotify_put_mark(mark); @@ -1170,39 +1189,49 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group) static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int obj_type, - __u32 mask, unsigned int flags, + __u32 mask, unsigned int fan_flags, __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; - __u32 added, removed = 0; + bool recalc; int ret = 0; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, obj_type, fsid); + fsn_mark = fanotify_add_new_mark(group, connp, obj_type, + fan_flags, fsid); if (IS_ERR(fsn_mark)) { - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return PTR_ERR(fsn_mark); } } /* + * Non evictable mark cannot be downgraded to evictable mark. + */ + if (fan_flags & FAN_MARK_EVICTABLE && + !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) { + ret = -EEXIST; + goto out; + } + + /* * Error events are pre-allocated per group, only if strictly * needed (i.e. FAN_FS_ERROR was requested). */ - if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) { + if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) { ret = fanotify_group_init_error_pool(group); if (ret) goto out; } - added = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &removed); - if (removed || (added & ~fsnotify_conn_mask(fsn_mark->connector))) + recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags); + if (recalc) fsnotify_recalc_mask(fsn_mark->connector); out: - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); fsnotify_put_mark(fsn_mark); return ret; @@ -1348,14 +1377,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; - f_flags = O_RDWR | FMODE_NONOTIFY; + f_flags = O_RDWR | __FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) f_flags |= O_NONBLOCK; /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ - group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops); + group = fsnotify_alloc_group(&fanotify_fsnotify_ops, + FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS); if (IS_ERR(group)) { return PTR_ERR(group); } @@ -1598,6 +1628,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; /* + * Evictable is only relevant for inode marks, because only inode object + * can be evicted on memory pressure. + */ + if (flags & FAN_MARK_EVICTABLE && + mark_type != FAN_MARK_INODE) + goto fput_and_out; + + /* * Events that do not carry enough information to report * event->fd require a group that supports reporting fid. Those * events are not supported on a mount mark, because they do not @@ -1762,7 +1800,7 @@ static int __init fanotify_user_setup(void) BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 57f0d5d9f934..59fb40abe33d 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -14,6 +14,7 @@ #include <linux/exportfs.h> #include "inotify/inotify.h" +#include "fanotify/fanotify.h" #include "fdinfo.h" #include "fsnotify.h" @@ -28,13 +29,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f, struct fsnotify_group *group = f->private_data; struct fsnotify_mark *mark; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); list_for_each_entry(mark, &group->marks_list, g_list) { show(m, mark); if (seq_has_overflowed(m)) break; } - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); } #if defined(CONFIG_EXPORTFS) @@ -83,16 +84,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(fsnotify_conn_inode(mark->connector)); if (inode) { - /* - * IN_ALL_EVENTS represents all of the mask bits - * that we expose to userspace. There is at - * least one bit (FS_EVENT_ON_CHILD) which is - * used only internally to the kernel. - */ - u32 mask = mark->mask & IN_ALL_EVENTS; - seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", + seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, - mask, mark->ignored_mask); + inotify_mark_user_mask(mark)); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); @@ -110,12 +104,9 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f) static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) { - unsigned int mflags = 0; + unsigned int mflags = fanotify_mark_user_flags(mark); struct inode *inode; - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) - mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = igrab(fsnotify_conn_inode(mark->connector)); if (!inode) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 70a8516b78bc..0b3e74935cb4 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -253,7 +253,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group, if (WARN_ON_ONCE(!inode && !dir)) return 0; - if ((inode_mark->mask & FS_EXCL_UNLINK) && + if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; @@ -290,22 +290,15 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, } if (parent_mark) { - /* - * parent_mark indicates that the parent inode is watching - * children and interested in this event, which is an event - * possible on child. But is *this mark* watching children and - * interested in this event? - */ - if (parent_mark->mask & FS_EVENT_ON_CHILD) { - ret = fsnotify_handle_inode_event(group, parent_mark, mask, - data, data_type, dir, name, 0); - if (ret) - return ret; - } - if (!inode_mark) - return 0; + ret = fsnotify_handle_inode_event(group, parent_mark, mask, + data, data_type, dir, name, 0); + if (ret) + return ret; } + if (!inode_mark) + return 0; + if (mask & FS_EVENT_ON_CHILD) { /* * Some events can be sent on both parent dir and child marks @@ -335,31 +328,23 @@ static int send_to_group(__u32 mask, const void *data, int data_type, struct fsnotify_mark *mark; int type; - if (WARN_ON(!iter_info->report_mask)) + if (!iter_info->report_mask) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { - fsnotify_foreach_iter_type(type) { - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - mark = iter_info->marks[type]; - if (mark && - !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { + if (!(mark->flags & + FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignored_mask = 0; } } - fsnotify_foreach_iter_type(type) { - if (!fsnotify_iter_should_report_type(iter_info, type)) - continue; - mark = iter_info->marks[type]; - /* does the object mark tell us to do something? */ - if (mark) { - group = mark->group; - marks_mask |= mark->mask; - marks_ignored_mask |= mark->ignored_mask; - } + /* Are any of the group marks interested in this event? */ + fsnotify_foreach_iter_mark_type(iter_info, mark, type) { + group = mark->group; + marks_mask |= mark->mask; + marks_ignored_mask |= mark->ignored_mask; } pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", @@ -403,11 +388,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) /* * iter_info is a multi head priority queue of marks. - * Pick a subset of marks from queue heads, all with the - * same group and set the report_mask for selected subset. - * Returns the report_mask of the selected subset. + * Pick a subset of marks from queue heads, all with the same group + * and set the report_mask to a subset of the selected marks. + * Returns false if there are no more groups to iterate. */ -static unsigned int fsnotify_iter_select_report_types( +static bool fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; @@ -423,30 +408,48 @@ static unsigned int fsnotify_iter_select_report_types( } if (!max_prio_group) - return 0; + return false; /* Set the report mask for marks from same group as max prio group */ + iter_info->current_group = max_prio_group; iter_info->report_mask = 0; fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; - if (mark && - fsnotify_compare_groups(max_prio_group, mark->group) == 0) + if (mark && mark->group == iter_info->current_group) { + /* + * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode + * is watching children and interested in this event, + * which is an event possible on child. + * But is *this mark* watching children? + */ + if (type == FSNOTIFY_ITER_TYPE_PARENT && + !(mark->mask & FS_EVENT_ON_CHILD)) + continue; + fsnotify_iter_set_report_type(iter_info, type); + } } - return iter_info->report_mask; + return true; } /* - * Pop from iter_info multi head queue, the marks that were iterated in the + * Pop from iter_info multi head queue, the marks that belong to the group of * current iteration step. */ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *mark; int type; + /* + * We cannot use fsnotify_foreach_iter_mark_type() here because we + * may need to advance a mark of type X that belongs to current_group + * but was not selected for reporting. + */ fsnotify_foreach_iter_type(type) { - if (fsnotify_iter_should_report_type(iter_info, type)) + mark = iter_info->marks[type]; + if (mark && mark->group == iter_info->current_group) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } @@ -581,7 +584,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/group.c b/fs/notify/group.c index b7d4d64f87c2..1de6631a3925 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -112,8 +112,10 @@ void fsnotify_put_group(struct fsnotify_group *group) EXPORT_SYMBOL_GPL(fsnotify_put_group); static struct fsnotify_group *__fsnotify_alloc_group( - const struct fsnotify_ops *ops, gfp_t gfp) + const struct fsnotify_ops *ops, + int flags, gfp_t gfp) { + static struct lock_class_key nofs_marks_lock; struct fsnotify_group *group; group = kzalloc(sizeof(struct fsnotify_group), gfp); @@ -133,6 +135,17 @@ static struct fsnotify_group *__fsnotify_alloc_group( INIT_LIST_HEAD(&group->marks_list); group->ops = ops; + group->flags = flags; + /* + * For most backends, eviction of inode with a mark is not expected, + * because marks hold a refcount on the inode against eviction. + * + * Use a different lockdep class for groups that support evictable + * inode marks, because with evictable marks, mark_mutex is NOT + * fs-reclaim safe - the mutex is taken when evicting inodes. + */ + if (flags & FSNOTIFY_GROUP_NOFS) + lockdep_set_class(&group->mark_mutex, &nofs_marks_lock); return group; } @@ -140,20 +153,15 @@ static struct fsnotify_group *__fsnotify_alloc_group( /* * Create a new fsnotify_group and hold a reference for the group returned. */ -struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) +struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops, + int flags) { - return __fsnotify_alloc_group(ops, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(fsnotify_alloc_group); + gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT : + GFP_KERNEL; -/* - * Create a new fsnotify_group and hold a reference for the group returned. - */ -struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops) -{ - return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT); + return __fsnotify_alloc_group(ops, flags, gfp); } -EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group); +EXPORT_SYMBOL_GPL(fsnotify_alloc_group); int fsnotify_fasync(int fd, struct file *file, int on) { diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 2007e3711916..7d5df7a21539 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -22,6 +22,25 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct inotify_event_info, fse); } +/* + * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to + * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is + * used only internally to the kernel. + */ +#define INOTIFY_USER_MASK (IN_ALL_EVENTS) + +static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) +{ + __u32 mask = fsn_mark->mask & INOTIFY_USER_MASK; + + if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) + mask |= IN_EXCL_UNLINK; + if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) + mask |= IN_ONESHOT; + + return mask; +} + extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d92d7b0adc9a..49cfe2ae6d23 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -122,7 +122,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, fsnotify_destroy_event(group, fsn_event); } - if (inode_mark->mask & IN_ONESHOT) + if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 54583f62dc44..ed42a189faa2 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -110,11 +110,26 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) mask |= FS_EVENT_ON_CHILD; /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); + mask |= (arg & INOTIFY_USER_MASK); return mask; } +#define INOTIFY_MARK_FLAGS \ + (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) + +static inline unsigned int inotify_arg_to_flags(u32 arg) +{ + unsigned int flags = 0; + + if (arg & IN_EXCL_UNLINK) + flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; + if (arg & IN_ONESHOT) + flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; + + return flags; +} + static inline u32 inotify_mask_to_arg(__u32 mask) { return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | @@ -526,13 +541,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; - __u32 mask; - int add = (arg & IN_MASK_ADD); + int replace = !(arg & IN_MASK_ADD); int create = (arg & IN_MASK_CREATE); int ret; - mask = inotify_arg_to_mask(inode, arg); - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; @@ -545,10 +557,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, spin_lock(&fsn_mark->lock); old_mask = fsn_mark->mask; - if (add) - fsn_mark->mask |= mask; - else - fsn_mark->mask = mask; + if (replace) { + fsn_mark->mask = 0; + fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; + } + fsn_mark->mask |= inotify_arg_to_mask(inode, arg); + fsn_mark->flags |= inotify_arg_to_flags(arg); new_mask = fsn_mark->mask; spin_unlock(&fsn_mark->lock); @@ -579,19 +593,17 @@ static int inotify_new_watch(struct fsnotify_group *group, u32 arg) { struct inotify_inode_mark *tmp_i_mark; - __u32 mask; int ret; struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; - mask = inotify_arg_to_mask(inode, arg); - tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) return -ENOMEM; fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); - tmp_i_mark->fsn_mark.mask = mask; + tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); + tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); tmp_i_mark->wd = -1; ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); @@ -628,13 +640,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { int ret = 0; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return ret; } @@ -644,7 +656,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) struct fsnotify_group *group; struct inotify_event_info *oevent; - group = fsnotify_alloc_user_group(&inotify_fsnotify_ops); + group = fsnotify_alloc_group(&inotify_fsnotify_ops, + FSNOTIFY_GROUP_USER); if (IS_ERR(group)) return group; @@ -845,9 +858,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); - BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); - BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 4853184f7dde..c74ef947447d 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -116,20 +116,64 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) return *fsnotify_conn_mask_p(conn); } -static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +static void fsnotify_get_inode_ref(struct inode *inode) +{ + ihold(inode); + atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); +} + +/* + * Grab or drop inode reference for the connector if needed. + * + * When it's time to drop the reference, we only clear the HAS_IREF flag and + * return the inode object. fsnotify_drop_object() will be resonsible for doing + * iput() outside of spinlocks. This happens when last mark that wanted iref is + * detached. + */ +static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn, + bool want_iref) +{ + bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; + struct inode *inode = NULL; + + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE || + want_iref == has_iref) + return NULL; + + if (want_iref) { + /* Pin inode if any mark wants inode refcount held */ + fsnotify_get_inode_ref(fsnotify_conn_inode(conn)); + conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF; + } else { + /* Unpin inode after detach of last mark that wanted iref */ + inode = fsnotify_conn_inode(conn); + conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF; + } + + return inode; +} + +static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; + bool want_iref = false; struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); /* We can get detached connector here when inode is getting unlinked. */ if (!fsnotify_valid_obj_type(conn->type)) - return; + return NULL; hlist_for_each_entry(mark, &conn->list, obj_list) { - if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) - new_mask |= fsnotify_calc_mask(mark); + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) + continue; + new_mask |= fsnotify_calc_mask(mark); + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE && + !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) + want_iref = true; } *fsnotify_conn_mask_p(conn) = new_mask; + + return fsnotify_update_iref(conn, want_iref); } /* @@ -169,12 +213,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static void fsnotify_get_inode_ref(struct inode *inode) -{ - ihold(inode); - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); -} - static void fsnotify_put_inode_ref(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -213,6 +251,10 @@ static void *fsnotify_detach_connector_from_object( if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + + /* Unpin inode when detaching from connector */ + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF)) + inode = NULL; } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { @@ -274,7 +316,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { - __fsnotify_recalc_mask(conn); + objp = __fsnotify_recalc_mask(conn); + type = conn->type; } WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); @@ -398,9 +441,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) */ void fsnotify_detach_mark(struct fsnotify_mark *mark) { - struct fsnotify_group *group = mark->group; - - WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + fsnotify_group_assert_locked(mark->group); WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && refcount_read(&mark->refcnt) < 1 + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); @@ -452,9 +493,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + fsnotify_group_lock(group); fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); fsnotify_free_mark(mark); } EXPORT_SYMBOL_GPL(fsnotify_destroy_mark); @@ -499,7 +540,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, unsigned int obj_type, __kernel_fsid_t *fsid) { - struct inode *inode = NULL; struct fsnotify_mark_connector *conn; conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); @@ -507,6 +547,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, return -ENOMEM; spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); + conn->flags = 0; conn->type = obj_type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ @@ -517,10 +558,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->fsid.val[0] = conn->fsid.val[1] = 0; conn->flags = 0; } - if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { - inode = fsnotify_conn_inode(conn); - fsnotify_get_inode_ref(inode); - } fsnotify_get_sb_connectors(conn); /* @@ -529,8 +566,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - if (inode) - fsnotify_put_inode_ref(inode); fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); } @@ -574,7 +609,7 @@ out: static int fsnotify_add_mark_list(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, - int allow_dups, __kernel_fsid_t *fsid) + int add_flags, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; @@ -633,7 +668,7 @@ restart: if ((lmark->group == mark->group) && (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && - !allow_dups) { + !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) { err = -EEXIST; goto out_err; } @@ -668,12 +703,12 @@ out_err: */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, - int allow_dups, __kernel_fsid_t *fsid) + int add_flags, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; - BUG_ON(!mutex_is_locked(&group->mark_mutex)); + fsnotify_group_assert_locked(group); /* * LOCKING ORDER!!!! @@ -688,12 +723,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, obj_type, allow_dups, fsid); + ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid); if (ret) goto err; - if (mark->mask || mark->ignored_mask) - fsnotify_recalc_mask(mark->connector); + fsnotify_recalc_mask(mark->connector); return ret; err: @@ -708,15 +742,15 @@ err: } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int obj_type, int allow_dups, + unsigned int obj_type, int add_flags, __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; - mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, connp, obj_type, allow_dups, fsid); - mutex_unlock(&group->mark_mutex); + fsnotify_group_lock(group); + ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid); + fsnotify_group_unlock(group); return ret; } EXPORT_SYMBOL_GPL(fsnotify_add_mark); @@ -770,24 +804,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, * move marks to free to to_free list in one go and then free marks in * to_free list one by one. */ - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + fsnotify_group_lock(group); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if (mark->connector->type == obj_type) list_move(&mark->g_list, &to_free); } - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); clear: while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + fsnotify_group_lock(group); if (list_empty(head)) { - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); break; } mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); fsnotify_free_mark(mark); fsnotify_put_mark(mark); } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 419cadcd7ff5..edc28555814c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -66,6 +66,7 @@ FAN_MARK_ONLYDIR | \ FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_SURV_MODIFY | \ + FAN_MARK_EVICTABLE | \ FAN_MARK_FLUSH) /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0805b74cae44..9560734759fa 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -20,6 +20,7 @@ #include <linux/user_namespace.h> #include <linux/refcount.h> #include <linux/mempool.h> +#include <linux/sched/mm.h> /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -55,7 +56,6 @@ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ -#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. @@ -66,7 +66,6 @@ #define FS_RENAME 0x10000000 /* File was renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ -#define FS_IN_ONESHOT 0x80000000 /* only send event once */ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) @@ -106,8 +105,7 @@ FS_ERROR) /* Extra flags that may be reported with event or control handling of events */ -#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ - FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) +#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) @@ -213,6 +211,12 @@ struct fsnotify_group { unsigned int priority; bool shutdown; /* group is being shut down, don't queue more events */ +#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ +#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ +#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */ + int flags; + unsigned int owner_flags; /* stored flags of mark_mutex owner */ + /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ atomic_t user_waits; /* Number of tasks waiting for user @@ -253,6 +257,31 @@ struct fsnotify_group { }; }; +/* + * These helpers are used to prevent deadlock when reclaiming inodes with + * evictable marks of the same group that is allocating a new mark. + */ +static inline void fsnotify_group_lock(struct fsnotify_group *group) +{ + mutex_lock(&group->mark_mutex); + if (group->flags & FSNOTIFY_GROUP_NOFS) + group->owner_flags = memalloc_nofs_save(); +} + +static inline void fsnotify_group_unlock(struct fsnotify_group *group) +{ + if (group->flags & FSNOTIFY_GROUP_NOFS) + memalloc_nofs_restore(group->owner_flags); + mutex_unlock(&group->mark_mutex); +} + +static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) +{ + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + if (group->flags & FSNOTIFY_GROUP_NOFS) + WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); +} + /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, @@ -370,6 +399,7 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type) struct fsnotify_iter_info { struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; + struct fsnotify_group *current_group; unsigned int report_mask; int srcu_idx; }; @@ -386,20 +416,31 @@ static inline void fsnotify_iter_set_report_type( iter_info->report_mask |= (1U << iter_type); } -static inline void fsnotify_iter_set_report_type_mark( - struct fsnotify_iter_info *iter_info, int iter_type, - struct fsnotify_mark *mark) +static inline struct fsnotify_mark *fsnotify_iter_mark( + struct fsnotify_iter_info *iter_info, int iter_type) { - iter_info->marks[iter_type] = mark; - iter_info->report_mask |= (1U << iter_type); + if (fsnotify_iter_should_report_type(iter_info, iter_type)) + return iter_info->marks[iter_type]; + return NULL; +} + +static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type, + struct fsnotify_mark **markp) +{ + while (type < FSNOTIFY_ITER_TYPE_COUNT) { + *markp = fsnotify_iter_mark(iter, type); + if (*markp) + break; + type++; + } + return type; } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ - return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \ - iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \ + return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \ } FSNOTIFY_ITER_FUNCS(inode, INODE) @@ -409,6 +450,11 @@ FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_iter_type(type) \ for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) +#define fsnotify_foreach_iter_mark_type(iter, mark, type) \ + for (type = 0; \ + type = fsnotify_iter_step(iter, type, &mark), \ + type < FSNOTIFY_ITER_TYPE_COUNT; \ + type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached @@ -427,6 +473,7 @@ struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ #define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 +#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { @@ -473,9 +520,15 @@ struct fsnotify_mark { struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 -#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 -#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 + /* General fsnotify mark flags */ +#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 +#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 + /* inotify mark flags */ +#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010 +#define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020 + /* fanotify mark flags */ +#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 +#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 unsigned int flags; /* flags [mark->lock] */ }; @@ -541,8 +594,9 @@ static inline void fsnotify_update_flags(struct dentry *dentry) /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ -extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); -extern struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops); +extern struct fsnotify_group *fsnotify_alloc_group( + const struct fsnotify_ops *ops, + int flags); /* get reference to a group */ extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ @@ -635,26 +689,26 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, - int allow_dups, __kernel_fsid_t *fsid); + int add_flags, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int obj_type, int allow_dups, + unsigned int obj_type, int add_flags, __kernel_fsid_t *fsid); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, - int allow_dups) + int add_flags) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); + FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, - int allow_dups) + int add_flags) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, allow_dups, + FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); } diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index e8ac38cc2fd6..f1f89132d60e 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -82,6 +82,7 @@ #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 /* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 /* These are NOT bitwise flags. Both bits can be used togther. */ #define FAN_MARK_INODE 0x00000000 diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index 02348b48447c..6432a37ac1c9 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa audit_update_mark(audit_mark, dentry->d_inode); audit_mark->rule = krule; - ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true); + ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0); if (ret < 0) { fsnotify_put_mark(&audit_mark->mark); audit_mark = ERR_PTR(ret); @@ -181,7 +181,8 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = { static int __init audit_fsnotify_init(void) { - audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops); + audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops, + FSNOTIFY_GROUP_DUPS); if (IS_ERR(audit_fsnotify_group)) { audit_fsnotify_group = NULL; audit_panic("cannot create audit fsnotify group"); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index e7315d487163..e867c17d3f84 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -351,7 +351,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) struct audit_chunk *new; int size; - mutex_lock(&audit_tree_group->mark_mutex); + fsnotify_group_lock(audit_tree_group); /* * mark_mutex stabilizes chunk attached to the mark so we can check * whether it didn't change while we've dropped hash_lock. @@ -368,7 +368,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) replace_mark_chunk(mark, NULL); spin_unlock(&hash_lock); fsnotify_detach_mark(mark); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); audit_mark_put_chunk(chunk); fsnotify_free_mark(mark); return; @@ -385,12 +385,12 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *mark) */ replace_chunk(new, chunk); spin_unlock(&hash_lock); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); audit_mark_put_chunk(chunk); return; out_mutex: - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); } /* Call with group->mark_mutex held, releases it */ @@ -400,19 +400,19 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) struct audit_chunk *chunk = alloc_chunk(1); if (!chunk) { - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); return -ENOMEM; } mark = alloc_mark(); if (!mark) { - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); kfree(chunk); return -ENOMEM; } if (fsnotify_add_inode_mark_locked(mark, inode, 0)) { - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); fsnotify_put_mark(mark); kfree(chunk); return -ENOSPC; @@ -422,7 +422,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) if (tree->goner) { spin_unlock(&hash_lock); fsnotify_detach_mark(mark); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); fsnotify_free_mark(mark); fsnotify_put_mark(mark); kfree(chunk); @@ -444,7 +444,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) */ insert_hash(chunk); spin_unlock(&hash_lock); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); /* * Drop our initial reference. When mark we point to is getting freed, * we get notification through ->freeing_mark callback and cleanup @@ -462,7 +462,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) struct audit_node *p; int n; - mutex_lock(&audit_tree_group->mark_mutex); + fsnotify_group_lock(audit_tree_group); mark = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group); if (!mark) return create_chunk(inode, tree); @@ -478,7 +478,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) for (n = 0; n < old->count; n++) { if (old->owners[n].owner == tree) { spin_unlock(&hash_lock); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); fsnotify_put_mark(mark); return 0; } @@ -487,7 +487,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) chunk = alloc_chunk(old->count + 1); if (!chunk) { - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); fsnotify_put_mark(mark); return -ENOMEM; } @@ -495,7 +495,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_lock(&hash_lock); if (tree->goner) { spin_unlock(&hash_lock); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); fsnotify_put_mark(mark); kfree(chunk); return 0; @@ -515,7 +515,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) */ replace_chunk(chunk, old); spin_unlock(&hash_lock); - mutex_unlock(&audit_tree_group->mark_mutex); + fsnotify_group_unlock(audit_tree_group); fsnotify_put_mark(mark); /* pair to fsnotify_find_mark */ audit_mark_put_chunk(old); @@ -1044,12 +1044,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *mark, { struct audit_chunk *chunk; - mutex_lock(&mark->group->mark_mutex); + fsnotify_group_lock(mark->group); spin_lock(&hash_lock); chunk = mark_chunk(mark); replace_mark_chunk(mark, NULL); spin_unlock(&hash_lock); - mutex_unlock(&mark->group->mark_mutex); + fsnotify_group_unlock(mark->group); if (chunk) { evict_chunk(chunk); audit_mark_put_chunk(chunk); @@ -1074,7 +1074,7 @@ static int __init audit_tree_init(void) audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC); - audit_tree_group = fsnotify_alloc_group(&audit_tree_ops); + audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0); if (IS_ERR(audit_tree_group)) audit_panic("cannot initialize fsnotify group for rectree watches"); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 713b256be944..4b0957aa2cd4 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -493,7 +493,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = { static int __init audit_watch_init(void) { - audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops); + audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0); if (IS_ERR(audit_watch_group)) { audit_watch_group = NULL; audit_panic("cannot create audit fsnotify group"); |