diff options
Diffstat (limited to 'fs/notify/fanotify/fanotify.c')
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 166 |
1 file changed, 119 insertions, 47 deletions
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 1192c9953620..057abd2cf887 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -14,6 +14,7 @@ #include <linux/audit.h> #include <linux/sched/mm.h> #include <linux/statfs.h> +#include <linux/stringhash.h> #include "fanotify.h" @@ -22,12 +23,24 @@ static bool fanotify_path_equal(struct path *p1, struct path *p2) return p1->mnt == p2->mnt && p1->dentry == p2->dentry; } +static unsigned int fanotify_hash_path(const struct path *path) +{ + return hash_ptr(path->dentry, FANOTIFY_EVENT_HASH_BITS) ^ + hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS); +} + static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, __kernel_fsid_t *fsid2) { return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; } +static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) +{ + return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ + hash_32(fsid->val[1], FANOTIFY_EVENT_HASH_BITS); +} + static bool fanotify_fh_equal(struct fanotify_fh *fh1, struct fanotify_fh *fh2) { @@ -38,6 +51,16 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1, !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); } +static unsigned int fanotify_hash_fh(struct fanotify_fh *fh) +{ + long salt = (long)fh->type | (long)fh->len << 8; + + /* + * full_name_hash() works long by long, so it handles fh buf optimally. 
+ */ + return full_name_hash((void *)salt, fanotify_fh_buf(fh), fh->len); +} + static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, struct fanotify_fid_event *ffe2) { @@ -88,16 +111,12 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, return fanotify_info_equal(info1, info2); } -static bool fanotify_should_merge(struct fsnotify_event *old_fsn, - struct fsnotify_event *new_fsn) +static bool fanotify_should_merge(struct fanotify_event *old, + struct fanotify_event *new) { - struct fanotify_event *old, *new; - - pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); - old = FANOTIFY_E(old_fsn); - new = FANOTIFY_E(new_fsn); + pr_debug("%s: old=%p new=%p\n", __func__, old, new); - if (old_fsn->objectid != new_fsn->objectid || + if (old->hash != new->hash || old->type != new->type || old->pid != new->pid) return false; @@ -129,14 +148,20 @@ static bool fanotify_should_merge(struct fsnotify_event *old_fsn, return false; } +/* Limit event merges to limit CPU overhead per event */ +#define FANOTIFY_MAX_MERGE_EVENTS 128 + /* and the list better be locked by something too! 
*/ -static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) +static int fanotify_merge(struct fsnotify_group *group, + struct fsnotify_event *event) { - struct fsnotify_event *test_event; - struct fanotify_event *new; + struct fanotify_event *old, *new = FANOTIFY_E(event); + unsigned int bucket = fanotify_event_hash_bucket(group, new); + struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; + int i = 0; - pr_debug("%s: list=%p event=%p\n", __func__, list, event); - new = FANOTIFY_E(event); + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, + group, event, bucket); /* * Don't merge a permission event with any other event so that we know @@ -146,9 +171,11 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) if (fanotify_is_perm_event(new->mask)) return 0; - list_for_each_entry_reverse(test_event, list, list) { - if (fanotify_should_merge(test_event, event)) { - FANOTIFY_E(test_event)->mask |= new->mask; + hlist_for_each_entry(old, hlist, merge_list) { + if (++i > FANOTIFY_MAX_MERGE_EVENTS) + break; + if (fanotify_should_merge(old, new)) { + old->mask |= new->mask; return 1; } } @@ -184,8 +211,11 @@ static int fanotify_get_response(struct fsnotify_group *group, return ret; } /* Event not yet reported? Just remove it. */ - if (event->state == FAN_EVENT_INIT) + if (event->state == FAN_EVENT_INIT) { fsnotify_remove_queued_event(group, &event->fae.fse); + /* Permission events are not supposed to be hashed */ + WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list)); + } /* * Event may be also answered in case signal delivery raced * with wakeup. In that case we have nothing to do besides @@ -329,7 +359,8 @@ static int fanotify_encode_fh_len(struct inode *inode) * Return 0 on failure to encode. 
*/ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, - unsigned int fh_len, gfp_t gfp) + unsigned int fh_len, unsigned int *hash, + gfp_t gfp) { int dwords, type = 0; char *ext_buf = NULL; @@ -372,6 +403,9 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = type; fh->len = fh_len; + /* Mix fh into event merge key */ + *hash ^= fanotify_hash_fh(fh); + return FANOTIFY_FH_HDR_LEN + fh_len; out_err: @@ -425,6 +459,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, } static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, + unsigned int *hash, gfp_t gfp) { struct fanotify_path_event *pevent; @@ -435,6 +470,7 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH; pevent->path = *path; + *hash ^= fanotify_hash_path(path); path_get(path); return &pevent->fae; @@ -460,6 +496,7 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, __kernel_fsid_t *fsid, + unsigned int *hash, gfp_t gfp) { struct fanotify_fid_event *ffe; @@ -470,16 +507,18 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; ffe->fsid = *fsid; + *hash ^= fanotify_hash_fsid(fsid); fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), - gfp); + hash, gfp); return &ffe->fae; } static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, __kernel_fsid_t *fsid, - const struct qstr *file_name, + const struct qstr *name, struct inode *child, + unsigned int *hash, gfp_t gfp) { struct fanotify_name_event *fne; @@ -492,24 +531,30 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len; if (child_fh_len) size += FANOTIFY_FH_HDR_LEN + child_fh_len; - if (file_name) - 
size += file_name->len + 1; + if (name) + size += name->len + 1; fne = kmalloc(size, gfp); if (!fne) return NULL; fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME; fne->fsid = *fsid; + *hash ^= fanotify_hash_fsid(fsid); info = &fne->info; fanotify_info_init(info); dfh = fanotify_info_dir_fh(info); - info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0); + info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, hash, 0); if (child_fh_len) { ffh = fanotify_info_file_fh(info); - info->file_fh_totlen = fanotify_encode_fh(ffh, child, child_fh_len, 0); + info->file_fh_totlen = fanotify_encode_fh(ffh, child, + child_fh_len, hash, 0); + } + if (name) { + long salt = name->len; + + fanotify_info_copy_name(info, name); + *hash ^= full_name_hash((void *)salt, name->name, name->len); } - if (file_name) - fanotify_info_copy_name(info, file_name); pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", __func__, id->i_ino, size, dir_fh_len, child_fh_len, @@ -533,6 +578,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct mem_cgroup *old_memcg; struct inode *child = NULL; bool name_event = false; + unsigned int hash = 0; + bool ondir = mask & FAN_ONDIR; + struct pid *pid; if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { /* @@ -540,8 +588,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, * report the child fid for events reported on a non-dir child * in addition to reporting the parent fid and maybe child name. 
*/ - if ((fid_mode & FAN_REPORT_FID) && - id != dirid && !(mask & FAN_ONDIR)) + if ((fid_mode & FAN_REPORT_FID) && id != dirid && !ondir) child = id; id = dirid; @@ -562,8 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if (!(fid_mode & FAN_REPORT_NAME)) { name_event = !!child; file_name = NULL; - } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || - !(mask & FAN_ONDIR)) { + } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { name_event = true; } } @@ -586,26 +632,25 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, event = fanotify_alloc_perm_event(path, gfp); } else if (name_event && (file_name || child)) { event = fanotify_alloc_name_event(id, fsid, file_name, child, - gfp); + &hash, gfp); } else if (fid_mode) { - event = fanotify_alloc_fid_event(id, fsid, gfp); + event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); } else { - event = fanotify_alloc_path_event(path, gfp); + event = fanotify_alloc_path_event(path, &hash, gfp); } if (!event) goto out; - /* - * Use the victim inode instead of the watching inode as the id for - * event queue, so event reported on parent is merged with event - * reported on child when both directory and child watches exist. - */ - fanotify_init_event(event, (unsigned long)id, mask); if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) - event->pid = get_pid(task_pid(current)); + pid = get_pid(task_pid(current)); else - event->pid = get_pid(task_tgid(current)); + pid = get_pid(task_tgid(current)); + + /* Mix event info, FAN_ONDIR flag and pid into event merge key */ + hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS); + fanotify_init_event(event, hash, mask); + event->pid = pid; out: set_active_memcg(old_memcg); @@ -645,6 +690,24 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) return fsid; } +/* + * Add an event to hash table for faster merge. 
+ */ +static void fanotify_insert_event(struct fsnotify_group *group, + struct fsnotify_event *fsn_event) +{ + struct fanotify_event *event = FANOTIFY_E(fsn_event); + unsigned int bucket = fanotify_event_hash_bucket(group, event); + struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; + + assert_spin_locked(&group->notification_lock); + + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, + group, event, bucket); + + hlist_add_head(&event->merge_list, hlist); +} + static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, @@ -715,7 +778,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, } fsn_event = &event->fse; - ret = fsnotify_add_event(group, fsn_event, fanotify_merge); + ret = fsnotify_add_event(group, fsn_event, fanotify_merge, + fanotify_is_hashed_event(mask) ? + fanotify_insert_event : NULL); if (ret) { /* Permission events shouldn't be merged */ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); @@ -736,11 +801,10 @@ finish: static void fanotify_free_group_priv(struct fsnotify_group *group) { - struct user_struct *user; - - user = group->fanotify_data.user; - atomic_dec(&user->fanotify_listeners); - free_uid(user); + kfree(group->fanotify_data.merge_hash); + if (group->fanotify_data.ucounts) + dec_ucount(group->fanotify_data.ucounts, + UCOUNT_FANOTIFY_GROUPS); } static void fanotify_free_path_event(struct fanotify_event *event) @@ -796,6 +860,13 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) } } +static void fanotify_freeing_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group) +{ + if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) + dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_MARKS); +} + static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) { kmem_cache_free(fanotify_mark_cache, fsn_mark); @@ -805,5 +876,6 @@ const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = 
fanotify_handle_event, .free_group_priv = fanotify_free_group_priv, .free_event = fanotify_free_event, + .freeing_mark = fanotify_freeing_mark, .free_mark = fanotify_free_mark, }; |