diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 09:50:21 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 09:50:21 -0700 |
commit | 64bf6ae93e08787f4a6db8dddf671fd3a9c43916 (patch) | |
tree | 120d7fe35d953aeb31b1fd6bdcceeab223f89c92 /include/linux | |
parent | 5c1c88cddb79d3ed3fb1d02a3eaf529eded76f05 (diff) | |
parent | 2507135e4ff231a368eae38000a501da0b96c662 (diff) |
Merge tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner:
"Miscellaneous features, cleanups, and fixes for vfs and individual fs
Features:
- Use mode 0600 for file created by cachefilesd so it can be run by
unprivileged users. This aligns them with directories which are
already created with mode 0700 by cachefilesd
- Reorder a few members in struct file to prevent some false sharing
scenarios
- Indicate that an eventfd is used a semaphore in the eventfd's
fdinfo procfs file
- Add a missing uapi header for eventfd exposing relevant uapi
defines
- Let the VFS protect transitions of a superblock from read-only to
read-write in addition to the protection it already provides for
transitions from read-write to read-only. Protecting read-only to
read-write transitions allows filesystems such as ext4 to perform
internal writes, keeping writers away until the transition is
completed
Cleanups:
- Arnd removed the architecture specific arch_report_meminfo()
prototypes and added a generic one into procfs.h. Note, we got a
report about a warning in amdpgpu codepaths that suggested this was
bisectable to this change but we concluded it was a false positive
- Remove unused parameters from split_fs_names()
- Rename put_and_unmap_page() to unmap_and_put_page() to let the name
reflect the order of the cleanup operation that has to unmap before
the actual put
- Unexport buffer_check_dirty_writeback() as it is not used outside
of block device aops
- Stop allocating aio rings from highmem
- Protecting read-{only,write} transitions in the VFS used open-coded
barriers in various places. Replace them with proper little helpers
and document both the helpers and all barrier interactions involved
when transitioning between read-{only,write} states
- Use flexible array members in old readdir codepaths
Fixes:
- Use the correct type __poll_t for epoll and eventfd
- Replace all deprecated strlcpy() invocations, whose return value
isn't checked with an equivalent strscpy() call
- Fix some kernel-doc warnings in fs/open.c
- Reduce the stack usage in jffs2's xattr codepaths finally getting
rid of this: fs/jffs2/xattr.c:887:1: error: the frame size of 1088
bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
royally annoying compilation warning
- Use __FMODE_NONOTIFY instead of FMODE_NONOTIFY where an int and not
fmode_t is required to avoid fmode_t to integer degradation
warnings
- Create coredumps with O_WRONLY instead of O_RDWR. There's a long
explanation in that commit how O_RDWR is actually a bug which we
found out with the help of Linus and git archeology
- Fix "no previous prototype" warnings in the pipe codepaths
- Add overflow calculations for remap_verify_area() as a signed
addition overflow could be triggered in xfstests
- Fix a null pointer dereference in sysv
- Use an unsigned variable for length calculations in jfs avoiding
compilation warnings with gcc 13
- Fix a dangling pipe pointer in the watch queue codepath
- The legacy mount option parser provided as a fallback by the VFS
for filesystems not yet converted to the new mount api did prefix
the generated mount option string with a leading ',' causing issues
for some filesystems
- Fix a repeated word in a comment in fs.h
- autofs: Update the ctime when mtime is updated as mandated by
POSIX"
* tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (27 commits)
readdir: Replace one-element arrays with flexible-array members
fs: Provide helpers for manipulating sb->s_readonly_remount
fs: Protect reconfiguration of sb read-write from racing writes
eventfd: add a uapi header for eventfd userspace APIs
autofs: set ctime as well when mtime changes on a dir
eventfd: show the EFD_SEMAPHORE flag in fdinfo
fs/aio: Stop allocating aio rings from HIGHMEM
fs: Fix comment typo
fs: unexport buffer_check_dirty_writeback
fs: avoid empty option when generating legacy mount string
watch_queue: prevent dangling pipe pointer
fs.h: Optimize file struct to prevent false sharing
highmem: Rename put_and_unmap_page() to unmap_and_put_page()
cachefiles: Allow the cache to be non-root
init: remove unused names parameter in split_fs_names()
jfs: Use unsigned variable for length calculations
fs/sysv: Null check to prevent null-ptr-deref bug
fs: use UB-safe check for signed addition overflow in remap_verify_area
procfs: consolidate arch_report_meminfo declaration
fs: pipe: reveal missing function protoypes
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/eventfd.h | 8 | ||||
-rw-r--r-- | include/linux/fs.h | 20 | ||||
-rw-r--r-- | include/linux/highmem.h | 2 | ||||
-rw-r--r-- | include/linux/pipe_fs_i.h | 4 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 2 | ||||
-rw-r--r-- | include/linux/watch_queue.h | 3 |
6 files changed, 19 insertions, 20 deletions
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 36a486505b08..b9d83652c097 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -9,12 +9,12 @@ #ifndef _LINUX_EVENTFD_H #define _LINUX_EVENTFD_H -#include <linux/fcntl.h> #include <linux/wait.h> #include <linux/err.h> #include <linux/percpu-defs.h> #include <linux/percpu.h> #include <linux/sched.h> +#include <uapi/linux/eventfd.h> /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -23,10 +23,6 @@ * from eventfd, in order to leave a free define-space for * shared O_* flags. */ -#define EFD_SEMAPHORE (1 << 0) -#define EFD_CLOEXEC O_CLOEXEC -#define EFD_NONBLOCK O_NONBLOCK - #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) @@ -40,7 +36,7 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); -__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); diff --git a/include/linux/fs.h b/include/linux/fs.h index 133f0640fb24..66f105ef3427 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -956,29 +956,35 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } +/* + * f_{lock,count,pos_lock} members can be highly contended and share + * the same cacheline. f_{lock,mode} are very frequently used together + * and so share the same cacheline as well. The read-mostly + * f_{path,inode,op} are kept on a separate cacheline. + */ struct file { union { struct llist_node f_llist; struct rcu_head f_rcuhead; unsigned int f_iocb_flags; }; - struct path f_path; - struct inode *f_inode; /* cached value */ - const struct file_operations *f_op; /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; - atomic_long_t f_count; - unsigned int f_flags; fmode_t f_mode; + atomic_long_t f_count; struct mutex f_pos_lock; loff_t f_pos; + unsigned int f_flags; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; + struct path f_path; + struct inode *f_inode; /* cached value */ + const struct file_operations *f_op; u64 f_version; #ifdef CONFIG_SECURITY @@ -1242,7 +1248,7 @@ struct super_block { */ atomic_long_t s_fsnotify_connectors; - /* Being remounted read-only */ + /* Read-only state of the superblock is being changed */ int s_readonly_remount; /* per-sb errseq_t for reporting writeback errors via syncfs */ @@ -2669,7 +2675,7 @@ extern void evict_inodes(struct super_block *sb); void dump_mapping(const struct address_space *); /* - * Userspace may rely on the the inode number being non-zero. For example, glibc + * Userspace may rely on the inode number being non-zero. For example, glibc * simply ignores files with zero i_ino in unlink() and other places. * * As an additional complication, if userspace was compiled with diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 4de1dbcd3ef6..68da30625a6c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -507,7 +507,7 @@ static inline void folio_zero_range(struct folio *folio, zero_user_segments(&folio->page, start, start + length, 0, 0); } -static inline void put_and_unmap_page(struct page *page, void *addr) +static inline void unmap_and_put_page(struct page *page, void *addr) { kunmap_local(addr); put_page(page); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index d2c3f16cf6b1..02e0086b10f6 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; -#ifdef CONFIG_WATCH_QUEUE unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new); bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool pipe_is_unprivileged_user(void); -#endif /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ -#ifdef CONFIG_WATCH_QUEUE int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); -#endif long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 0260f5ea98fe..253f2676d93a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); #endif /* CONFIG_PROC_PID_ARCH_STATUS */ +void arch_report_meminfo(struct seq_file *m); + #else /* CONFIG_PROC_FS */ static inline void proc_root_init(void) diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index fc6bba20273b..45cd42f55d49 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -38,7 +38,7 @@ struct watch_filter { struct watch_queue { struct rcu_head rcu; struct watch_filter __rcu *filter; - struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */ + struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */ struct hlist_head watches; /* Contributory watches */ struct page **notes; /* Preallocated notifications */ unsigned long *notes_bitmap; /* Allocation bitmap for notes */ @@ -46,7 +46,6 @@ struct watch_queue { spinlock_t lock; unsigned int nr_notes; /* Number of notes */ unsigned int nr_pages; /* Number of pages in notes[] */ - bool defunct; /* T when queues closed */ }; /* |