summary refs log tree commit diff
path: root/include/linux
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-06-26 09:50:21 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-06-26 09:50:21 -0700
commit 64bf6ae93e08787f4a6db8dddf671fd3a9c43916 (patch)
tree 120d7fe35d953aeb31b1fd6bdcceeab223f89c92 /include/linux
parent 5c1c88cddb79d3ed3fb1d02a3eaf529eded76f05 (diff)
parent 2507135e4ff231a368eae38000a501da0b96c662 (diff)
Merge tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "Miscellaneous features, cleanups, and fixes for vfs and individual fs Features: - Use mode 0600 for files created by cachefilesd so it can be run by unprivileged users. This aligns them with directories which are already created with mode 0700 by cachefilesd - Reorder a few members in struct file to prevent some false sharing scenarios - Indicate that an eventfd is used as a semaphore in the eventfd's fdinfo procfs file - Add a missing uapi header for eventfd exposing relevant uapi defines - Let the VFS protect transitions of a superblock from read-only to read-write in addition to the protection it already provides for transitions from read-write to read-only. Protecting read-only to read-write transitions allows filesystems such as ext4 to perform internal writes, keeping writers away until the transition is completed Cleanups: - Arnd removed the architecture specific arch_report_meminfo() prototypes and added a generic one into proc_fs.h. Note, we got a report about a warning in amdgpu codepaths that suggested this was bisectable to this change but we concluded it was a false positive - Remove unused parameters from split_fs_names() - Rename put_and_unmap_page() to unmap_and_put_page() to let the name reflect the order of the cleanup operation that has to unmap before the actual put - Unexport buffer_check_dirty_writeback() as it is not used outside of block device aops - Stop allocating aio rings from highmem - Protecting read-{only,write} transitions in the VFS used open-coded barriers in various places. 
Replace them with proper little helpers and document both the helpers and all barrier interactions involved when transitioning between read-{only,write} states - Use flexible array members in old readdir codepaths Fixes: - Use the correct type __poll_t for epoll and eventfd - Replace all deprecated strlcpy() invocations, whose return value isn't checked with an equivalent strscpy() call - Fix some kernel-doc warnings in fs/open.c - Reduce the stack usage in jffs2's xattr codepaths finally getting rid of this: fs/jffs2/xattr.c:887:1: error: the frame size of 1088 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] royally annoying compilation warning - Use __FMODE_NONOTIFY instead of FMODE_NONOTIFY where an int and not fmode_t is required to avoid fmode_t to integer degradation warnings - Create coredumps with O_WRONLY instead of O_RDWR. There's a long explanation in that commit how O_RDWR is actually a bug which we found out with the help of Linus and git archeology - Fix "no previous prototype" warnings in the pipe codepaths - Add overflow calculations for remap_verify_area() as a signed addition overflow could be triggered in xfstests - Fix a null pointer dereference in sysv - Use an unsigned variable for length calculations in jfs avoiding compilation warnings with gcc 13 - Fix a dangling pipe pointer in the watch queue codepath - The legacy mount option parser provided as a fallback by the VFS for filesystems not yet converted to the new mount api did prefix the generated mount option string with a leading ',' causing issues for some filesystems - Fix a repeated word in a comment in fs.h - autofs: Update the ctime when mtime is updated as mandated by POSIX" * tag 'v6.5/vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (27 commits) readdir: Replace one-element arrays with flexible-array members fs: Provide helpers for manipulating sb->s_readonly_remount fs: Protect reconfiguration of sb read-write from racing writes eventfd: add a uapi 
header for eventfd userspace APIs autofs: set ctime as well when mtime changes on a dir eventfd: show the EFD_SEMAPHORE flag in fdinfo fs/aio: Stop allocating aio rings from HIGHMEM fs: Fix comment typo fs: unexport buffer_check_dirty_writeback fs: avoid empty option when generating legacy mount string watch_queue: prevent dangling pipe pointer fs.h: Optimize file struct to prevent false sharing highmem: Rename put_and_unmap_page() to unmap_and_put_page() cachefiles: Allow the cache to be non-root init: remove unused names parameter in split_fs_names() jfs: Use unsigned variable for length calculations fs/sysv: Null check to prevent null-ptr-deref bug fs: use UB-safe check for signed addition overflow in remap_verify_area procfs: consolidate arch_report_meminfo declaration fs: pipe: reveal missing function protoypes ...
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/eventfd.h 8
-rw-r--r-- include/linux/fs.h 20
-rw-r--r-- include/linux/highmem.h 2
-rw-r--r-- include/linux/pipe_fs_i.h 4
-rw-r--r-- include/linux/proc_fs.h 2
-rw-r--r-- include/linux/watch_queue.h 3
6 files changed, 19 insertions, 20 deletions
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 36a486505b08..b9d83652c097 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -9,12 +9,12 @@
#ifndef _LINUX_EVENTFD_H
#define _LINUX_EVENTFD_H
-#include <linux/fcntl.h>
#include <linux/wait.h>
#include <linux/err.h>
#include <linux/percpu-defs.h>
#include <linux/percpu.h>
#include <linux/sched.h>
+#include <uapi/linux/eventfd.h>
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -23,10 +23,6 @@
* from eventfd, in order to leave a free define-space for
* shared O_* flags.
*/
-#define EFD_SEMAPHORE (1 << 0)
-#define EFD_CLOEXEC O_CLOEXEC
-#define EFD_NONBLOCK O_NONBLOCK
-
#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
@@ -40,7 +36,7 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 133f0640fb24..66f105ef3427 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -956,29 +956,35 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
index < ra->start + ra->size);
}
+/*
+ * f_{lock,count,pos_lock} members can be highly contended and share
+ * the same cacheline. f_{lock,mode} are very frequently used together
+ * and so share the same cacheline as well. The read-mostly
+ * f_{path,inode,op} are kept on a separate cacheline.
+ */
struct file {
union {
struct llist_node f_llist;
struct rcu_head f_rcuhead;
unsigned int f_iocb_flags;
};
- struct path f_path;
- struct inode *f_inode; /* cached value */
- const struct file_operations *f_op;
/*
* Protects f_ep, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
- atomic_long_t f_count;
- unsigned int f_flags;
fmode_t f_mode;
+ atomic_long_t f_count;
struct mutex f_pos_lock;
loff_t f_pos;
+ unsigned int f_flags;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
+ struct path f_path;
+ struct inode *f_inode; /* cached value */
+ const struct file_operations *f_op;
u64 f_version;
#ifdef CONFIG_SECURITY
@@ -1242,7 +1248,7 @@ struct super_block {
*/
atomic_long_t s_fsnotify_connectors;
- /* Being remounted read-only */
+ /* Read-only state of the superblock is being changed */
int s_readonly_remount;
/* per-sb errseq_t for reporting writeback errors via syncfs */
@@ -2669,7 +2675,7 @@ extern void evict_inodes(struct super_block *sb);
void dump_mapping(const struct address_space *);
/*
- * Userspace may rely on the the inode number being non-zero. For example, glibc
+ * Userspace may rely on the inode number being non-zero. For example, glibc
* simply ignores files with zero i_ino in unlink() and other places.
*
* As an additional complication, if userspace was compiled with
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 4de1dbcd3ef6..68da30625a6c 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -507,7 +507,7 @@ static inline void folio_zero_range(struct folio *folio,
zero_user_segments(&folio->page, start, start + length, 0, 0);
}
-static inline void put_and_unmap_page(struct page *page, void *addr)
+static inline void unmap_and_put_page(struct page *page, void *addr)
{
kunmap_local(addr);
put_page(page);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index d2c3f16cf6b1..02e0086b10f6 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
-#ifdef CONFIG_WATCH_QUEUE
unsigned long account_pipe_buffers(struct user_struct *user,
unsigned long old, unsigned long new);
bool too_many_pipe_buffers_soft(unsigned long user_bufs);
bool too_many_pipe_buffers_hard(unsigned long user_bufs);
bool pipe_is_unprivileged_user(void);
-#endif
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
-#ifdef CONFIG_WATCH_QUEUE
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-#endif
long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 0260f5ea98fe..253f2676d93a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+void arch_report_meminfo(struct seq_file *m);
+
#else /* CONFIG_PROC_FS */
static inline void proc_root_init(void)
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index fc6bba20273b..45cd42f55d49 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -38,7 +38,7 @@ struct watch_filter {
struct watch_queue {
struct rcu_head rcu;
struct watch_filter __rcu *filter;
- struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */
+ struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */
struct hlist_head watches; /* Contributory watches */
struct page **notes; /* Preallocated notifications */
unsigned long *notes_bitmap; /* Allocation bitmap for notes */
@@ -46,7 +46,6 @@ struct watch_queue {
spinlock_t lock;
unsigned int nr_notes; /* Number of notes */
unsigned int nr_pages; /* Number of pages in notes[] */
- bool defunct; /* T when queues closed */
};
/*