From 7375dca1647fa978310f2d706ddbff537f72110b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 20 May 2019 09:26:24 -0400 Subject: ftrace: Make enable and update parameters bool when applicable The code modification functions have "enable" and "update" variables that are sometimes "int" but used as "bool". Remove the ambiguity and make them "bool" when they are only used for true or false values. Link: http://lkml.kernel.org/r/e1429923d9eda92a3cf5ee9e33c7eacce539781d.1558115654.git.naveen.n.rao@linux.vnet.ibm.com Reported-by: "Naveen N. Rao" Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 25e2995d4a4c..8a8cb3c401b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -427,8 +427,8 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); iter = ftrace_rec_iter_next(iter)) -int ftrace_update_record(struct dyn_ftrace *rec, int enable); -int ftrace_test_record(struct dyn_ftrace *rec, int enable); +int ftrace_update_record(struct dyn_ftrace *rec, bool enable); +int ftrace_test_record(struct dyn_ftrace *rec, bool enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); unsigned long ftrace_location_range(unsigned long start, unsigned long end); -- cgit v1.2.3-58-ga151 From 2d8d8fac3b4eab035dcd0068e1f5a746a697fbb3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:06 +0900 Subject: x86/uaccess: Allow access_ok() in irq context if pagefault_disabled WARN_ON_IN_IRQ() assumes that access_ok() and the following user memory access can sleep. But this assumption is not always correct; when pagefaults are disabled, the following memory access will just return -EFAULT and never sleep. 
Add a pagefault_disabled() check in WARN_ON_ONCE() so that it ignores the case where it is called with pagefaults disabled. For this purpose, this patch turns pagefault_disabled() into an inline function. Link: http://lkml.kernel.org/r/155789868664.26965.7932665824135793317.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/uaccess.h | 4 +++- include/linux/uaccess.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c82abd6e4ca3..9c4435307ff8 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -66,7 +66,9 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un }) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) +static inline bool pagefault_disabled(void); +# define WARN_ON_IN_IRQ() \ + WARN_ON_ONCE(!in_task() && !pagefault_disabled()) #else # define WARN_ON_IN_IRQ() #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2b70130af585..5a43ef7db492 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -203,7 +203,10 @@ static inline void pagefault_enable(void) /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ -#define pagefault_disabled() (current->pagefault_disabled != 0) +static inline bool pagefault_disabled(void) +{ + return current->pagefault_disabled != 0; +} /* * The pagefault handler is in general disabled by pagefault_disable() or -- cgit v1.2.3-58-ga151 From 3d7081822f7f9eab867d9bcc8fd635208ec438e0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 15 May 2019 14:38:18 +0900 Subject: uaccess: Add non-pagefault user-space read functions Add probe_user_read(), strncpy_from_unsafe_user() and strnlen_unsafe_user(), which allow callers to access user-space memory in IRQ context. 
The current probe_kernel_read() and strncpy_from_unsafe() cannot be used for user-space memory, because they set KERNEL_DS while accessing data. On some architectures, the user and kernel address spaces can co-exist, but on others they cannot. In the latter case, setting KERNEL_DS means the given address is treated as a kernel-space address. Also, strnlen_user() is only available from user context since it can sleep if pagefaults are enabled. To access user-space memory without pagefaults, we need these new functions, which set USER_DS while accessing the data. Link: http://lkml.kernel.org/r/155789869802.26965.4940338412595759063.stgit@devnote2 Acked-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/uaccess.h | 14 ++++++ mm/maccess.c | 122 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 130 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5a43ef7db492..9c435c3f2105 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -242,6 +242,17 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to, extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); +/* + * probe_user_read(): safely attempt to read from a location in user space + * @dst: pointer to the buffer that shall take the data + * @src: address to read from + * @size: size of the data chunk + * + * Safely read from address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. 
+ */ +extern long probe_user_read(void *dst, const void __user *src, size_t size); + /* * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to @@ -255,6 +266,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count); +extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); /** * probe_kernel_address(): safely attempt to read from a location diff --git a/mm/maccess.c b/mm/maccess.c index ec00be51a24f..19c8c3dc14df 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -5,8 +5,20 @@ #include #include +static __always_inline long +probe_read_common(void *dst, const void __user *src, size_t size) +{ + long ret; + + pagefault_disable(); + ret = __copy_from_user_inatomic(dst, src, size); + pagefault_enable(); + + return ret ? -EFAULT : 0; +} + /** - * probe_kernel_read(): safely attempt to read from a location + * probe_kernel_read(): safely attempt to read from a kernel-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -29,16 +41,40 @@ long __probe_kernel_read(void *dst, const void *src, size_t size) mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - pagefault_disable(); - ret = __copy_from_user_inatomic(dst, - (__force const void __user *)src, size); - pagefault_enable(); + ret = probe_read_common(dst, (__force const void __user *)src, size); set_fs(old_fs); - return ret ? -EFAULT : 0; + return ret; } EXPORT_SYMBOL_GPL(probe_kernel_read); +/** + * probe_user_read(): safely attempt to read from a user-space location + * @dst: pointer to the buffer that shall take the data + * @src: address to read from. This must be a user address. 
+ * @size: size of the data chunk + * + * Safely read from user address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ + +long __weak probe_user_read(void *dst, const void __user *src, size_t size) + __attribute__((alias("__probe_user_read"))); + +long __probe_user_read(void *dst, const void __user *src, size_t size) +{ + long ret = -EFAULT; + mm_segment_t old_fs = get_fs(); + + set_fs(USER_DS); + if (access_ok(src, size)) + ret = probe_read_common(dst, src, size); + set_fs(old_fs); + + return ret; +} +EXPORT_SYMBOL_GPL(probe_user_read); + /** * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to @@ -66,6 +102,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) } EXPORT_SYMBOL_GPL(probe_kernel_write); + /** * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. * @dst: Destination address, in kernel space. This buffer must be at @@ -105,3 +142,76 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) return ret ? -EFAULT : src - unsafe_addr; } + +/** + * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe user + * address. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @unsafe_addr: Unsafe user address. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from unsafe user address to kernel buffer. + * + * On success, returns the length of the string INCLUDING the trailing NUL. + * + * If access fails, returns -EFAULT (some data may have been copied + * and the trailing NUL added). + * + * If @count is smaller than the length of the string, copies @count-1 bytes, + * sets the last byte of @dst buffer to NUL and returns @count. 
+ */ +long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count) +{ + mm_segment_t old_fs = get_fs(); + long ret; + + if (unlikely(count <= 0)) + return 0; + + set_fs(USER_DS); + pagefault_disable(); + ret = strncpy_from_user(dst, unsafe_addr, count); + pagefault_enable(); + set_fs(old_fs); + + if (ret >= count) { + ret = count; + dst[ret - 1] = '\0'; + } else if (ret > 0) { + ret++; + } + + return ret; +} + +/** + * strnlen_unsafe_user: - Get the size of a user string INCLUDING final NUL. + * @unsafe_addr: The string to measure. + * @count: Maximum count (including NUL) + * + * Get the size of a NUL-terminated string in user space without pagefault. + * + * Returns the size of the string INCLUDING the terminating NUL. + * + * If the string is too long, returns a number larger than @count. User + * has to check the return value against "> count". + * On exception (or invalid count), returns 0. + * + * Unlike strnlen_user, this can be used from IRQ handler etc. because + * it disables pagefaults. + */ +long strnlen_unsafe_user(const void __user *unsafe_addr, long count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + + set_fs(USER_DS); + pagefault_disable(); + ret = strnlen_user(unsafe_addr, count); + pagefault_enable(); + set_fs(old_fs); + + return ret; +} -- cgit v1.2.3-58-ga151 From 87a90956eeab260a469a51897bfda27b28adf67d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 22 May 2019 17:27:44 +0900 Subject: uaccess: Add a prototype of non-static __probe_user_read() Declare a prototype for the non-static __probe_user_read() in uaccess.h, in the same way as for __probe_kernel_read(). 
Reported-by: kbuild test robot Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/uaccess.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9c435c3f2105..34a038563d97 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -252,6 +252,7 @@ extern long __probe_kernel_read(void *dst, const void *src, size_t size); * happens, handle that and return -EFAULT. */ extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long __probe_user_read(void *dst, const void __user *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location -- cgit v1.2.3-58-ga151 From 46710f3a34b592ac5c51a95f696b2d2a2a0d9419 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 May 2019 09:57:59 -0700 Subject: tracing: Pass type into tracing_generic_entry_update() All callers of tracing_generic_entry_update() have to initialize entry->type, so let's just simply move it inside. 
Link: http://lkml.kernel.org/r/20190525165802.25944-2-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 1 + kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_event_perf.c | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a62731673f7..5c6f2a6c8cd2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -142,6 +142,7 @@ enum print_line_t { enum print_line_t trace_handle_return(struct trace_seq *s); void tracing_generic_entry_update(struct trace_entry *entry, + unsigned short type, unsigned long flags, int pc); struct trace_event_file; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 77b9c4ca5faa..6b62e1718548 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -743,8 +743,7 @@ trace_event_setup(struct ring_buffer_event *event, { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, flags, pc); - ent->type = type; + tracing_generic_entry_update(ent, type, flags, pc); } static __always_inline struct ring_buffer_event * @@ -2312,13 +2311,14 @@ enum print_line_t trace_handle_return(struct trace_seq *s) EXPORT_SYMBOL_GPL(trace_handle_return); void -tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, - int pc) +tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, + unsigned long flags, int pc) { struct task_struct *tsk = current; entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->type = type; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? 
TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 4629a6104474..0892e38ed6fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -416,8 +416,7 @@ void perf_trace_buf_update(void *record, u16 type) unsigned long flags; local_save_flags(flags); - tracing_generic_entry_update(entry, flags, pc); - entry->type = type; + tracing_generic_entry_update(entry, type, flags, pc); } NOKPROBE_SYMBOL(perf_trace_buf_update); -- cgit v1.2.3-58-ga151 From 0aeb1def44169cbe7119f26cf10b974a2046142e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 May 2019 09:58:01 -0700 Subject: tracing: Make trace_get_fields() global trace_get_fields() is the only way to read tracepoint fields at run time, as their fields are defined at compile-time with macros. Make this function visible to all users and it will be used by trace event injection code to calculate the size of a tracepoint entry. Link: http://lkml.kernel.org/r/20190525165802.25944-4-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 8 ++++++++ kernel/trace/trace_events.c | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5c6f2a6c8cd2..5150436783e8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -318,6 +318,14 @@ trace_event_name(struct trace_event_call *call) return call->name; } +static inline struct list_head * +trace_get_fields(struct trace_event_call *event_call) +{ + if (!event_call->class->get_fields) + return &event_call->class->fields; + return event_call->class->get_fields(event_call); +} + struct trace_array; struct trace_subsystem_dir; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index edc72f3b080c..c7506bc81b75 100644 --- a/kernel/trace/trace_events.c +++ 
b/kernel/trace/trace_events.c @@ -70,14 +70,6 @@ static int system_refcount_dec(struct event_subsystem *system) #define while_for_each_event_file() \ } -static struct list_head * -trace_get_fields(struct trace_event_call *event_call) -{ - if (!event_call->class->get_fields) - return &event_call->class->fields; - return event_call->class->get_fields(event_call); -} - static struct ftrace_event_field * __find_event_field(struct list_head *head, char *name) { -- cgit v1.2.3-58-ga151