diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-01 12:45:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-01 12:45:04 -0700 |
commit | 0bd957eb11cfeef23fcc240edde6dfe431731e69 (patch) | |
tree | b230aaa22c06daca90acef99decbbd1709497c4d | |
parent | 9bf9511e3d9f328c03f6f79bfb741c3d18f2f2c0 (diff) | |
parent | 66e9b0717102507e64f638790eaece88765cc9e5 (diff) |
Merge tag 'core-kprobes-2020-06-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull kprobes updates from Ingo Molnar:
"Various kprobes updates, mostly centered around cleaning up the
no-instrumentation logic.
Instead of the current per-debug-facility blacklist, use the more
generic .noinstr.text approach, combined with a 'noinstr' marker for
functions.
Also add instrumentation_begin()/end() to better manage the exact
place in entry code where instrumentation may be used.
And add a kprobes blacklist for modules"
* tag 'core-kprobes-2020-06-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
kprobes: Prevent probes in .noinstr.text section
vmlinux.lds.h: Create section for protection against instrumentation
samples/kprobes: Add __kprobes and NOKPROBE_SYMBOL() for handlers.
kprobes: Support NOKPROBE_SYMBOL() in modules
kprobes: Support __kprobes blacklist in modules
kprobes: Lock kprobe_mutex while showing kprobe_blacklist
-rw-r--r-- | arch/powerpc/kernel/vmlinux.lds.S | 1 | ||||
-rw-r--r-- | include/asm-generic/sections.h | 3 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 10 | ||||
-rw-r--r-- | include/linux/compiler.h | 53 | ||||
-rw-r--r-- | include/linux/compiler_types.h | 4 | ||||
-rw-r--r-- | include/linux/module.h | 8 | ||||
-rw-r--r-- | kernel/kprobes.c | 85 | ||||
-rw-r--r-- | kernel/module.c | 10 | ||||
-rw-r--r-- | samples/kprobes/kprobe_example.c | 6 | ||||
-rw-r--r-- | samples/kprobes/kretprobe_example.c | 2 | ||||
-rw-r--r-- | scripts/mod/modpost.c | 2 |
11 files changed, 180 insertions, 4 deletions
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..a1706b63b82d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d1779d442aa5..66397ed10acb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; +/* Start and end of instrumentation protected text section */ +extern char __noinstr_text_start[], __noinstr_text_end[]; + extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 71e387a5fe90..db600ef218d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -541,6 +541,15 @@ __end_rodata = .; /* + * Non-instrumentable text section + */ +#define NOINSTR_TEXT \ + ALIGN_FUNCTION(); \ + __noinstr_text_start = .; \ + *(.noinstr.text) \ + __noinstr_text_end = .; + +/* * .text section. 
Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map * @@ -551,6 +560,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ MEM_KEEP(init.text*) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 448c91bf543b..6325d64e3c3b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) +#ifdef CONFIG_DEBUG_ENTRY +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. 
+ */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#endif /* CONFIG_DEBUG_ENTRY */ + #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif +#ifndef instrumentation_begin +#define instrumentation_begin() do { } while(0) +#define instrumentation_end() do { } while(0) +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..5da257cbebf1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,10 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/include/linux/module.h b/include/linux/module.h index 1ad393e62bef..d849d06e4d44 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -458,6 +458,8 @@ struct module { void __percpu *percpu; unsigned int percpu_size; #endif + void *noinstr_text_start; + unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; @@ -489,6 +491,12 @@ struct module { unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif +#ifdef CONFIG_KPROBES + void *kprobes_text_start; + unsigned int kprobes_text_size; + unsigned long *kprobe_blacklist; + unsigned int num_kprobe_blacklist; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? 
*/ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2625c241ac00..3f310df4a693 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2179,6 +2179,24 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end) return 0; } +/* Remove all symbols in given area from kprobe blacklist */ +static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) +{ + struct kprobe_blacklist_entry *ent, *n; + + list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) { + if (ent->start_addr < start || ent->start_addr >= end) + continue; + list_del(&ent->list); + kfree(ent); + } +} + +static void kprobe_remove_ksym_blacklist(unsigned long entry) +{ + kprobe_remove_area_blacklist(entry, entry + 1); +} + int __init __weak arch_populate_kprobe_blacklist(void) { return 0; @@ -2211,10 +2229,62 @@ static int __init populate_kprobe_blacklist(unsigned long *start, /* Symbols in __kprobes_text are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, (unsigned long)__kprobes_text_end); + if (ret) + return ret; + + /* Symbols in noinstr section are blacklisted */ + ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, + (unsigned long)__noinstr_text_end); return ret ? 
: arch_populate_kprobe_blacklist(); } +static void add_module_kprobe_blacklist(struct module *mod) +{ + unsigned long start, end; + int i; + + if (mod->kprobe_blacklist) { + for (i = 0; i < mod->num_kprobe_blacklist; i++) + kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]); + } + + start = (unsigned long)mod->kprobes_text_start; + if (start) { + end = start + mod->kprobes_text_size; + kprobe_add_area_blacklist(start, end); + } + + start = (unsigned long)mod->noinstr_text_start; + if (start) { + end = start + mod->noinstr_text_size; + kprobe_add_area_blacklist(start, end); + } +} + +static void remove_module_kprobe_blacklist(struct module *mod) +{ + unsigned long start, end; + int i; + + if (mod->kprobe_blacklist) { + for (i = 0; i < mod->num_kprobe_blacklist; i++) + kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]); + } + + start = (unsigned long)mod->kprobes_text_start; + if (start) { + end = start + mod->kprobes_text_size; + kprobe_remove_area_blacklist(start, end); + } + + start = (unsigned long)mod->noinstr_text_start; + if (start) { + end = start + mod->noinstr_text_size; + kprobe_remove_area_blacklist(start, end); + } +} + /* Module notifier call back, checking kprobes on the module */ static int kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2225,6 +2295,11 @@ static int kprobes_module_callback(struct notifier_block *nb, unsigned int i; int checkcore = (val == MODULE_STATE_GOING); + if (val == MODULE_STATE_COMING) { + mutex_lock(&kprobe_mutex); + add_module_kprobe_blacklist(mod); + mutex_unlock(&kprobe_mutex); + } if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) return NOTIFY_DONE; @@ -2255,6 +2330,8 @@ static int kprobes_module_callback(struct notifier_block *nb, kill_kprobe(p); } } + if (val == MODULE_STATE_GOING) + remove_module_kprobe_blacklist(mod); mutex_unlock(&kprobe_mutex); return NOTIFY_DONE; } @@ -2420,6 +2497,7 @@ static const struct file_operations debugfs_kprobes_operations = { /* 
kprobes/blacklist -- shows which functions can not be probed */ static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(&kprobe_mutex); return seq_list_start(&kprobe_blacklist, *pos); } @@ -2446,10 +2524,15 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) return 0; } +static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v) +{ + mutex_unlock(&kprobe_mutex); +} + static const struct seq_operations kprobe_blacklist_seq_ops = { .start = kprobe_blacklist_seq_start, .next = kprobe_blacklist_seq_next, - .stop = kprobe_seq_stop, /* Reuse void function */ + .stop = kprobe_blacklist_seq_stop, .show = kprobe_blacklist_seq_show, }; diff --git a/kernel/module.c b/kernel/module.c index 646f1e2330d2..72ed2b3a6ee2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3150,6 +3150,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) } #endif + mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1, + &mod->noinstr_text_size); + #ifdef CONFIG_TRACEPOINTS mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", sizeof(*mod->tracepoints_ptrs), @@ -3194,6 +3197,13 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->ei_funcs), &mod->num_ei_funcs); #endif +#ifdef CONFIG_KPROBES + mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1, + &mod->kprobes_text_size); + mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist", + sizeof(unsigned long), + &mod->num_kprobe_blacklist); +#endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index d693c23a85e8..501911d1b327 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -25,7 +25,7 @@ static struct kprobe kp = { }; /* kprobe pre_handler: called just before the probed instruction is executed */ -static int handler_pre(struct 
kprobe *p, struct pt_regs *regs) +static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) { #ifdef CONFIG_X86 pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n", @@ -54,7 +54,7 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs) } /* kprobe post_handler: called after the probed instruction is executed */ -static void handler_post(struct kprobe *p, struct pt_regs *regs, +static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, unsigned long flags) { #ifdef CONFIG_X86 @@ -90,6 +90,8 @@ static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) /* Return 0 because we don't handle the fault. */ return 0; } +/* NOKPROBE_SYMBOL() is also available */ +NOKPROBE_SYMBOL(handler_fault); static int __init kprobe_init(void) { diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 186315ca88b3..013e8e6ebae9 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -48,6 +48,7 @@ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) data->entry_stamp = ktime_get(); return 0; } +NOKPROBE_SYMBOL(entry_handler); /* * Return-probe handler: Log the return value and duration. 
Duration may turn @@ -67,6 +68,7 @@ static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) func_name, retval, (long long)delta); return 0; } +NOKPROBE_SYMBOL(ret_handler); static struct kretprobe my_kretprobe = { .handler = ret_handler, diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 5c3c50c5ec52..0053d4fea847 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -948,7 +948,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ ".coldtext" |