summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
authorAndrii Nakryiko <andrii@kernel.org>2024-06-27 10:08:55 -0700
committerAndrew Morton <akpm@linux-foundation.org>2024-07-12 15:52:12 -0700
commitbfc69fd05ef9b6416f4811aafafb7f2b34daa000 (patch)
tree97a27c6ab19db0194dc50ef8f4348ab06b65fbcc /fs/proc
parented5d583a88a9207b866c14ba834984c6f3c51d23 (diff)
fs/procfs: add build ID fetching to PROCMAP_QUERY API
The need to get ELF build ID reliably is an important aspect when dealing with profiling and stack trace symbolization, and /proc/<pid>/maps textual representation doesn't help with this. To get backing file's ELF build ID, application has to first resolve VMA, then use it's start/end address range to follow a special /proc/<pid>/map_files/<start>-<end> symlink to open the ELF file (this is necessary because backing file might have been removed from the disk or was already replaced with another binary in the same file path. Such approach, beyond just adding complexity of having to do a bunch of extra work, has extra security implications. Because application opens underlying ELF file and needs read access to its entire contents (as far as kernel is concerned), kernel puts additional capable() checks on following /proc/<pid>/map_files/<start>-<end> symlink. And that makes sense in general. But in the case of build ID, profiler/symbolizer doesn't need the contents of ELF file, per se. It's only build ID that is of interest, and ELF build ID itself doesn't provide any sensitive information. So this patch adds a way to request backing file's ELF build ID along the rest of VMA information in the same API. User has control over whether this piece of information is requested or not by either setting build_id_size field to zero or non-zero maximum buffer size they provided through build_id_addr field (which encodes user pointer as __u64 field). This is a completely optional piece of information, and so has no performance implications for user cases that don't care about build ID, while improving performance and simplifying the setup for those application that do need it. Kernel already implements build ID fetching, which is used from BPF subsystem. We are reusing this code here, but plan a follow up changes to make it work better under more relaxed assumption (compared to what existing code assumes) of being called from user process context, in which page faults are allowed. BPF-specific implementation currently bails out if necessary part of ELF file is not paged in, all due to extra BPF-specific restrictions (like the need to fetch build ID in restrictive contexts such as NMI handler). [andrii@kernel.org: fix integer to pointer cast warning in do_procmap_query()] Link: https://lkml.kernel.org/r/20240701174805.1897344-1-andrii@kernel.org Link: https://lkml.kernel.org/r/20240627170900.1672542-4-andrii@kernel.org Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christian Brauner <brauner@kernel.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/task_mmu.c27
1 files changed, 25 insertions, 2 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 75679b5de549..728693ed00e6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -22,6 +22,7 @@
#include <linux/pkeys.h>
#include <linux/minmax.h>
#include <linux/overflow.h>
+#include <linux/buildid.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -445,6 +446,7 @@ skip_vma:
addr = vma->vm_end;
if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA)
goto next_vma;
+
no_vma:
return ERR_PTR(-ENOENT);
}
@@ -455,7 +457,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
struct vm_area_struct *vma;
struct mm_struct *mm;
const char *name = NULL;
- char *name_buf = NULL;
+ char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
__u64 usize;
int err;
@@ -477,6 +479,8 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
/* either both buffer address and size are set, or both should be zero */
if (!!karg.vma_name_size != !!karg.vma_name_addr)
return -EINVAL;
+ if (!!karg.build_id_size != !!karg.build_id_addr)
+ return -EINVAL;
mm = priv->mm;
if (!mm || !mmget_not_zero(mm))
@@ -539,6 +543,21 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
}
}
+ if (karg.build_id_size) {
+ __u32 build_id_sz;
+
+ err = build_id_parse(vma, build_id_buf, &build_id_sz);
+ if (err) {
+ karg.build_id_size = 0;
+ } else {
+ if (karg.build_id_size < build_id_sz) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ karg.build_id_size = build_id_sz;
+ }
+ }
+
if (karg.vma_name_size) {
size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size);
const struct path *path;
@@ -576,13 +595,17 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
query_vma_teardown(mm, vma);
mmput(mm);
- if (karg.vma_name_size && copy_to_user((void __user *)karg.vma_name_addr,
+ if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
name, karg.vma_name_size)) {
kfree(name_buf);
return -EFAULT;
}
kfree(name_buf);
+ if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr),
+ build_id_buf, karg.build_id_size))
+ return -EFAULT;
+
if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize)))
return -EFAULT;