summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 12:22:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 12:22:58 -0700
commit7e062cda7d90543ac8c7700fc7c5527d0c0f22ad (patch)
tree2f1602595d9416be41cc2e88a659ba4c145734b9 /arch/x86
parent5d1772b1739b085721431eef0c0400f3aff01abf (diff)
parent57d7becda9c9e612e6b00676f2eecfac3e719e88 (diff)
Merge tag 'net-next-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core ---- - Support TCPv6 segmentation offload with super-segments larger than 64k bytes using the IPv6 Jumbogram extension header (AKA BIG TCP). - Generalize skb freeing deferral to per-cpu lists, instead of per-socket lists. - Add a netdev statistic for packets dropped due to L2 address mismatch (rx_otherhost_dropped). - Continue work annotating skb drop reasons. - Accept alternative netdev names (ALT_IFNAME) in more netlink requests. - Add VLAN support for AF_PACKET SOCK_RAW GSO. - Allow receiving skb mark from the socket as a cmsg. - Enable memcg accounting for veth queues, sysctl tables and IPv6. BPF --- - Add libbpf support for User Statically-Defined Tracing (USDTs). - Speed up symbol resolution for kprobes multi-link attachments. - Support storing typed pointers to referenced and unreferenced objects in BPF maps. - Add support for BPF link iterator. - Introduce access to remote CPU map elements in BPF per-cpu map. - Allow middle-of-the-road settings for the kernel.unprivileged_bpf_disabled sysctl. - Implement basic types of dynamic pointers e.g. to allow for dynamically sized ringbuf reservations without extra memory copies. Protocols --------- - Retire port only listening_hash table, add a second bind table hashed by port and address. Avoid linear list walk when binding to very popular ports (e.g. 443). - Add bridge FDB bulk flush filtering support allowing user space to remove all FDB entries matching a condition. - Introduce accept_unsolicited_na sysctl for IPv6 to implement router-side changes for RFC9131. - Support for MPTCP path manager in user space. - Add MPTCP support for fallback to regular TCP for connections that have never connected additional subflows or transmitted out-of-sequence data (partial support for RFC8684 fallback). - Avoid races in MPTCP-level window tracking, stabilize and improve throughput. - Support lockless operation of GRE tunnels with seq numbers enabled. - WiFi support for host based BSS color collision detection. - Add support for SO_TXTIME/SCM_TXTIME on CAN sockets. - Support transmission w/o flow control in CAN ISOTP (ISO 15765-2). - Support zero-copy Tx with TLS 1.2 crypto offload (sendfile). - Allow matching on the number of VLAN tags via tc-flower. - Add tracepoint for tcp_set_ca_state(). Driver API ---------- - Improve error reporting from classifier and action offload. - Add support for listing line cards in switches (devlink). - Add helpers for reporting page pool statistics with ethtool -S. - Add support for reading clock cycles when using PTP virtual clocks, instead of having the driver convert to time before reporting. This makes it possible to report time from different vclocks. - Support configuring low-latency Tx descriptor push via ethtool. - Separate Clause 22 and Clause 45 MDIO accesses more explicitly. New hardware / drivers ---------------------- - Ethernet: - Marvell's Octeon NIC PCI Endpoint support (octeon_ep) - Sunplus SP7021 SoC (sp7021_emac) - Add support for Renesas RZ/V2M (in ravb) - Add support for MediaTek mt7986 switches (in mtk_eth_soc) - Ethernet PHYs: - ADIN1100 industrial PHYs (w/ 10BASE-T1L and SQI reporting) - TI DP83TD510 PHY - Microchip LAN8742/LAN88xx PHYs - WiFi: - Driver for pureLiFi X, XL, XC devices (plfxlc) - Driver for Silicon Labs devices (wfx) - Support for WCN6750 (in ath11k) - Support Realtek 8852ce devices (in rtw89) - Mobile: - MediaTek T700 modems (Intel 5G 5000 M.2 cards) - CAN: - ctucanfd: add support for CTU CAN FD open-source IP core from Czech Technical University in Prague Drivers ------- - Delete a number of old drivers still using virt_to_bus(). - Ethernet NICs: - intel: support TSO on tunnels MPLS - broadcom: support multi-buffer XDP - nfp: support VF rate limiting - sfc: use hardware tx timestamps for more than PTP - mlx5: multi-port eswitch support - hyper-v: add support for XDP_REDIRECT - atlantic: XDP support (including multi-buffer) - macb: improve real-time perf by deferring Tx processing to NAPI - High-speed Ethernet switches: - mlxsw: implement basic line card information querying - prestera: add support for traffic policing on ingress and egress - Embedded Ethernet switches: - lan966x: add support for packet DMA (FDMA) - lan966x: add support for PTP programmable pins - ti: cpsw_new: enable bc/mc storm prevention - Qualcomm 802.11ax WiFi (ath11k): - Wake-on-WLAN support for QCA6390 and WCN6855 - device recovery (firmware restart) support - support setting Specific Absorption Rate (SAR) for WCN6855 - read country code from SMBIOS for WCN6855/QCA6390 - enable keep-alive during WoWLAN suspend - implement remain-on-channel support - MediaTek WiFi (mt76): - support Wireless Ethernet Dispatch offloading packet movement between the Ethernet switch and WiFi interfaces - non-standard VHT MCS10-11 support - mt7921 AP mode support - mt7921 IPv6 NS offload support - Ethernet PHYs: - micrel: ksz9031/ksz9131: cabletest support - lan87xx: SQI support for T1 PHYs - lan937x: add interrupt support for link detection" * tag 'net-next-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1809 commits) ptp: ocp: Add firmware header checks ptp: ocp: fix PPS source selector debugfs reporting ptp: ocp: add .init function for sma_op vector ptp: ocp: vectorize the sma accessor functions ptp: ocp: constify selectors ptp: ocp: parameterize input/output sma selectors ptp: ocp: revise firmware display ptp: ocp: add Celestica timecard PCI ids ptp: ocp: Remove #ifdefs around PCI IDs ptp: ocp: 32-bit fixups for pci start address Revert "net/smc: fix listen processing for SMC-Rv2" ath6kl: Use cc-disable-warning to disable -Wdangling-pointer selftests/bpf: Dynptr tests bpf: Add dynptr data slices bpf: Add bpf_dynptr_read and bpf_dynptr_write bpf: Dynptr support for ring buffers bpf: Add bpf_dynptr_from_mem for local dynptrs bpf: Add verifier support for dynptrs bpf: Suppress 'passing zero to PTR_ERR' warning bpf: Introduce bpf_arch_text_invalidate for bpf_prog_pack ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/text-patching.h1
-rw-r--r--arch/x86/kernel/alternative.c67
-rw-r--r--arch/x86/net/bpf_jit_comp.c79
3 files changed, 113 insertions, 34 deletions
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index d20ab0921480..1cc15528ce29 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3c66073e7645..e257f6c80372 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -994,7 +994,21 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
__ro_after_init struct mm_struct *poking_mm;
__ro_after_init unsigned long poking_addr;
-static void *__text_poke(void *addr, const void *opcode, size_t len)
+static void text_poke_memcpy(void *dst, const void *src, size_t len)
+{
+ memcpy(dst, src, len);
+}
+
+static void text_poke_memset(void *dst, const void *src, size_t len)
+{
+ int c = *(const int *)src;
+
+ memset(dst, c, len);
+}
+
+typedef void text_poke_f(void *dst, const void *src, size_t len);
+
+static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
{
bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
struct page *pages[2] = {NULL};
@@ -1059,7 +1073,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
prev = use_temporary_mm(poking_mm);
kasan_disable_current();
- memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+ func((u8 *)poking_addr + offset_in_page(addr), src, len);
kasan_enable_current();
/*
@@ -1087,11 +1101,13 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
(cross_page_boundary ? 2 : 1) * PAGE_SIZE,
PAGE_SHIFT, false);
- /*
- * If the text does not match what we just wrote then something is
- * fundamentally screwy; there's nothing we can really do about that.
- */
- BUG_ON(memcmp(addr, opcode, len));
+ if (func == text_poke_memcpy) {
+ /*
+ * If the text does not match what we just wrote then something is
+ * fundamentally screwy; there's nothing we can really do about that.
+ */
+ BUG_ON(memcmp(addr, src, len));
+ }
local_irq_restore(flags);
pte_unmap_unlock(ptep, ptl);
@@ -1118,7 +1134,7 @@ void *text_poke(void *addr, const void *opcode, size_t len)
{
lockdep_assert_held(&text_mutex);
- return __text_poke(addr, opcode, len);
+ return __text_poke(text_poke_memcpy, addr, opcode, len);
}
/**
@@ -1137,7 +1153,7 @@ void *text_poke(void *addr, const void *opcode, size_t len)
*/
void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
{
- return __text_poke(addr, opcode, len);
+ return __text_poke(text_poke_memcpy, addr, opcode, len);
}
/**
@@ -1167,7 +1183,38 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
- __text_poke((void *)ptr, opcode + patched, s);
+ __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
+ patched += s;
+ }
+ mutex_unlock(&text_mutex);
+ return addr;
+}
+
+/**
+ * text_poke_set - memset into (an unused part of) RX memory
+ * @addr: address to modify
+ * @c: the byte to fill the area with
+ * @len: length to copy, could be more than 2x PAGE_SIZE
+ *
+ * This is useful to overwrite unused regions of RX memory with illegal
+ * instructions.
+ */
+void *text_poke_set(void *addr, int c, size_t len)
+{
+ unsigned long start = (unsigned long)addr;
+ size_t patched = 0;
+
+ if (WARN_ON_ONCE(core_kernel_text(start)))
+ return NULL;
+
+ mutex_lock(&text_mutex);
+ while (patched < len) {
+ unsigned long ptr = start + patched;
+ size_t s;
+
+ s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
+
+ __text_poke(text_poke_memset, (void *)ptr, (void *)&c, s);
patched += s;
}
mutex_unlock(&text_mutex);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 16b6efacf7c6..f298b18a9a3d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -228,6 +228,11 @@ static void jit_fill_hole(void *area, unsigned int size)
memset(area, 0xcc, size);
}
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+ return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len));
+}
+
struct jit_context {
int cleanup_addr; /* Epilogue code offset */
@@ -1762,13 +1767,32 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
}
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
- struct bpf_prog *p, int stack_size, bool save_ret)
+ struct bpf_tramp_link *l, int stack_size,
+ int run_ctx_off, bool save_ret)
{
u8 *prog = *pprog;
u8 *jmp_insn;
+ int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+ struct bpf_prog *p = l->link.prog;
+ u64 cookie = l->cookie;
+
+ /* mov rdi, cookie */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie);
+
+ /* Prepare struct bpf_tramp_run_ctx.
+ *
+ * bpf_tramp_run_ctx is already preserved by
+ * arch_prepare_bpf_trampoline().
+ *
+ * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi
+ */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+ /* arg2: lea rsi, [rbp - ctx_cookie_off] */
+ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+
if (emit_call(&prog,
p->aux->sleepable ? __bpf_prog_enter_sleepable :
__bpf_prog_enter, prog))
@@ -1814,6 +1838,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
/* arg2: mov rsi, rbx <- start time in nsec */
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ /* arg3: lea rdx, [rbp - run_ctx_off] */
+ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
if (emit_call(&prog,
p->aux->sleepable ? __bpf_prog_exit_sleepable :
__bpf_prog_exit, prog))
@@ -1850,15 +1876,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_progs *tp, int stack_size,
- bool save_ret)
+ struct bpf_tramp_links *tl, int stack_size,
+ int run_ctx_off, bool save_ret)
{
int i;
u8 *prog = *pprog;
- for (i = 0; i < tp->nr_progs; i++) {
- if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
- save_ret))
+ for (i = 0; i < tl->nr_links; i++) {
+ if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
+ run_ctx_off, save_ret))
return -EINVAL;
}
*pprog = prog;
@@ -1866,8 +1892,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
}
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_progs *tp, int stack_size,
- u8 **branches)
+ struct bpf_tramp_links *tl, int stack_size,
+ int run_ctx_off, u8 **branches)
{
u8 *prog = *pprog;
int i;
@@ -1877,8 +1903,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
*/
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- for (i = 0; i < tp->nr_progs; i++) {
- if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
+ for (i = 0; i < tl->nr_links; i++) {
+ if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
return -EINVAL;
/* mod_ret prog stored return value into [rbp - 8]. Emit:
@@ -1980,14 +2006,14 @@ static bool is_valid_bpf_tramp_flags(unsigned int flags)
*/
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_progs *tprogs,
+ struct bpf_tramp_links *tlinks,
void *orig_call)
{
int ret, i, nr_args = m->nr_args;
- int regs_off, ip_off, args_off, stack_size = nr_args * 8;
- struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
- struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
- struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
+ int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
u8 **branches = NULL;
u8 *prog;
bool save_ret;
@@ -2014,6 +2040,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
* RBP - args_off [ args count ] always
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
+ *
+ * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
*/
/* room for return value of orig_call or fentry prog */
@@ -2032,6 +2060,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
ip_off = stack_size;
+ stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
+ run_ctx_off = stack_size;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
@@ -2078,19 +2109,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
}
- if (fentry->nr_progs)
- if (invoke_bpf(m, &prog, fentry, regs_off,
+ if (fentry->nr_links)
+ if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET))
return -EINVAL;
- if (fmod_ret->nr_progs) {
- branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
+ if (fmod_ret->nr_links) {
+ branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
- branches)) {
+ run_ctx_off, branches)) {
ret = -EINVAL;
goto cleanup;
}
@@ -2111,7 +2142,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
prog += X86_PATCH_SIZE;
}
- if (fmod_ret->nr_progs) {
+ if (fmod_ret->nr_links) {
/* From Intel 64 and IA-32 Architectures Optimization
* Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
* Coding Rule 11: All branch targets should be 16-byte
@@ -2121,13 +2152,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
- for (i = 0; i < fmod_ret->nr_progs; i++)
+ for (i = 0; i < fmod_ret->nr_links; i++)
emit_cond_near_jump(&branches[i], prog, branches[i],
X86_JNE);
}
- if (fexit->nr_progs)
- if (invoke_bpf(m, &prog, fexit, regs_off, false)) {
+ if (fexit->nr_links)
+ if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
ret = -EINVAL;
goto cleanup;
}