summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 18:42:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 18:42:13 -0700
commit9ff9b0d392ea08090cd1780fb196f36dbb586529 (patch)
tree276a3a5c4525b84dee64eda30b423fc31bf94850 /arch
parent840e5bb326bbcb16ce82dd2416d2769de4839aea (diff)
parent105faa8742437c28815b2a3eb8314ebc5fd9288c (diff)
Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: - Add redirect_neigh() BPF packet redirect helper, allowing to limit stack traversal in common container configs and improving TCP back-pressure. Daniel reports ~10Gbps => ~15Gbps single stream TCP performance gain. - Expand netlink policy support and improve policy export to user space. (Ge)netlink core performs request validation according to declared policies. Expand the expressiveness of those policies (min/max length and bitmasks). Allow dumping policies for particular commands. This is used for feature discovery by user space (instead of kernel version parsing or trial and error). - Support IGMPv3/MLDv2 multicast listener discovery protocols in bridge. - Allow more than 255 IPv4 multicast interfaces. - Add support for Type of Service (ToS) reflection in SYN/SYN-ACK packets of TCPv6. - In Multipath TCP (MPTCP), support concurrent transmission of data on multiple subflows in a load balancing scenario. Enhance advertising addresses via the RM_ADDR/ADD_ADDR options. - Support SMC-Dv2 version of SMC, which enables multi-subnet deployments. - Allow more calls to same peer in RxRPC. - Support two new Controller Area Network (CAN) protocols - CAN-FD and ISO 15765-2:2016. - Add xfrm/IPsec compat layer, solving the 32bit user space on 64bit kernel problem. - Add TC actions for implementing MPLS L2 VPNs. - Improve nexthop code - e.g. handle various corner cases when nexthop objects are removed from groups better, skip unnecessary notifications and make it easier to offload nexthops into HW by converting to a blocking notifier. - Support adding and consuming TCP header options by BPF programs, opening the doors for easy experimental and deployment-specific TCP option use. - Reorganize TCP congestion control (CC) initialization to simplify life of TCP CC implemented in BPF. 
- Add support for shipping BPF programs with the kernel and loading them early on boot via the User Mode Driver mechanism, hence reusing all the user space infra we have. - Support sleepable BPF programs, initially targeting LSM and tracing. - Add bpf_d_path() helper for returning full path for given 'struct path'. - Make bpf_tail_call compatible with bpf-to-bpf calls. - Allow BPF programs to call map_update_elem on sockmaps. - Add BPF Type Format (BTF) support for type and enum discovery, as well as support for using BTF within the kernel itself (current use is for pretty printing structures). - Support listing and getting information about bpf_links via the bpf syscall. - Enhance kernel interfaces around NIC firmware update. Allow specifying overwrite mask to control if settings etc. are reset during update; report expected max time operation may take to users; support firmware activation without machine reboot incl. limits of how much impact reset may have (e.g. dropping link or not). - Extend ethtool configuration interface to report IEEE-standard counters, to limit the need for per-vendor logic in user space. - Adopt or extend devlink use for debug, monitoring, fw update in many drivers (dsa loop, ice, ionic, sja1105, qed, mlxsw, mv88e6xxx, dpaa2-eth). - In mlxsw expose critical and emergency SFP module temperature alarms. Refactor port buffer handling to make the defaults more suitable and support setting these values explicitly via the DCBNL interface. - Add XDP support for Intel's igb driver. - Support offloading TC flower classification and filtering rules to mscc_ocelot switches. - Add PTP support for Marvell Octeontx2 and PP2.2 hardware, as well as fixed interval period pulse generator and one-step timestamping in dpaa-eth. - Add support for various auth offloads in WiFi APs, e.g. SAE (WPA3) offload. - Add Lynx PHY/PCS MDIO module, and convert various drivers which have this HW to use it. Convert mvpp2 to split PCS. 
- Support Marvell Prestera 98DX3255 24-port switch ASICs, as well as 7-port Mediatek MT7531 IP. - Add initial support for QCA6390 and IPQ6018 in ath11k WiFi driver, and wcn3680 support in wcn36xx. - Improve performance for packets which don't require much offloads on recent Mellanox NICs by 20% by making multiple packets share a descriptor entry. - Move chelsio inline crypto drivers (for TLS and IPsec) from the crypto subtree to drivers/net. Move MDIO drivers out of the phy directory. - Clean up a lot of W=1 warnings, reportedly the actively developed subsections of networking drivers should now build W=1 warning free. - Make sure drivers don't use in_interrupt() to dynamically adapt their code. Convert tasklets to use new tasklet_setup API (sadly this conversion is not yet complete). * tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2583 commits) Revert "bpfilter: Fix build error with CONFIG_BPFILTER_UMH" net, sockmap: Don't call bpf_prog_put() on NULL pointer bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo bpf, sockmap: Add locking annotations to iterator netfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements net: fix pos incrementment in ipv6_route_seq_next net/smc: fix invalid return code in smcd_new_buf_create() net/smc: fix valid DMBE buffer sizes net/smc: fix use-after-free of delayed events bpfilter: Fix build error with CONFIG_BPFILTER_UMH cxgb4/ch_ipsec: Replace the module name to ch_ipsec from chcr net: sched: Fix suspicious RCU usage while accessing tcf_tunnel_info bpf: Fix register equivalence tracking. rxrpc: Fix loss of final ack on shutdown rxrpc: Fix bundle counting for exclusive connections netfilter: restore NF_INET_NUMHOOKS ibmveth: Identify ingress large send packets. ibmveth: Switch order of ibmveth_helper calls. cxgb4: handle 4-tuple PEDIT to NAT mode translation selftests: Add VRF route leaking tests ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts1
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts50
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts63
-rw-r--r--arch/mips/boot/dts/mscc/ocelot.dtsi4
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040rdb.dts107
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040si-post.dtsi78
-rw-r--r--arch/s390/include/asm/ccwdev.h9
-rw-r--r--arch/s390/include/asm/chsc.h7
-rw-r--r--arch/s390/include/asm/css_chars.h4
-rw-r--r--arch/s390/net/bpf_jit_comp.c61
-rw-r--r--arch/x86/include/asm/nospec-branch.h16
-rw-r--r--arch/x86/net/bpf_jit_comp.c310
13 files changed, 586 insertions, 128 deletions
diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
index 250fc01de78d..24aab3ea3f52 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
@@ -795,8 +795,8 @@
reg = <0x27>;
interrupt-parent = <&gpa1>;
interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
- s3fwrn5,en-gpios = <&gpf1 4 GPIO_ACTIVE_HIGH>;
- s3fwrn5,fw-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>;
+ en-gpios = <&gpf1 4 GPIO_ACTIVE_HIGH>;
+ wake-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
index c2dc1232f93f..1efb61cff454 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts
@@ -199,6 +199,7 @@
&enetc_port0 {
phy-handle = <&sgmii_phy0>;
phy-connection-type = "sgmii";
+ managed = "in-band-status";
status = "okay";
mdio {
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index d174ad214857..9a11e5c60c26 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -143,6 +143,56 @@
mdio: mdio-bus {
#address-cells = <1>;
#size-cells = <0>;
+
+ switch@0 {
+ compatible = "mediatek,mt7531";
+ reg = <0>;
+ reset-gpios = <&pio 54 0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "wan";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan0";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan1";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan2";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "lan3";
+ };
+
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "2500base-x";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
+ };
+
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index 0b4de627f96e..08ad0ffb24df 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -105,20 +105,71 @@
pinctrl-0 = <&eth_pins>;
status = "okay";
- gmac1: mac@1 {
+ gmac0: mac@0 {
compatible = "mediatek,eth-mac";
- reg = <1>;
- phy-handle = <&phy5>;
+ reg = <0>;
+ phy-mode = "2500base-x";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ pause;
+ };
};
mdio-bus {
#address-cells = <1>;
#size-cells = <0>;
- phy5: ethernet-phy@5 {
- reg = <5>;
- phy-mode = "sgmii";
+ switch@0 {
+ compatible = "mediatek,mt7531";
+ reg = <0>;
+ reset-gpios = <&pio 54 0>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "wan";
+ };
+
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "2500base-x";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
};
+
};
};
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
index f94e8a02ed06..535a98284dcb 100644
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -134,11 +134,13 @@
<0x1280000 0x100>,
<0x1800000 0x80000>,
<0x1880000 0x10000>,
+ <0x1040000 0x10000>,
+ <0x1050000 0x10000>,
<0x1060000 0x10000>;
reg-names = "sys", "rew", "qs", "ptp", "port0", "port1",
"port2", "port3", "port4", "port5", "port6",
"port7", "port8", "port9", "port10", "qsys",
- "ana", "s2";
+ "ana", "s0", "s1", "s2";
interrupts = <18 21 22>;
interrupt-names = "ptp_rdy", "xtr", "inj";
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
index 65ff34c49025..af0c8a6f5613 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -64,6 +64,40 @@
phy_sgmii_2: ethernet-phy@3 {
reg = <0x03>;
};
+
+ /* VSC8514 QSGMII PHY */
+ phy_qsgmii_0: ethernet-phy@4 {
+ reg = <0x4>;
+ };
+
+ phy_qsgmii_1: ethernet-phy@5 {
+ reg = <0x5>;
+ };
+
+ phy_qsgmii_2: ethernet-phy@6 {
+ reg = <0x6>;
+ };
+
+ phy_qsgmii_3: ethernet-phy@7 {
+ reg = <0x7>;
+ };
+
+ /* VSC8514 QSGMII PHY */
+ phy_qsgmii_4: ethernet-phy@8 {
+ reg = <0x8>;
+ };
+
+ phy_qsgmii_5: ethernet-phy@9 {
+ reg = <0x9>;
+ };
+
+ phy_qsgmii_6: ethernet-phy@a {
+ reg = <0xa>;
+ };
+
+ phy_qsgmii_7: ethernet-phy@b {
+ reg = <0xb>;
+ };
};
};
};
@@ -76,3 +110,76 @@
};
#include "t1040si-post.dtsi"
+
+&seville_switch {
+ status = "okay";
+};
+
+&seville_port0 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_0>;
+ phy-mode = "qsgmii";
+ label = "ETH5";
+ status = "okay";
+};
+
+&seville_port1 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_1>;
+ phy-mode = "qsgmii";
+ label = "ETH4";
+ status = "okay";
+};
+
+&seville_port2 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_2>;
+ phy-mode = "qsgmii";
+ label = "ETH7";
+ status = "okay";
+};
+
+&seville_port3 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_3>;
+ phy-mode = "qsgmii";
+ label = "ETH6";
+ status = "okay";
+};
+
+&seville_port4 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_4>;
+ phy-mode = "qsgmii";
+ label = "ETH9";
+ status = "okay";
+};
+
+&seville_port5 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_5>;
+ phy-mode = "qsgmii";
+ label = "ETH8";
+ status = "okay";
+};
+
+&seville_port6 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_6>;
+ phy-mode = "qsgmii";
+ label = "ETH11";
+ status = "okay";
+};
+
+&seville_port7 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_7>;
+ phy-mode = "qsgmii";
+ label = "ETH10";
+ status = "okay";
+};
+
+&seville_port8 {
+ ethernet = <&enet0>;
+ status = "okay";
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index 315d0557eefc..f58eb820eb5e 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -628,6 +628,84 @@
status = "disabled";
};
};
+
+ seville_switch: ethernet-switch@800000 {
+ compatible = "mscc,vsc9953-switch";
+ reg = <0x800000 0x290000>;
+ interrupts = <26 2 0 0>;
+ interrupt-names = "xtr";
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ seville_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ seville_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ seville_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ seville_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ seville_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+
+ seville_port5: port@5 {
+ reg = <5>;
+ status = "disabled";
+ };
+
+ seville_port6: port@6 {
+ reg = <6>;
+ status = "disabled";
+ };
+
+ seville_port7: port@7 {
+ reg = <7>;
+ status = "disabled";
+ };
+
+ seville_port8: port@8 {
+ reg = <8>;
+ phy-mode = "internal";
+ status = "disabled";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ };
+ };
+
+ seville_port9: port@9 {
+ reg = <9>;
+ phy-mode = "internal";
+ status = "disabled";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ };
+ };
+ };
+ };
};
&qe {
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 3cfe1eb89838..c0be5fe1ddba 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -238,7 +238,10 @@ extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
int ccw_device_pnso(struct ccw_device *cdev,
- struct chsc_pnso_area *pnso_area,
- struct chsc_pnso_resume_token resume_token,
- int cnc);
+ struct chsc_pnso_area *pnso_area, u8 oc,
+ struct chsc_pnso_resume_token resume_token, int cnc);
+int ccw_device_get_cssid(struct ccw_device *cdev, u8 *cssid);
+int ccw_device_get_iid(struct ccw_device *cdev, u8 *iid);
+int ccw_device_get_chpid(struct ccw_device *cdev, int chp_idx, u8 *chpid);
+int ccw_device_get_chid(struct ccw_device *cdev, int chp_idx, u16 *chid);
#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index 36ce2d25a5fc..ae4d2549cd67 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -12,6 +12,13 @@
#include <uapi/asm/chsc.h>
/**
+ * Operation codes for CHSC PNSO:
+ * PNSO_OC_NET_BRIDGE_INFO - only addresses that are visible to a bridgeport
+ * PNSO_OC_NET_ADDR_INFO - all addresses
+ */
+#define PNSO_OC_NET_BRIDGE_INFO 0
+#define PNSO_OC_NET_ADDR_INFO 3
+/**
* struct chsc_pnso_naid_l2 - network address information descriptor
* @nit: Network interface token
* @addr_lnid: network address and logical network id (VLAN ID)
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 480bb02ccacd..638137d46c85 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -36,7 +36,9 @@ struct css_general_char {
u64 alt_ssi : 1; /* bit 108 */
u64 : 1;
u64 narf : 1; /* bit 110 */
- u64 : 12;
+ u64 : 5;
+ u64 enarf: 1; /* bit 116 */
+ u64 : 6;
u64 util_str : 1;/* bit 123 */
} __packed;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index be4b8532dd3c..0a4182792876 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -50,7 +50,6 @@ struct bpf_jit {
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
- int labels[1]; /* Labels for local jumps */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
@@ -229,18 +228,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
REG_SET_SEEN(b3); \
})
-#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
+#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- int rel = (jit->labels[label] - jit->prg) >> 1; \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
-#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
+#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- int rel = (jit->labels[label] - jit->prg) >> 1; \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -1282,7 +1281,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
}
- case BPF_JMP | BPF_TAIL_CALL:
+ case BPF_JMP | BPF_TAIL_CALL: {
+ int patch_1_clrj, patch_2_clij, patch_3_brc;
+
/*
* Implicit input:
* B1: pointer to ctx
@@ -1300,16 +1301,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
/* if ((u32)%b3 >= (u32)%w1) goto out; */
- if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
- /* clrj %b3,%w1,0xa,label0 */
- EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
- REG_W1, 0, 0xa);
- } else {
- /* clr %b3,%w1 */
- EMIT2(0x1500, BPF_REG_3, REG_W1);
- /* brcl 0xa,label0 */
- EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
- }
+ /* clrj %b3,%w1,0xa,out */
+ patch_1_clrj = jit->prg;
+ EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
+ jit->prg);
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1324,16 +1319,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
- MAX_TAIL_CALL_CNT, 0, 0x2);
- } else {
- /* clfi %w1,MAX_TAIL_CALL_CNT */
- EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
- /* brcl 0x2,label0 */
- EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
- }
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */
+ patch_2_clij = jit->prg;
+ EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT,
+ 2, jit->prg);
/*
* prog = array->ptrs[index];
@@ -1348,13 +1337,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* ltg %r1,prog(%b2,%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
- if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
- /* brc 0x8,label0 */
- EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
- } else {
- /* brcl 0x8,label0 */
- EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
- }
+ /* brc 0x8,out */
+ patch_3_brc = jit->prg;
+ EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
/*
* Restore registers before calling function
@@ -1371,8 +1356,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
/* out: */
- jit->labels[0] = jit->prg;
+ if (jit->prg_buf) {
+ *(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
+ (jit->prg - patch_1_clrj) >> 1;
+ *(u16 *)(jit->prg_buf + patch_2_clij + 2) =
+ (jit->prg - patch_2_clij) >> 1;
+ *(u16 *)(jit->prg_buf + patch_3_brc + 2) =
+ (jit->prg - patch_3_brc) >> 1;
+ }
break;
+ }
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
if (last)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 86651e86289d..cb9ad6b73973 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void)
* lfence
* jmp spec_trap
* do_rop:
- * mov %rax,(%rsp) for x86_64
+ * mov %rcx,(%rsp) for x86_64
* mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
- * jmp *%rax for x86_64
+ * jmp *%rcx for x86_64
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT() \
+# define RETPOLINE_RCX_BPF_JIT_SIZE 17
+# define RETPOLINE_RCX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
@@ -334,7 +334,7 @@ do { \
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
- EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
EMIT1(0xC3); /* retq */ \
} while (0)
# else /* !CONFIG_X86_64 */
@@ -352,9 +352,9 @@ do { \
# endif
#else /* !CONFIG_RETPOLINE */
# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT() \
- EMIT2(0xFF, 0xE0); /* jmp *%rax */
+# define RETPOLINE_RCX_BPF_JIT_SIZE 2
+# define RETPOLINE_RCX_BPF_JIT() \
+ EMIT2(0xFF, 0xE1); /* jmp *%rcx */
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 42b6709e6dc7..796506dcfc42 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -221,14 +221,48 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
+/* Number of bytes that will be skipped on tailcall */
+#define X86_TAIL_CALL_OFFSET 11
-#define PROLOGUE_SIZE 25
+static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (callee_regs_used[0])
+ EMIT1(0x53); /* push rbx */
+ if (callee_regs_used[1])
+ EMIT2(0x41, 0x55); /* push r13 */
+ if (callee_regs_used[2])
+ EMIT2(0x41, 0x56); /* push r14 */
+ if (callee_regs_used[3])
+ EMIT2(0x41, 0x57); /* push r15 */
+ *pprog = prog;
+}
+
+static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (callee_regs_used[3])
+ EMIT2(0x41, 0x5F); /* pop r15 */
+ if (callee_regs_used[2])
+ EMIT2(0x41, 0x5E); /* pop r14 */
+ if (callee_regs_used[1])
+ EMIT2(0x41, 0x5D); /* pop r13 */
+ if (callee_regs_used[0])
+ EMIT1(0x5B); /* pop rbx */
+ *pprog = prog;
+}
/*
- * Emit x86-64 prologue code for BPF program and check its size.
- * bpf_tail_call helper will skip it while jumping into another program
+ * Emit x86-64 prologue code for BPF program.
+ * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
+ * while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+ bool tail_call_reachable, bool is_subprog)
{
u8 *prog = *pprog;
int cnt = X86_PATCH_SIZE;
@@ -238,19 +272,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*/
memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
prog += cnt;
+ if (!ebpf_from_cbpf) {
+ if (tail_call_reachable && !is_subprog)
+ EMIT2(0x31, 0xC0); /* xor eax, eax */
+ else
+ EMIT2(0x66, 0x90); /* nop2 */
+ }
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
- EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
- EMIT1(0x53); /* push rbx */
- EMIT2(0x41, 0x55); /* push r13 */
- EMIT2(0x41, 0x56); /* push r14 */
- EMIT2(0x41, 0x57); /* push r15 */
- if (!ebpf_from_cbpf) {
- /* zero init tail_call_cnt */
- EMIT2(0x6a, 0x00);
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
- }
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
+ if (tail_call_reachable)
+ EMIT1(0x50); /* push rax */
*pprog = prog;
}
@@ -314,13 +348,14 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
mutex_lock(&text_mutex);
if (memcmp(ip, old_insn, X86_PATCH_SIZE))
goto out;
+ ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
if (text_live)
text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
else
memcpy(ip, new_insn, X86_PATCH_SIZE);
+ ret = 0;
}
- ret = 0;
out:
mutex_unlock(&text_mutex);
return ret;
@@ -337,6 +372,22 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
+static int get_pop_bytes(bool *callee_regs_used)
+{
+ int bytes = 0;
+
+ if (callee_regs_used[3])
+ bytes += 2;
+ if (callee_regs_used[2])
+ bytes += 2;
+ if (callee_regs_used[1])
+ bytes += 2;
+ if (callee_regs_used[0])
+ bytes += 1;
+
+ return bytes;
+}
+
/*
* Generate the following code:
*
@@ -351,12 +402,32 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call_indirect(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ u32 stack_depth)
{
+ int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
- int label1, label2, label3;
+ int pop_bytes = 0;
+ int off1 = 42;
+ int off2 = 31;
+ int off3 = 9;
int cnt = 0;
+ /* count the additional bytes used for popping callee regs from stack
+ * that need to be taken into account for each of the offsets that
+ * are used for bailing out of the tail call
+ */
+ pop_bytes = get_pop_bytes(callee_regs_used);
+ off1 += pop_bytes;
+ off2 += pop_bytes;
+ off3 += pop_bytes;
+
+ if (stack_depth) {
+ off1 += 7;
+ off2 += 7;
+ off3 += 7;
+ }
+
/*
* rdi - pointer to ctx
* rsi - pointer to bpf_array
@@ -370,72 +441,112 @@ static void emit_bpf_tail_call_indirect(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
- label1 = cnt;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
+#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
- label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
/*
* if (prog == NULL)
* goto out;
*/
- EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
+ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
+#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
- label3 = cnt;
- /* goto *(prog->bpf_func + prologue_size); */
- EMIT4(0x48, 0x8B, 0x40, /* mov rax, qword ptr [rax + 32] */
- offsetof(struct bpf_prog, bpf_func));
- EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
+ *pprog = prog;
+ pop_callee_regs(pprog, callee_regs_used);
+ prog = *pprog;
+
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
+ round_up(stack_depth, 8));
+ /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
+ EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
+ offsetof(struct bpf_prog, bpf_func));
+ EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */
+ X86_TAIL_CALL_OFFSET);
/*
- * Wow we're ready to jump into next BPF program
+ * Now we're ready to jump into next BPF program
* rdi == ctx (1st arg)
- * rax == prog->bpf_func + prologue_size
+ * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- RETPOLINE_RAX_BPF_JIT();
+ RETPOLINE_RCX_BPF_JIT();
/* out: */
- BUILD_BUG_ON(cnt - label1 != OFFSET1);
- BUILD_BUG_ON(cnt - label2 != OFFSET2);
- BUILD_BUG_ON(cnt - label3 != OFFSET3);
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
- u8 **pprog, int addr, u8 *image)
+ u8 **pprog, int addr, u8 *image,
+ bool *callee_regs_used, u32 stack_depth)
{
+ int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
+ int pop_bytes = 0;
+ int off1 = 20;
+ int poke_off;
int cnt = 0;
+ /* count the additional bytes used for popping callee regs from stack
+ * that need to be taken into account for the jump offset that is used
+ * for bailing out of the tail call when the limit is reached
+ */
+ pop_bytes = get_pop_bytes(callee_regs_used);
+ off1 += pop_bytes;
+
+ /*
+ * total bytes for:
+ * - nop5/ jmpq $off
+ * - pop callee regs
+ * - sub rsp, $val if depth > 0
+ * - pop rax
+ */
+ poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+ if (stack_depth) {
+ poke_off += 7;
+ off1 += 7;
+ }
+
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- EMIT2(X86_JA, 14); /* ja out */
+ EMIT2(X86_JA, off1); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
- poke->ip = image + (addr - X86_PATCH_SIZE);
- poke->adj_off = PROLOGUE_SIZE;
+ poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+ poke->adj_off = X86_TAIL_CALL_OFFSET;
+ poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+ poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
+
+ emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
+ poke->tailcall_bypass);
+
+ *pprog = prog;
+ pop_callee_regs(pprog, callee_regs_used);
+ prog = *pprog;
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
@@ -453,7 +564,7 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
for (i = 0; i < prog->aux->size_poke_tab; i++) {
poke = &prog->aux->poke_tab[i];
- WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+ WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));
if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
continue;
@@ -464,18 +575,25 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
if (target) {
/* Plain memcpy is used when image is not live yet
* and still not locked as read-only. Once poke
- * location is active (poke->ip_stable), any parallel
- * bpf_arch_text_poke() might occur still on the
- * read-write image until we finally locked it as
- * read-only. Both modifications on the given image
- * are under text_mutex to avoid interference.
+ * location is active (poke->tailcall_target_stable),
+ * any parallel bpf_arch_text_poke() might occur
+ * still on the read-write image until we finally
+ * locked it as read-only. Both modifications on
+ * the given image are under text_mutex to avoid
+ * interference.
*/
- ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP, NULL,
(u8 *)target->bpf_func +
poke->adj_off, false);
BUG_ON(ret < 0);
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ (u8 *)poke->tailcall_target +
+ X86_PATCH_SIZE, NULL, false);
+ BUG_ON(ret < 0);
}
- WRITE_ONCE(poke->ip_stable, true);
+ WRITE_ONCE(poke->tailcall_target_stable, true);
mutex_unlock(&array->aux->poke_mutex);
}
}
@@ -652,19 +770,49 @@ static bool ex_handler_bpf(const struct exception_table_entry *x,
return true;
}
+static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
+ bool *regs_used, bool *tail_call_seen)
+{ /* Scan insn_cnt instructions from insn: note any tail call and which of BPF_REG_6..BPF_REG_9 appear. */
+ int i;
+
+ for (i = 1; i <= insn_cnt; i++, insn++) { /* i only counts iterations; insn walks the array */
+ if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
+ *tail_call_seen = true; /* outputs are only ever set to true here, never cleared */
+ if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6) /* checked as either operand */
+ regs_used[0] = true; /* slot 0 <-> BPF_REG_6 */
+ if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
+ regs_used[1] = true; /* slot 1 <-> BPF_REG_7 */
+ if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
+ regs_used[2] = true; /* slot 2 <-> BPF_REG_8 */
+ if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
+ regs_used[3] = true; /* slot 3 <-> BPF_REG_9; regs_used needs at least 4 entries */
+ }
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
+ bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
struct bpf_insn *insn = bpf_prog->insnsi;
+ bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
+ bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
+ detect_reg_usage(insn, insn_cnt, callee_regs_used,
+ &tail_call_seen);
+
+ /* tail call's presence in current prog implies it is reachable */
+ tail_call_reachable |= tail_call_seen;
+
emit_prologue(&prog, bpf_prog->aux->stack_depth,
- bpf_prog_was_classic(bpf_prog));
+ bpf_prog_was_classic(bpf_prog), tail_call_reachable,
+ bpf_prog->aux->func_idx != 0);
+ push_callee_regs(&prog, callee_regs_used);
addrs[0] = prog - temp;
for (i = 1; i <= insn_cnt; i++, insn++) {
@@ -1102,16 +1250,27 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
- return -EINVAL;
+ if (tail_call_reachable) {
+ EMIT3_off32(0x48, 0x8B, 0x85,
+ -(bpf_prog->aux->stack_depth + 8));
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+ return -EINVAL;
+ } else {
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
+ return -EINVAL;
+ }
break;
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, addrs[i], image);
+ &prog, addrs[i], image,
+ callee_regs_used,
+ bpf_prog->aux->stack_depth);
else
- emit_bpf_tail_call_indirect(&prog);
+ emit_bpf_tail_call_indirect(&prog,
+ callee_regs_used,
+ bpf_prog->aux->stack_depth);
break;
/* cond jump */
@@ -1294,12 +1453,7 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
- if (!bpf_prog_was_classic(bpf_prog))
- EMIT1(0x5B); /* get rid of tail_call_cnt */
- EMIT2(0x41, 0x5F); /* pop r15 */
- EMIT2(0x41, 0x5E); /* pop r14 */
- EMIT2(0x41, 0x5D); /* pop r13 */
- EMIT1(0x5B); /* pop rbx */
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
@@ -1379,10 +1533,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
u8 *prog = *pprog;
int cnt = 0;
- if (emit_call(&prog, __bpf_prog_enter, prog))
- return -EINVAL;
- /* remember prog start time returned by __bpf_prog_enter */
- emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+ if (p->aux->sleepable) {
+ if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+ return -EINVAL;
+ } else {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+ }
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1402,13 +1561,18 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (mod_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- /* arg1: mov rdi, progs[i] */
- emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
- (u32) (long) p);
- /* arg2: mov rsi, rbx <- start time in nsec */
- emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
- if (emit_call(&prog, __bpf_prog_exit, prog))
- return -EINVAL;
+ if (p->aux->sleepable) {
+ if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+ return -EINVAL;
+ } else {
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
+ (u32) (long) p);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
*pprog = prog;
return 0;