From 872b368b2282604aafbc8af1275e0b28a73b8636 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 29 Jun 2023 20:58:42 +0800 Subject: LoongArch: Set CPU#0 as the io master for FDT systems ACPI systems set io masters by parsing the ACPI MADT, but FDT systems have no MADT, so we explicitly set CPU#0 as the io master. Otherwise CPU#0 would be considered hotpluggable. Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index ed167e244cda..a858a468f746 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -210,6 +210,7 @@ static void __init fdt_smp_setup(void) } loongson_sysconf.nr_cpus = num_processors; + set_bit(0, &(loongson_sysconf.cores_io_master)); #endif } -- cgit v1.2.3-58-ga151 From d7c24960975b02211c53afe97c36acde5c8ff933 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Jun 2023 20:58:42 +0800 Subject: LoongArch: Delete unnecessary debugfs checking Debugfs functions are not supposed to be checked for errors; this is unusual but it is documented in the comments of the debugfs_create_dir() function. Also, debugfs_create_dir() can never return NULL. Reviewed-by: WANG Xuerui Signed-off-by: Dan Carpenter Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unaligned.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index 85fae3d2d71a..3abf163dda05 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -485,8 +485,6 @@ static int __init debugfs_unaligned(void) struct dentry *d; d = debugfs_create_dir("loongarch", NULL); - if (IS_ERR_OR_NULL(d)) - return -ENOMEM; debugfs_create_u32("unaligned_instructions_user", S_IRUGO, d, &unaligned_instructions_user); -- cgit v1.2.3-58-ga151 From 0d03e9dce5c91d841a35af05ca61a5cf318f5064 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 29 Jun 2023 20:58:42 +0800 Subject: LoongArch: Add guards for the larch_insn_gen_xxx functions Add guards to the larch_insn_gen_xxx functions to verify that the immediate operand is within the acceptable range.
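For reference, each of the open-coded comparisons added below is just a signed-N-bit range test; a sketch of the same guards expressed through the signed_imm_check() helper that inst.h already declares (behavior is identical, names taken from this patch):

	/* lu12i.w and lu32i.d take a signed 20-bit immediate: [-SZ_512K, SZ_512K) */
	if (!signed_imm_check(imm, 20))
		return INSN_BREAK;

	/* jirl takes a signed 18-bit, 4-byte-aligned byte offset: [-SZ_128K, SZ_128K) */
	if ((imm & 3) || !signed_imm_check(imm, 18))
		return INSN_BREAK;

Returning INSN_BREAK (a break instruction) instead of a silently wrong encoding means that a bad immediate traps on first execution rather than corrupting control flow.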
Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 13 +++++++++++-- arch/loongarch/include/asm/module.h | 2 +- arch/loongarch/kernel/inst.c | 24 ++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index b09887ffcd15..1dc5b5802c15 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -5,6 +5,7 @@ #ifndef _ASM_INST_H #define _ASM_INST_H +#include #include #include #include @@ -15,14 +16,22 @@ #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 #define ADDR_IMMMASK_LU12IW 0x00000000FFFFF000 +#define ADDR_IMMMASK_ORI 0x0000000000000FFF #define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 #define ADDR_IMMSHIFT_LU52ID 52 +#define ADDR_IMMSBIDX_LU52ID 11 #define ADDR_IMMSHIFT_LU32ID 32 +#define ADDR_IMMSBIDX_LU32ID 19 #define ADDR_IMMSHIFT_LU12IW 12 +#define ADDR_IMMSBIDX_LU12IW 19 +#define ADDR_IMMSHIFT_ORI 0 +#define ADDR_IMMSBIDX_ORI 63 #define ADDR_IMMSHIFT_ADDU16ID 16 +#define ADDR_IMMSBIDX_ADDU16ID 15 -#define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) +#define ADDR_IMM(addr, INSN) \ + (sign_extend64(((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN), ADDR_IMMSBIDX_##INSN)) enum reg0i15_op { break_op = 0x54, @@ -449,7 +458,7 @@ u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); -u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest); +u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); static inline bool signed_imm_check(long val, unsigned int bit) { diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 12a0f1e66916..2ecd82bb64e1 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -55,7 +55,7 @@ static inline struct plt_entry emit_plt_entry(unsigned long val) lu12iw = larch_insn_gen_lu12iw(LOONGARCH_GPR_T1, ADDR_IMM(val, LU12IW)); lu32id = larch_insn_gen_lu32id(LOONGARCH_GPR_T1, ADDR_IMM(val, LU32ID)); lu52id = larch_insn_gen_lu52id(LOONGARCH_GPR_T1, LOONGARCH_GPR_T1, ADDR_IMM(val, LU52ID)); - jirl = larch_insn_gen_jirl(0, LOONGARCH_GPR_T1, 0, (val & 0xfff)); + jirl = larch_insn_gen_jirl(0, LOONGARCH_GPR_T1, ADDR_IMM(val, ORI)); return (struct plt_entry) { lu12iw, lu32id, lu52id, jirl }; } diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 258ef267cd30..ffe13c5ba557 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -226,6 +226,11 @@ u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm) { union loongarch_instruction insn; + if (imm < -SZ_512K || imm >= SZ_512K) { + pr_warn("The generated lu12i.w instruction is out of range.\n"); + return INSN_BREAK; + } + emit_lu12iw(&insn, rd, imm); return insn.word; @@ -235,6 +240,11 @@ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) { union loongarch_instruction insn; + if (imm < -SZ_512K || imm >= SZ_512K) { + pr_warn("The generated lu32i.d instruction is out of range.\n"); + return INSN_BREAK; + } + emit_lu32id(&insn, rd, imm); return insn.word; @@ -244,16 +254,26 @@ u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr 
rj, int imm) { union loongarch_instruction insn; + if (imm < -SZ_2K || imm >= SZ_2K) { + pr_warn("The generated lu52i.d instruction is out of range.\n"); + return INSN_BREAK; + } + emit_lu52id(&insn, rd, rj, imm); return insn.word; } -u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest) +u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) { union loongarch_instruction insn; - emit_jirl(&insn, rj, rd, (dest - pc) >> 2); + if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) { + pr_warn("The generated jirl instruction is out of range.\n"); + return INSN_BREAK; + } + + emit_jirl(&insn, rj, rd, imm >> 2); return insn.word; } -- cgit v1.2.3-58-ga151 From 414cefc798a30895d61e9d5b010fcf350af782d6 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 29 Jun 2023 20:58:42 +0800 Subject: LoongArch: Calculate various sizes in the linker script Taking the address delta between symbols in different sections is not supported by the LLVM IAS. Instead, do this in the linker script, so the same data can be properly referenced in assembly. Signed-off-by: WANG Rui Signed-off-by: WANG Xuerui [chenhuacai: Fix build with !CONFIG_EFI_STUB] Signed-off-by: Huacai Chen --- arch/loongarch/kernel/efi-header.S | 6 +++--- arch/loongarch/kernel/head.S | 8 ++++---- arch/loongarch/kernel/vmlinux.lds.S | 9 +++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kernel/efi-header.S b/arch/loongarch/kernel/efi-header.S index 8c1d229a2afa..5f23b85d78ca 100644 --- a/arch/loongarch/kernel/efi-header.S +++ b/arch/loongarch/kernel/efi-header.S @@ -24,7 +24,7 @@ .byte 0x02 /* MajorLinkerVersion */ .byte 0x14 /* MinorLinkerVersion */ .long __inittext_end - .Lefi_header_end /* SizeOfCode */ - .long _end - __initdata_begin /* SizeOfInitializedData */ + .long _kernel_vsize /* SizeOfInitializedData */ .long 0 /* SizeOfUninitializedData */ .long __efistub_efi_pe_entry - _head /* AddressOfEntryPoint */ .long .Lefi_header_end - _head /* BaseOfCode */ @@ -79,9 +79,9 @@ IMAGE_SCN_MEM_EXECUTE /* Characteristics */ .ascii ".data\0\0\0" - .long _end - __initdata_begin /* VirtualSize */ + .long _kernel_vsize /* VirtualSize */ .long __initdata_begin - _head /* VirtualAddress */ - .long _edata - __initdata_begin /* SizeOfRawData */ + .long _kernel_rsize /* SizeOfRawData */ .long __initdata_begin - _head /* PointerToRawData */ .long 0 /* PointerToRelocations */ diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index aa64b179744f..5e828a8bc0a0 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -23,7 +23,7 @@ _head: .word MZ_MAGIC /* "MZ", MS-DOS header */ .org 0x8 .dword kernel_entry /* Kernel entry point */ - .dword _end - _text /* Kernel image effective size */ + .dword _kernel_asize /* Kernel image effective size */ .quad PHYS_LINK_KADDR /* Kernel image load offset from start of RAM */ .org 0x38 /* 0x20 ~ 0x37 reserved */ .long LINUX_PE_MAGIC @@ -32,9 +32,9 @@ _head: pe_header: __EFI_PE_HEADER -SYM_DATA(kernel_asize, .long _end - _text); -SYM_DATA(kernel_fsize, .long _edata - _text); -SYM_DATA(kernel_offset, .long kernel_offset - _text); +SYM_DATA(kernel_asize, .long _kernel_asize); +SYM_DATA(kernel_fsize, .long _kernel_fsize); +SYM_DATA(kernel_offset, .long _kernel_offset); #endif diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 0c7b041be9d8..b1686afcf876 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ 
b/arch/loongarch/kernel/vmlinux.lds.S @@ -136,6 +136,15 @@ SECTIONS DWARF_DEBUG ELF_DETAILS +#ifdef CONFIG_EFI_STUB + /* header symbols */ + _kernel_asize = _end - _text; + _kernel_fsize = _edata - _text; + _kernel_vsize = _end - __initdata_begin; + _kernel_rsize = _edata - __initdata_begin; + _kernel_offset = kernel_offset - _text; +#endif + .gptab.sdata : { *(.gptab.data) *(.gptab.sdata) -- cgit v1.2.3-58-ga151 From 24da0249d950bbf97a8513daf414b48548b8bbe9 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: extable: Also recognize ABI names of registers When the kernel is compiled with LLVM, the register names being handled during exception fixup building are ABI names instead of bare $rNN style. Add mapping for the ABI names for LLVM compatibility. Signed-off-by: WANG Rui Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/gpr-num.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h index e0941af20c7e..996038da806d 100644 --- a/arch/loongarch/include/asm/gpr-num.h +++ b/arch/loongarch/include/asm/gpr-num.h @@ -9,6 +9,22 @@ .equ .L__gpr_num_$r\num, \num .endr + /* ABI names of registers */ + .equ .L__gpr_num_$ra, 1 + .equ .L__gpr_num_$tp, 2 + .equ .L__gpr_num_$sp, 3 + .irp num,0,1,2,3,4,5,6,7 + .equ .L__gpr_num_$a\num, 4 + \num + .endr + .irp num,0,1,2,3,4,5,6,7,8 + .equ .L__gpr_num_$t\num, 12 + \num + .endr + .equ .L__gpr_num_$s9, 22 + .equ .L__gpr_num_$fp, 22 + .irp num,0,1,2,3,4,5,6,7,8 + .equ .L__gpr_num_$s\num, 23 + \num + .endr + #else /* __ASSEMBLY__ */ #define __DEFINE_ASM_GPR_NUMS \ @@ -16,6 +32,20 @@ " .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ " .equ .L__gpr_num_$r\\num, \\num\n" \ " .endr\n" \ +" .equ .L__gpr_num_$ra, 1\n" \ +" .equ .L__gpr_num_$tp, 2\n" \ +" .equ .L__gpr_num_$sp, 3\n" \ +" .irp num,0,1,2,3,4,5,6,7\n" \ +" .equ .L__gpr_num_$a\\num, 4 + \\num\n" \ +" .endr\n" \ +" .irp num,0,1,2,3,4,5,6,7,8\n" \ +" .equ .L__gpr_num_$t\\num, 12 + \\num\n" \ +" .endr\n" \ +" .equ .L__gpr_num_$s9, 22\n" \ +" .equ .L__gpr_num_$fp, 22\n" \ +" .irp num,0,1,2,3,4,5,6,7,8\n" \ +" .equ .L__gpr_num_$s\\num, 23 + \\num\n" \ +" .endr\n" \ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3-58-ga151 From 38bb46f94544c5385bc35aa2bfc776dcf53a7b5d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Prepare for assemblers with proper FCSR class support The GNU assembler (as of 2.40) mis-treats FCSR operands as GPRs, but the LLVM IAS does not. Probe for this and refer to FCSRs as "$fcsrNN" if support is present. 
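To illustrate what the probe buys us, a hedged sketch of a read helper built on the aliases below (read_fcsr0() itself is illustrative, not part of this patch):

	#include <linux/stringify.h>

	static inline unsigned int read_fcsr0(void)
	{
		unsigned int val;

		/* LOONGARCH_FCSR0 expands to "$fcsr0" under
		 * CONFIG_AS_HAS_FCSR_CLASS and to the legacy GPR spelling
		 * "$r0" otherwise, so the same source assembles with
		 * either toolchain generation. */
		__asm__ __volatile__(
			"movfcsr2gr %0, " __stringify(LOONGARCH_FCSR0)
			: "=r" (val));
		return val;
	}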
Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 +++ arch/loongarch/include/asm/fpregdef.h | 7 +++++++ arch/loongarch/include/asm/loongarch.h | 9 ++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d38b066fc931..86fdd7a42cd1 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -241,6 +241,9 @@ config SCHED_OMIT_FRAME_POINTER config AS_HAS_EXPLICIT_RELOCS def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x)) +config AS_HAS_FCSR_CLASS + def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) + menu "Kernel type and options" source "kernel/Kconfig.hz" diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h index b6be527831dd..e56610ae8592 100644 --- a/arch/loongarch/include/asm/fpregdef.h +++ b/arch/loongarch/include/asm/fpregdef.h @@ -40,6 +40,7 @@ #define fs6 $f30 #define fs7 $f31 +#ifndef CONFIG_AS_HAS_FCSR_CLASS /* * Current binutils expects *GPRs* at FCSR position for the FCSR * operation instructions, so define aliases for those used. @@ -48,5 +49,11 @@ #define fcsr1 $r1 #define fcsr2 $r2 #define fcsr3 $r3 +#else +#define fcsr0 $fcsr0 +#define fcsr1 $fcsr1 +#define fcsr2 $fcsr2 +#define fcsr3 $fcsr3 +#endif #endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 35e8a52fea11..e90c222374d0 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1441,11 +1441,18 @@ __BUILD_CSR_OP(tlbidx) #define EXCCODE_INT_START 64 #define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1) -/* FPU register names */ +/* FPU Status Register Names */ +#ifndef CONFIG_AS_HAS_FCSR_CLASS #define LOONGARCH_FCSR0 $r0 #define LOONGARCH_FCSR1 $r1 #define LOONGARCH_FCSR2 $r2 #define LOONGARCH_FCSR3 $r3 +#else +#define LOONGARCH_FCSR0 $fcsr0 +#define LOONGARCH_FCSR1 $fcsr1 +#define LOONGARCH_FCSR2 $fcsr2 +#define LOONGARCH_FCSR3 $fcsr3 +#endif /* FPU Status Register Values */ #define FPU_CSR_RSVD 0xe0e0fce0 -- cgit v1.2.3-58-ga151 From 53a4858ccd0d27538f9ab1ac2bead002fca97edc Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Make the CPUCFG&CSR ops simple aliases of compiler built-ins In addition to less visual clutter, this also makes Clang happy regarding the const-ness of arguments. In the original approach, all Clang gets to see is the incoming arguments whose const-ness cannot be proven without first being inlined; so Clang errors out here while GCC is fine. While at it, tweak several printk format strings because the return type of csr_read64 becomes effectively unsigned long, instead of unsigned long long. 
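A minimal sketch of the failure mode Clang hits (the helper name bad_csr_read64 is illustrative):

	#include <larchintrin.h>

	/* The builtin demands an integer constant expression for the CSR
	 * number.  A wrapper function turns that constant into a plain
	 * parameter, and Clang checks the builtin before any inlining: */
	static inline unsigned long bad_csr_read64(unsigned int reg)
	{
		return __csrrd_d(reg);	/* clang: error, 'reg' is not constant */
	}

	/* A macro alias keeps the literal visible at every call site: */
	#define csr_read64(reg)	__csrrd_d(reg)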
Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 63 ++++++---------------------------- arch/loongarch/kernel/traps.c | 2 +- arch/loongarch/lib/dump_tlb.c | 6 ++-- 3 files changed, 15 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index e90c222374d0..08c77d065a11 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -56,10 +56,7 @@ __asm__(".macro parse_r var r\n\t" #undef _IFC_REG /* CPUCFG */ -static inline u32 read_cpucfg(u32 reg) -{ - return __cpucfg(reg); -} +#define read_cpucfg(reg) __cpucfg(reg) #endif /* !__ASSEMBLY__ */ @@ -206,56 +203,18 @@ static inline u32 read_cpucfg(u32 reg) #ifndef __ASSEMBLY__ /* CSR */ -static __always_inline u32 csr_read32(u32 reg) -{ - return __csrrd_w(reg); -} - -static __always_inline u64 csr_read64(u32 reg) -{ - return __csrrd_d(reg); -} - -static __always_inline void csr_write32(u32 val, u32 reg) -{ - __csrwr_w(val, reg); -} - -static __always_inline void csr_write64(u64 val, u32 reg) -{ - __csrwr_d(val, reg); -} - -static __always_inline u32 csr_xchg32(u32 val, u32 mask, u32 reg) -{ - return __csrxchg_w(val, mask, reg); -} - -static __always_inline u64 csr_xchg64(u64 val, u64 mask, u32 reg) -{ - return __csrxchg_d(val, mask, reg); -} +#define csr_read32(reg) __csrrd_w(reg) +#define csr_read64(reg) __csrrd_d(reg) +#define csr_write32(val, reg) __csrwr_w(val, reg) +#define csr_write64(val, reg) __csrwr_d(val, reg) +#define csr_xchg32(val, mask, reg) __csrxchg_w(val, mask, reg) +#define csr_xchg64(val, mask, reg) __csrxchg_d(val, mask, reg) /* IOCSR */ -static __always_inline u32 iocsr_read32(u32 reg) -{ - return __iocsrrd_w(reg); -} - -static __always_inline u64 iocsr_read64(u32 reg) -{ - return __iocsrrd_d(reg); -} - -static __always_inline void iocsr_write32(u32 val, u32 reg) -{ - __iocsrwr_w(val, reg); -} - -static __always_inline void iocsr_write64(u64 val, u32 reg) -{ - __iocsrwr_d(val, reg); -} +#define iocsr_read32(reg) __iocsrrd_w(reg) +#define iocsr_read64(reg) __iocsrrd_d(reg) +#define iocsr_write32(val, reg) __iocsrwr_w(val, reg) +#define iocsr_write64(val, reg) __iocsrwr_d(val, reg) #endif /* !__ASSEMBLY__ */ diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 8db26e4ca447..e73d9bbe1658 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -924,7 +924,7 @@ asmlinkage void cache_parity_error(void) /* For the moment, report the problem and hang. 
*/ pr_err("Cache error exception:\n"); pr_err("csr_merrctl == %08x\n", csr_read32(LOONGARCH_CSR_MERRCTL)); - pr_err("csr_merrera == %016llx\n", csr_read64(LOONGARCH_CSR_MERRERA)); + pr_err("csr_merrera == %016lx\n", csr_read64(LOONGARCH_CSR_MERRERA)); panic("Can't handle the cache error!"); } diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c index c2cc7ce343c9..0b886a6e260f 100644 --- a/arch/loongarch/lib/dump_tlb.c +++ b/arch/loongarch/lib/dump_tlb.c @@ -20,9 +20,9 @@ void dump_tlb_regs(void) pr_info("Index : 0x%0x\n", read_csr_tlbidx()); pr_info("PageSize : 0x%0x\n", read_csr_pagesize()); - pr_info("EntryHi : 0x%0*llx\n", field, read_csr_entryhi()); - pr_info("EntryLo0 : 0x%0*llx\n", field, read_csr_entrylo0()); - pr_info("EntryLo1 : 0x%0*llx\n", field, read_csr_entrylo1()); + pr_info("EntryHi : 0x%0*lx\n", field, read_csr_entryhi()); + pr_info("EntryLo0 : 0x%0*lx\n", field, read_csr_entrylo0()); + pr_info("EntryLo1 : 0x%0*lx\n", field, read_csr_entrylo1()); } static void dump_tlb(int first, int last) -- cgit v1.2.3-58-ga151 From 83d8b38967d253942d9172b0c4d69b7d844d5f06 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Simplify the invtlb wrappers The invtlb instruction has been supported by upstream LoongArch toolchains from day one, so ditch the raw opcode trickery and just use plain inline asm for it. While at it, also make the invtlb asm statements barriers, for proper modeling of the side effects. The functions are also marked as __always_inline instead of just "inline", because they cannot work at all if not inlined: the op argument will not be compile-time const in that case, thus failing to satisfy the "i" constraint. The signature of the other more specific invtlb wrappers contain unused arguments right now, but these are not removed right away in order for the patch to be focused. In the meantime, assertions are added to ensure no accidental misuse happens before the refactor. (The more specific wrappers cannot re-use the generic invtlb wrapper, because the ISA manual says $zero shall be used in case a particular op does not take the respective argument: re-using the generic wrapper would mean losing control over the register usage.) 
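Call sites are unchanged; a usage sketch (op value from the invtlb_ops enum in this header, as used by local_flush_tlb_all()):

	/* Invalidate all TLB entries on the local core.  The op must be a
	 * compile-time constant to satisfy the "i" constraint, which
	 * __always_inline now guarantees even at -O0. */
	invtlb_all(INVTLB_CURRENT_ALL, 0, 0);

The new BUILD_BUG_ON() lines additionally turn a non-zero info/addr passed to one of the narrower wrappers into a build failure instead of a silently ignored argument.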
Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/tlb.h | 43 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index f5e4deb97402..0ad53f1ad25d 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -88,52 +88,47 @@ enum invtlb_ops { INVTLB_GID_ADDR = 0x16, }; -/* - * invtlb op info addr - * (0x1 << 26) | (0x24 << 20) | (0x13 << 15) | - * (addr << 10) | (info << 5) | op - */ -static inline void invtlb(u32 op, u32 info, u64 addr) +static __always_inline void invtlb(u32 op, u32 info, u64 addr) { __asm__ __volatile__( - "parse_r addr,%0\n\t" - "parse_r info,%1\n\t" - ".word ((0x6498000) | (addr << 10) | (info << 5) | %2)\n\t" - : - : "r"(addr), "r"(info), "i"(op) + "invtlb %0, %1, %2\n\t" : + : "i"(op), "r"(info), "r"(addr) + : "memory" ); } -static inline void invtlb_addr(u32 op, u32 info, u64 addr) +static __always_inline void invtlb_addr(u32 op, u32 info, u64 addr) { + BUILD_BUG_ON(!__builtin_constant_p(info) || info != 0); __asm__ __volatile__( - "parse_r addr,%0\n\t" - ".word ((0x6498000) | (addr << 10) | (0 << 5) | %1)\n\t" - : - : "r"(addr), "i"(op) + "invtlb %0, $zero, %1\n\t" : + : "i"(op), "r"(addr) + : "memory" ); } -static inline void invtlb_info(u32 op, u32 info, u64 addr) +static __always_inline void invtlb_info(u32 op, u32 info, u64 addr) { + BUILD_BUG_ON(!__builtin_constant_p(addr) || addr != 0); __asm__ __volatile__( - "parse_r info,%0\n\t" - ".word ((0x6498000) | (0 << 10) | (info << 5) | %1)\n\t" - : - : "r"(info), "i"(op) + "invtlb %0, %1, $zero\n\t" : + : "i"(op), "r"(info) + : "memory" ); } -static inline void invtlb_all(u32 op, u32 info, u64 addr) +static __always_inline void invtlb_all(u32 op, u32 info, u64 addr) { + BUILD_BUG_ON(!__builtin_constant_p(info) || info != 0); + BUILD_BUG_ON(!__builtin_constant_p(addr) || addr != 0); __asm__ __volatile__( - ".word ((0x6498000) | (0 << 10) | (0 << 5) | %0)\n\t" + "invtlb %0, $zero, $zero\n\t" : : "i"(op) - : + : "memory" ); } -- cgit v1.2.3-58-ga151 From 38b10b269d04540aee05c34a059dcf304cfce0a8 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Tweak CFLAGS for Clang compatibility Now the arch code is mostly ready for LLVM/Clang consumption, it is time to re-organize the CFLAGS a little to actually enable the LLVM build. Namely, all -G0 switches from CFLAGS are removed, and -mexplicit-relocs and -mdirect-extern-access are now wrapped with cc-option (with the related asm/percpu.h definition guarded against toolchain combos that are known to not work). A build with !RELOCATABLE && !MODULE is confirmed working within a QEMU environment; support for the two features are currently blocked on LLVM/Clang, and will come later. Why -G0 can be removed: In GCC, -G stands for "small data threshold", that instructs the compiler to put data smaller than the specified threshold in a dedicated "small data" section (called .sdata on LoongArch and several other arches). However, benefiting from this would require ABI cooperation, which is not the case for LoongArch; and current GCC behave the same whether -G0 (equal to disabling this optimization) is given or not. So, remove -G0 from CFLAGS altogether for one less thing to care about. This also benefits LLVM/Clang compatibility where the -G switch is not supported. 
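(An illustration of what -G controls on arches that honor it, to make the mechanism concrete:

	/* With e.g. -G8 on MIPS, an object this small would be placed in
	 * .sdata and addressed gp-relative; the LoongArch ABI defines no
	 * gp-relative addressing, so GCC generates identical code with or
	 * without -G0. */
	static int nfound;
)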
Why -mexplicit-relocs can now be conditionally applied without regressions: Originally -mexplicit-relocs is unconditionally added to CFLAGS in case of CONFIG_AS_HAS_EXPLICIT_RELOCS, because not having it (i.e. old GCC + new binutils) would not work: modules will have R_LARCH_ABS_* relocs inside, but given the rarity of such toolchain combo in the wild, it may not be worthwhile to support it, so support for such relocs in modules were not added back when explicit relocs support was upstreamed, and -mexplicit-relocs is unconditionally added to fail the build early. Now that Clang compatibility is desired, given Clang is behaving like -mexplicit-relocs from day one but without support for the CLI flag, we must ensure the flag is not passed in case of Clang. However, explicit compiler flavor checks can be more brittle than feature detection: in this case what actually matters is support for __attribute__((model)) when building modules. Given neither older GCC nor current Clang support this attribute, probing for the attribute support and #error'ing out would allow proper UX without checking for Clang, and also automatically work when Clang support for the attribute is to be added in the future. Why -mdirect-extern-access is now conditionally applied: This is actually a nice-to-have optimization that can reduce GOT accesses, but not having it is harmless either. Because Clang does not support the option currently, but might do so in the future, conditional application via cc-option ensures compatibility with both current and future Clang versions. Suggested-by: Xi Ruoyao # cc-option changes Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 21 +++++++++++++-------- arch/loongarch/include/asm/percpu.h | 6 +++++- arch/loongarch/vdso/Makefile | 2 +- 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index a27e264bdaa5..a63683da3bcf 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -46,8 +46,8 @@ ld-emul = $(64bit-emul) cflags-y += -mabi=lp64s endif -cflags-y += -G0 -pipe -msoft-float -LDFLAGS_vmlinux += -G0 -static -n -nostdlib +cflags-y += -pipe -msoft-float +LDFLAGS_vmlinux += -static -n -nostdlib # When the assembler supports explicit relocation hint, we must use it. # GCC may have -mexplicit-relocs off by default if it was built with an old @@ -56,13 +56,18 @@ LDFLAGS_vmlinux += -G0 -static -n -nostdlib # When the assembler does not supports explicit relocation hint, we can't use # it. Disable it if the compiler supports it. # -# If you've seen "unknown reloc hint" message building the kernel and you are -# now wondering why "-mexplicit-relocs" is not wrapped with cc-option: the -# combination of a "new" assembler and "old" compiler is not supported. Either -# upgrade the compiler or downgrade the assembler. +# The combination of a "new" assembler and "old" GCC is not supported, given +# the rarity of this combo and the extra complexity needed to make it work. +# Either upgrade the compiler or downgrade the assembler; the build will error +# out if it is the case (by probing for the model attribute; all supported +# compilers in this case would have support). +# +# Also, -mdirect-extern-access is useful in case of building with explicit +# relocs, for avoiding unnecessary GOT accesses. It is harmless to not have +# support though. 
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS -cflags-y += -mexplicit-relocs -KBUILD_CFLAGS_KERNEL += -mdirect-extern-access +cflags-y += $(call cc-option,-mexplicit-relocs) +KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) else cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index ad8d88494554..b9f567e66016 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -14,7 +14,11 @@ * loaded. Tell the compiler this fact when using explicit relocs. */ #if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS) -#define PER_CPU_ATTRIBUTES __attribute__((model("extreme"))) +# if __has_attribute(model) +# define PER_CPU_ATTRIBUTES __attribute__((model("extreme"))) +# else +# error compiler support for the model attribute is necessary when a recent assembler is used +# endif #endif /* Use r21 for fast access */ diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 461240ab4436..3f9df4d9930f 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -21,7 +21,7 @@ endif cflags-vdso := $(ccflags-vdso) \ -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ - -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ + -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) -- cgit v1.2.3-58-ga151 From b89673a91a31710a4a957114b0195cfd45feb122 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: vDSO: Use CLANG_FLAGS instead of filtering out '--target=' This is a port of commit 76d7fff22be3e ("MIPS: VDSO: Use CLANG_FLAGS instead of filtering out '--target='") to arch/loongarch, for fixing cross-compilation with Clang. Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1787#issuecomment-1608306002 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/vdso/Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 3f9df4d9930f..a50308b6fc25 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -12,12 +12,9 @@ ccflags-vdso := \ $(filter -E%,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ + $(CLANG_FLAGS) \ -D__VDSO__ -ifeq ($(cc-name),clang) -ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) -endif - cflags-vdso := $(ccflags-vdso) \ -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -- cgit v1.2.3-58-ga151 From 5ddc7a3794ddd3470635ebd325fa1dffea5b18c0 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Include KBUILD_CPPFLAGS in CHECKFLAGS invocation This is a port of commit 08f6554ff90e ("mips: Include KBUILD_CPPFLAGS in CHECKFLAGS invocation") to arch/loongarch, for fixing cross-compilation of Linux/LoongArch with Clang, where previously the `--target` flag would no longer be present for the CHECKFLAGS cc invocation leading to build failure. 
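For reference, the rule boils down to feeding sparse the compiler's predefined macro list (a sketch of the probe, matching the diff below):

	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null

With clang, the --target= flag travels in $(CLANG_FLAGS), which kbuild now folds into KBUILD_CPPFLAGS rather than KBUILD_CFLAGS; dropping KBUILD_CPPFLAGS therefore made the probe describe the host instead of LoongArch.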
Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1787#issuecomment-1608306002 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index a63683da3bcf..09ba338a64de 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -112,7 +112,7 @@ KBUILD_CFLAGS += -isystem $(shell $(CC) -print-file-name=include) KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH -CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ +CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif -- cgit v1.2.3-58-ga151 From 5a31ed4678e0b09f8c4c8b2e711c6cc112082dd4 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Mark Clang LTO as working Confirmed working with QEMU system emulation. Acked-by: Nick Desaulniers Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 86fdd7a42cd1..f2fce9b30554 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -48,6 +48,8 @@ config LOONGARCH select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF -- cgit v1.2.3-58-ga151 From aa5e65dc0818bbf676bf06927368ec46867778fd Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Add support to clone a time namespace We can see that "Time namespaces are not supported" on LoongArch: (1) clone3 test # cd tools/testing/selftests/clone3 && make && ./clone3 ... # Time namespaces are not supported ok 18 # SKIP Skipping clone3() with CLONE_NEWTIME # Totals: pass:17 fail:0 xfail:0 xpass:0 skip:1 error:0 (2) timens test # cd tools/testing/selftests/timens && make && ./timens ... 1..0 # SKIP Time namespaces are not supported On LoongArch the current kernel does not support CONFIG_TIME_NS which depends on GENERIC_VDSO_TIME_NS, select GENERIC_VDSO_TIME_NS to enable CONFIG_TIME_NS to build kernel/time/namespace.c. Additionally, it needs to define some arch-dependent functions for the timens, such as __arch_get_timens_vdso_data(), arch_get_vdso_data() and vdso_join_timens(). At the same time, modify the layout of vvar to use one page size for generic vdso data, expand another page size for timens vdso data and assign LOONGARCH_VDSO_DATA_SIZE (maybe exceeds a page size if expand in the future) for loongarch vdso data, at last add the callback function vvar_fault() and modify stack_top(). With this patch under CONFIG_TIME_NS: (1) clone3 test # cd tools/testing/selftests/clone3 && make && ./clone3 ... ok 18 [739] Result (0) matches expectation (0) # Totals: pass:18 fail:0 xfail:0 xpass:0 skip:0 error:0 (2) timens test # cd tools/testing/selftests/timens && make && ./timens ... 
# Totals: pass:10 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/page.h | 1 + arch/loongarch/include/asm/vdso/gettimeofday.h | 9 ++- arch/loongarch/include/asm/vdso/vdso.h | 32 +++++++-- arch/loongarch/kernel/process.c | 2 +- arch/loongarch/kernel/vdso.c | 98 +++++++++++++++++++++----- arch/loongarch/vdso/vgetcpu.c | 2 +- 7 files changed, 121 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index f2fce9b30554..1944bae2f31c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -82,6 +82,7 @@ config LOONGARCH select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_TIME_NS select GPIOLIB select HAS_IOPORT select HAVE_ARCH_AUDITSYSCALL diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index fb5338b352e6..26e8dccb6619 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -81,6 +81,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) #define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index 7b2cd37641e2..89e6b222c2f2 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -91,9 +91,16 @@ static inline bool loongarch_vdso_hres_capable(void) static __always_inline const struct vdso_data *__arch_get_vdso_data(void) { - return get_vdso_data(); + return (const struct vdso_data *)get_vdso_data(); } +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return (const struct vdso_data *)(get_vdso_data() + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE); +} +#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index 3b55d32a0619..5a12309d9fb5 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -16,10 +16,33 @@ struct vdso_pcpu_data { struct loongarch_vdso_data { struct vdso_pcpu_data pdata[NR_CPUS]; - struct vdso_data data[CS_BASES]; /* Arch-independent data */ }; -#define VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data)) +/* + * The layout of vvar: + * + * high + * +---------------------+--------------------------+ + * | loongarch vdso data | LOONGARCH_VDSO_DATA_SIZE | + * +---------------------+--------------------------+ + * | time-ns vdso data | PAGE_SIZE | + * +---------------------+--------------------------+ + * | generic vdso data | PAGE_SIZE | + * +---------------------+--------------------------+ + * low + */ +#define LOONGARCH_VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data)) +#define LOONGARCH_VDSO_DATA_PAGES (LOONGARCH_VDSO_DATA_SIZE >> PAGE_SHIFT) + +enum vvar_pages { + VVAR_GENERIC_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_LOONGARCH_PAGES_START, + VVAR_LOONGARCH_PAGES_END = VVAR_LOONGARCH_PAGES_START + LOONGARCH_VDSO_DATA_PAGES - 1, + VVAR_NR_PAGES, +}; + +#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) static inline unsigned long 
get_vdso_base(void) { @@ -34,10 +57,9 @@ static inline unsigned long get_vdso_base(void) return addr; } -static inline const struct vdso_data *get_vdso_data(void) +static inline unsigned long get_vdso_data(void) { - return (const struct vdso_data *)(get_vdso_base() - - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS); + return get_vdso_base() - VVAR_SIZE; } #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index b71e17c1cc0c..9535a0662480 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -285,7 +285,7 @@ unsigned long stack_top(void) /* Space for the VDSO & data page */ top -= PAGE_ALIGN(current->thread.vdso->size); - top -= PAGE_SIZE; + top -= VVAR_SIZE; /* Space to randomize the VDSO base */ if (current->flags & PF_RANDOMIZE) diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index eaebd2e0f725..14941e4be66d 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -26,12 +27,17 @@ extern char vdso_start[], vdso_end[]; /* Kernel-provided data used by the VDSO. */ static union { - u8 page[VDSO_DATA_SIZE]; + u8 page[PAGE_SIZE]; + struct vdso_data data[CS_BASES]; +} generic_vdso_data __page_aligned_data; + +static union { + u8 page[LOONGARCH_VDSO_DATA_SIZE]; struct loongarch_vdso_data vdata; } loongarch_vdso_data __page_aligned_data; static struct page *vdso_pages[] = { NULL }; -struct vdso_data *vdso_data = loongarch_vdso_data.vdata.data; +struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) @@ -41,6 +47,43 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struc return 0; } +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + unsigned long pfn; + struct page *timens_page = find_timens_vvar_page(vma); + + switch (vmf->pgoff) { + case VVAR_GENERIC_PAGE_OFFSET: + if (!timens_page) + pfn = sym_to_pfn(vdso_data); + else + pfn = page_to_pfn(timens_page); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace specific + * VVAR is mapped with the VVAR_GENERIC_PAGE_OFFSET and the real + * VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + else + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + case VVAR_LOONGARCH_PAGES_START ... 
VVAR_LOONGARCH_PAGES_END: + pfn = sym_to_pfn(&loongarch_vdso_data) + vmf->pgoff - VVAR_LOONGARCH_PAGES_START; + break; + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + struct loongarch_vdso_info vdso_info = { .vdso = vdso_start, .size = PAGE_SIZE, @@ -51,6 +94,7 @@ struct loongarch_vdso_info vdso_info = { }, .data_mapping = { .name = "[vvar]", + .fault = vvar_fault, }, .offset_sigreturn = vdso_offset_sigreturn, }; @@ -73,6 +117,37 @@ static int __init init_vdso(void) } subsys_initcall(init_vdso); +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a + * task changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + VMA_ITERATOR(vmi, mm, 0); + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { + if (vma_is_special_mapping(vma, &vdso_info.data_mapping)) + zap_vma_pages(vma); + } + mmap_read_unlock(mm); + + return 0; +} +#endif + static unsigned long vdso_base(void) { unsigned long base = STACK_TOP; @@ -88,7 +163,7 @@ static unsigned long vdso_base(void) int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { int ret; - unsigned long vvar_size, size, data_addr, vdso_addr; + unsigned long size, data_addr, vdso_addr; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct loongarch_vdso_info *info = current->thread.vdso; @@ -100,32 +175,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * Determine total area size. This includes the VDSO data itself * and the data pages. */ - vvar_size = VDSO_DATA_SIZE; - size = vvar_size + info->size; + size = VVAR_SIZE + info->size; data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(data_addr)) { ret = data_addr; goto out; } - vdso_addr = data_addr + VDSO_DATA_SIZE; - vma = _install_special_mapping(mm, data_addr, vvar_size, - VM_READ | VM_MAYREAD, + vma = _install_special_mapping(mm, data_addr, VVAR_SIZE, + VM_READ | VM_MAYREAD | VM_PFNMAP, &info->data_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; } - /* Map VDSO data page. */ - ret = remap_pfn_range(vma, data_addr, - virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT, - vvar_size, PAGE_READONLY); - if (ret) - goto out; - - /* Map VDSO code page. 
*/ + vdso_addr = data_addr + VVAR_SIZE; vma = _install_special_mapping(mm, vdso_addr, info->size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, &info->code_mapping); diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c index e02e775f5360..9e445be39763 100644 --- a/arch/loongarch/vdso/vgetcpu.c +++ b/arch/loongarch/vdso/vgetcpu.c @@ -21,7 +21,7 @@ static __always_inline int read_cpu_id(void) static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) { - return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); + return (struct vdso_pcpu_data *)(get_vdso_data() + VVAR_LOONGARCH_PAGES_START * PAGE_SIZE); } extern -- cgit v1.2.3-58-ga151 From 616500232e632dba8b03981eeccadacf2fbf1c30 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Add vector extensions support Add LoongArch's vector extensions support, which including 128bit LSX (i.e., Loongson SIMD eXtension) and 256bit LASX (i.e., Loongson Advanced SIMD eXtension). Linux kernel doesn't use vector itself, it only handle exceptions and context save/restore. So it only needs a subset of these instructions: * Vector load/store: vld vst vldx vstx xvld xvst xvldx xvstx * 8bit-elements move: vpickve2gr.b xvpickve2gr.b vinsgr2vr.b xvinsgr2vr.b * 16bit-elements move: vpickve2gr.h xvpickve2gr.h vinsgr2vr.h xvinsgr2vr.h * 32bit-elements move: vpickve2gr.w xvpickve2gr.w vinsgr2vr.w xvinsgr2vr.w * 64bit-elements move: vpickve2gr.d xvpickve2gr.d vinsgr2vr.d xvinsgr2vr.d * Elements permute: vpermi.w vpermi.d xvpermi.w xvpermi.d xvpermi.q Introduce AS_HAS_LSX_EXTENSION and AS_HAS_LASX_EXTENSION to avoid non- vector toolchains complains unsupported instructions. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 51 +++- arch/loongarch/include/asm/asmmacro.h | 393 +++++++++++++++++++++++++++ arch/loongarch/include/asm/fpu.h | 185 ++++++++++++- arch/loongarch/include/uapi/asm/ptrace.h | 16 +- arch/loongarch/include/uapi/asm/sigcontext.h | 18 ++ arch/loongarch/kernel/cpu-probe.c | 12 + arch/loongarch/kernel/fpu.S | 270 ++++++++++++++++++ arch/loongarch/kernel/process.c | 10 +- arch/loongarch/kernel/ptrace.c | 110 ++++++++ arch/loongarch/kernel/signal.c | 326 +++++++++++++++++++++- arch/loongarch/kernel/traps.c | 84 +++++- 11 files changed, 1452 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1944bae2f31c..72b614429c37 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -164,14 +164,6 @@ config 32BIT config 64BIT def_bool y -config CPU_HAS_FPU - bool - default y - -config CPU_HAS_PREFETCH - bool - default y - config GENERIC_BUG def_bool y depends on BUG @@ -247,6 +239,12 @@ config AS_HAS_EXPLICIT_RELOCS config AS_HAS_FCSR_CLASS def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0) +config AS_HAS_LSX_EXTENSION + def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0) + +config AS_HAS_LASX_EXTENSION + def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0) + menu "Kernel type and options" source "kernel/Kconfig.hz" @@ -487,6 +485,43 @@ config ARCH_STRICT_ALIGN to run kernel only on systems with h/w unaligned access support in order to optimise for performance. +config CPU_HAS_FPU + bool + default y + +config CPU_HAS_LSX + bool "Support for the Loongson SIMD Extension" + depends on AS_HAS_LSX_EXTENSION + help + Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers + and a set of SIMD instructions to operate on them. 
When this option + is enabled the kernel will support allocating & switching LSX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LSX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_LASX + bool "Support for the Loongson Advanced SIMD Extension" + depends on CPU_HAS_LSX + depends on AS_HAS_LASX_EXTENSION + help + Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector + registers and a set of SIMD instructions to operate on them. When this + option is enabled the kernel will support allocating & switching LASX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LASX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_PREFETCH + bool + default y + config KEXEC bool "Kexec system call" select KEXEC_CORE diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c51a1b43acb4..79e1d53fea89 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -270,6 +270,399 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + vst $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + vst $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + vst $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + vst $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + vst $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + vst $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + vst $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + vst $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + vst $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + vst $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + vst $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + vst $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + vst $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + vst $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + vst $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + vst $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + vst $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + vst $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + vst $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + vst $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + vst $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + vst $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + vst $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + vst $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + vst $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + vst $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + vst $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + vst $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + vst $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + vst $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + vst $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + vst $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lsx_restore_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + vld $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + vld $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + vld $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + vld $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + vld $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + vld $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + vld $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + vld $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + vld $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + vld $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + vld $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + vld $vr11, \tmp, THREAD_FPR11 - 
THREAD_FPR0 + vld $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + vld $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + vld $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + vld $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + vld $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + vld $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + vld $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + vld $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + vld $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + vld $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + vld $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + vld $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + vld $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + vld $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + vld $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + vld $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + vld $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + vld $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + vld $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + vld $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + vpickve2gr.d \tmp, \vd, 1 + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_restore_upper vd base tmp off + ld.d \tmp, \base, 
(\off+8) + vinsgr2vr.d \vd, \tmp, 1 + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + vinsgr2vr.d \vd, \tmp, 1 + .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + xvst $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + xvst $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + xvst $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + xvst $xr3, \tmp, THREAD_FPR3 - 
THREAD_FPR0 + xvst $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + xvst $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + xvst $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + xvst $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + xvst $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + xvst $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + xvst $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + xvst $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + xvst $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + xvst $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + xvst $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + xvst $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + xvst $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + xvst $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + xvst $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + xvst $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + xvst $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + xvst $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + xvst $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + xvst $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + xvst $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + xvst $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + xvst $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + xvst $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + xvst $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + xvst $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + xvst $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + xvst $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lasx_restore_data thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + xvld $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0 + xvld $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0 + xvld $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0 + xvld $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0 + xvld $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0 + xvld $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0 + xvld $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0 + xvld $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0 + xvld $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0 + xvld $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0 + xvld $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0 + xvld $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0 + xvld $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0 + xvld $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0 + xvld $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0 + xvld $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0 + xvld $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0 + xvld $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0 + xvld $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0 + xvld $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0 + xvld $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0 + xvld $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0 + xvld $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0 + xvld $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0 + xvld $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0 + xvld $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0 + xvld $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0 + xvld $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0 + xvld $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0 + xvld $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0 + xvld $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0 + xvld $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp0 tmp1 off + vld \tmp0, \base, (\off+16) + xvpermi.q \xd, \tmp1, 0x2 + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + 
PTR_ADD \base, \thread, \tmp + /* Save $vr31 ($xr31 lower bits) with xvpickve2gr */ + xvpickve2gr.d $r17, $xr31, 0 + xvpickve2gr.d $r18, $xr31, 1 + lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0) + lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */ + xvinsgr2vr.d $xr31, $r17, 0 + xvinsgr2vr.d $xr31, $r18, 1 + .endm + + .macro lasx_init_upper xd tmp + xvinsgr2vr.d \xd, \tmp, 2 + xvinsgr2vr.d \xd, \tmp, 3 + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 \tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + 
lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro not dst src nor \dst, \src, zero .endm diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 192f8e35d912..e4193d637f66 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -28,6 +28,26 @@ extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); +extern void _save_lsx(struct loongarch_fpu *fpu); +extern void _restore_lsx(struct loongarch_fpu *fpu); +extern void _init_lsx_upper(void); +extern void _restore_lsx_upper(struct loongarch_fpu *fpu); + +extern void _save_lasx(struct loongarch_fpu *fpu); +extern void _restore_lasx(struct loongarch_fpu *fpu); +extern void _init_lasx_upper(void); +extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +static inline void enable_lsx(void); +static inline void disable_lsx(void); +static inline void save_lsx(struct task_struct *t); +static inline void restore_lsx(struct task_struct *t); + +static inline void enable_lasx(void); +static inline void disable_lasx(void); +static inline void save_lasx(struct task_struct *t); +static inline void restore_lasx(struct task_struct *t); + /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -44,6 +64,29 @@ static inline int is_fp_enabled(void) 1 : 0; } +static inline int is_lsx_enabled(void) +{ + if (!cpu_has_lsx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ? + 1 : 0; +} + +static inline int is_lasx_enabled(void) +{ + if (!cpu_has_lasx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ? + 1 : 0; +} + +static inline int is_simd_enabled(void) +{ + return is_lsx_enabled() | is_lasx_enabled(); +} + #define enable_fpu() set_csr_euen(CSR_EUEN_FPEN) #define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN) @@ -81,9 +124,22 @@ static inline void own_fpu(int restore) static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) { if (is_fpu_owner()) { - if (save) - _save_fp(&tsk->thread.fpu); - disable_fpu(); + if (!is_simd_enabled()) { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + } else { + if (save) { + if (!is_lasx_enabled()) + save_lsx(tsk); + else + save_lasx(tsk); + } + disable_fpu(); + disable_lsx(); + disable_lasx(); + clear_tsk_thread_flag(tsk, TIF_USEDSIMD); + } clear_tsk_thread_flag(tsk, TIF_USEDFPU); } KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); @@ -129,4 +185,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk) return tsk->thread.fpu.fpr; } +static inline int is_simd_owner(void) +{ + return test_thread_flag(TIF_USEDSIMD); +} + +#ifdef CONFIG_CPU_HAS_LSX + +static inline void enable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _save_lsx(&t->thread.fpu); +} + +static inline void restore_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx(&t->thread.fpu); +} + +static inline void init_lsx_upper(void) +{ + /* + * Check cpu_has_lsx only if it's a constant. 
This will allow the + * compiler to optimise out code for CPUs without LSX without adding + * an extra redundant check for CPUs with LSX. + */ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return; + + _init_lsx_upper(); +} + +static inline void restore_lsx_upper(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lsx(void) {} +static inline void disable_lsx(void) {} +static inline void save_lsx(struct task_struct *t) {} +static inline void restore_lsx(struct task_struct *t) {} +static inline void init_lsx_upper(void) {} +static inline void restore_lsx_upper(struct task_struct *t) {} +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +static inline void enable_lasx(void) +{ + + if (cpu_has_lasx) + csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lasx(void) +{ + if (cpu_has_lasx) + csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _save_lasx(&t->thread.fpu); +} + +static inline void restore_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx(&t->thread.fpu); +} + +static inline void init_lasx_upper(void) +{ + if (cpu_has_lasx) + _init_lasx_upper(); +} + +static inline void restore_lasx_upper(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lasx(void) {} +static inline void disable_lasx(void) {} +static inline void save_lasx(struct task_struct *t) {} +static inline void restore_lasx(struct task_struct *t) {} +static inline void init_lasx_upper(void) {} +static inline void restore_lasx_upper(struct task_struct *t) {} +#endif + +static inline int thread_lsx_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return 0; + + return test_thread_flag(TIF_LSX_CTX_LIVE); +} + +static inline int thread_lasx_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) + return 0; + + return test_thread_flag(TIF_LASX_CTX_LIVE); +} + #endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index 82d811b5c6e9..06e3be52cb04 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -41,9 +41,19 @@ struct user_pt_regs { } __attribute__((aligned(8))); struct user_fp_state { - uint64_t fpr[32]; - uint64_t fcc; - uint32_t fcsr; + uint64_t fpr[32]; + uint64_t fcc; + uint32_t fcsr; +}; + +struct user_lsx_state { + /* 32 registers, 128 bits width per register. */ + uint64_t vregs[32*2]; +}; + +struct user_lasx_state { + /* 32 registers, 256 bits width per register. 
*/ + uint64_t vregs[32*4]; }; struct user_watch_state { diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 52e49b8bf4be..4cd7d16f7037 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -41,4 +41,22 @@ struct fpu_context { __u32 fcsr; }; +/* LSX context */ +#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct lsx_context { + __u64 regs[2*32]; + __u64 fcc; + __u32 fcsr; +}; + +/* LASX context */ +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct lasx_context { + __u64 regs[4*32]; + __u64 fcc; + __u32 fcsr; +}; + #endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 5adf0f736c6d..f42acc6c8df6 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -116,6 +116,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; } +#ifdef CONFIG_CPU_HAS_LSX + if (config & CPUCFG2_LSX) { + c->options |= LOONGARCH_CPU_LSX; + elf_hwcap |= HWCAP_LOONGARCH_LSX; + } +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (config & CPUCFG2_LASX) { + c->options |= LOONGARCH_CPU_LASX; + elf_hwcap |= HWCAP_LOONGARCH_LASX; + } +#endif if (config & CPUCFG2_COMPLEX) { c->options |= LOONGARCH_CPU_COMPLEX; elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index ccde94140c89..f3df5f0a4509 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -145,6 +145,154 @@ movgr2fcsr fcsr0, \tmp0 .endm + .macro sc_save_lsx base +#ifdef CONFIG_CPU_HAS_LSX + EX vst $vr0, \base, (0 * LSX_REG_WIDTH) + EX vst $vr1, \base, (1 * LSX_REG_WIDTH) + EX vst $vr2, \base, (2 * LSX_REG_WIDTH) + EX vst $vr3, \base, (3 * LSX_REG_WIDTH) + EX vst $vr4, \base, (4 * LSX_REG_WIDTH) + EX vst $vr5, \base, (5 * LSX_REG_WIDTH) + EX vst $vr6, \base, (6 * LSX_REG_WIDTH) + EX vst $vr7, \base, (7 * LSX_REG_WIDTH) + EX vst $vr8, \base, (8 * LSX_REG_WIDTH) + EX vst $vr9, \base, (9 * LSX_REG_WIDTH) + EX vst $vr10, \base, (10 * LSX_REG_WIDTH) + EX vst $vr11, \base, (11 * LSX_REG_WIDTH) + EX vst $vr12, \base, (12 * LSX_REG_WIDTH) + EX vst $vr13, \base, (13 * LSX_REG_WIDTH) + EX vst $vr14, \base, (14 * LSX_REG_WIDTH) + EX vst $vr15, \base, (15 * LSX_REG_WIDTH) + EX vst $vr16, \base, (16 * LSX_REG_WIDTH) + EX vst $vr17, \base, (17 * LSX_REG_WIDTH) + EX vst $vr18, \base, (18 * LSX_REG_WIDTH) + EX vst $vr19, \base, (19 * LSX_REG_WIDTH) + EX vst $vr20, \base, (20 * LSX_REG_WIDTH) + EX vst $vr21, \base, (21 * LSX_REG_WIDTH) + EX vst $vr22, \base, (22 * LSX_REG_WIDTH) + EX vst $vr23, \base, (23 * LSX_REG_WIDTH) + EX vst $vr24, \base, (24 * LSX_REG_WIDTH) + EX vst $vr25, \base, (25 * LSX_REG_WIDTH) + EX vst $vr26, \base, (26 * LSX_REG_WIDTH) + EX vst $vr27, \base, (27 * LSX_REG_WIDTH) + EX vst $vr28, \base, (28 * LSX_REG_WIDTH) + EX vst $vr29, \base, (29 * LSX_REG_WIDTH) + EX vst $vr30, \base, (30 * LSX_REG_WIDTH) + EX vst $vr31, \base, (31 * LSX_REG_WIDTH) +#endif + .endm + + .macro sc_restore_lsx base +#ifdef CONFIG_CPU_HAS_LSX + EX vld $vr0, \base, (0 * LSX_REG_WIDTH) + EX vld $vr1, \base, (1 * LSX_REG_WIDTH) + EX vld $vr2, \base, (2 * LSX_REG_WIDTH) + EX vld $vr3, \base, (3 * LSX_REG_WIDTH) + EX vld $vr4, \base, (4 * LSX_REG_WIDTH) + EX vld $vr5, \base, (5 * LSX_REG_WIDTH) + EX vld $vr6, \base, (6 * LSX_REG_WIDTH) + EX vld $vr7, \base, (7 * LSX_REG_WIDTH) + EX vld $vr8, \base, (8 
* LSX_REG_WIDTH) + EX vld $vr9, \base, (9 * LSX_REG_WIDTH) + EX vld $vr10, \base, (10 * LSX_REG_WIDTH) + EX vld $vr11, \base, (11 * LSX_REG_WIDTH) + EX vld $vr12, \base, (12 * LSX_REG_WIDTH) + EX vld $vr13, \base, (13 * LSX_REG_WIDTH) + EX vld $vr14, \base, (14 * LSX_REG_WIDTH) + EX vld $vr15, \base, (15 * LSX_REG_WIDTH) + EX vld $vr16, \base, (16 * LSX_REG_WIDTH) + EX vld $vr17, \base, (17 * LSX_REG_WIDTH) + EX vld $vr18, \base, (18 * LSX_REG_WIDTH) + EX vld $vr19, \base, (19 * LSX_REG_WIDTH) + EX vld $vr20, \base, (20 * LSX_REG_WIDTH) + EX vld $vr21, \base, (21 * LSX_REG_WIDTH) + EX vld $vr22, \base, (22 * LSX_REG_WIDTH) + EX vld $vr23, \base, (23 * LSX_REG_WIDTH) + EX vld $vr24, \base, (24 * LSX_REG_WIDTH) + EX vld $vr25, \base, (25 * LSX_REG_WIDTH) + EX vld $vr26, \base, (26 * LSX_REG_WIDTH) + EX vld $vr27, \base, (27 * LSX_REG_WIDTH) + EX vld $vr28, \base, (28 * LSX_REG_WIDTH) + EX vld $vr29, \base, (29 * LSX_REG_WIDTH) + EX vld $vr30, \base, (30 * LSX_REG_WIDTH) + EX vld $vr31, \base, (31 * LSX_REG_WIDTH) +#endif + .endm + + .macro sc_save_lasx base +#ifdef CONFIG_CPU_HAS_LASX + EX xvst $xr0, \base, (0 * LASX_REG_WIDTH) + EX xvst $xr1, \base, (1 * LASX_REG_WIDTH) + EX xvst $xr2, \base, (2 * LASX_REG_WIDTH) + EX xvst $xr3, \base, (3 * LASX_REG_WIDTH) + EX xvst $xr4, \base, (4 * LASX_REG_WIDTH) + EX xvst $xr5, \base, (5 * LASX_REG_WIDTH) + EX xvst $xr6, \base, (6 * LASX_REG_WIDTH) + EX xvst $xr7, \base, (7 * LASX_REG_WIDTH) + EX xvst $xr8, \base, (8 * LASX_REG_WIDTH) + EX xvst $xr9, \base, (9 * LASX_REG_WIDTH) + EX xvst $xr10, \base, (10 * LASX_REG_WIDTH) + EX xvst $xr11, \base, (11 * LASX_REG_WIDTH) + EX xvst $xr12, \base, (12 * LASX_REG_WIDTH) + EX xvst $xr13, \base, (13 * LASX_REG_WIDTH) + EX xvst $xr14, \base, (14 * LASX_REG_WIDTH) + EX xvst $xr15, \base, (15 * LASX_REG_WIDTH) + EX xvst $xr16, \base, (16 * LASX_REG_WIDTH) + EX xvst $xr17, \base, (17 * LASX_REG_WIDTH) + EX xvst $xr18, \base, (18 * LASX_REG_WIDTH) + EX xvst $xr19, \base, (19 * LASX_REG_WIDTH) + EX xvst $xr20, \base, (20 * LASX_REG_WIDTH) + EX xvst $xr21, \base, (21 * LASX_REG_WIDTH) + EX xvst $xr22, \base, (22 * LASX_REG_WIDTH) + EX xvst $xr23, \base, (23 * LASX_REG_WIDTH) + EX xvst $xr24, \base, (24 * LASX_REG_WIDTH) + EX xvst $xr25, \base, (25 * LASX_REG_WIDTH) + EX xvst $xr26, \base, (26 * LASX_REG_WIDTH) + EX xvst $xr27, \base, (27 * LASX_REG_WIDTH) + EX xvst $xr28, \base, (28 * LASX_REG_WIDTH) + EX xvst $xr29, \base, (29 * LASX_REG_WIDTH) + EX xvst $xr30, \base, (30 * LASX_REG_WIDTH) + EX xvst $xr31, \base, (31 * LASX_REG_WIDTH) +#endif + .endm + + .macro sc_restore_lasx base +#ifdef CONFIG_CPU_HAS_LASX + EX xvld $xr0, \base, (0 * LASX_REG_WIDTH) + EX xvld $xr1, \base, (1 * LASX_REG_WIDTH) + EX xvld $xr2, \base, (2 * LASX_REG_WIDTH) + EX xvld $xr3, \base, (3 * LASX_REG_WIDTH) + EX xvld $xr4, \base, (4 * LASX_REG_WIDTH) + EX xvld $xr5, \base, (5 * LASX_REG_WIDTH) + EX xvld $xr6, \base, (6 * LASX_REG_WIDTH) + EX xvld $xr7, \base, (7 * LASX_REG_WIDTH) + EX xvld $xr8, \base, (8 * LASX_REG_WIDTH) + EX xvld $xr9, \base, (9 * LASX_REG_WIDTH) + EX xvld $xr10, \base, (10 * LASX_REG_WIDTH) + EX xvld $xr11, \base, (11 * LASX_REG_WIDTH) + EX xvld $xr12, \base, (12 * LASX_REG_WIDTH) + EX xvld $xr13, \base, (13 * LASX_REG_WIDTH) + EX xvld $xr14, \base, (14 * LASX_REG_WIDTH) + EX xvld $xr15, \base, (15 * LASX_REG_WIDTH) + EX xvld $xr16, \base, (16 * LASX_REG_WIDTH) + EX xvld $xr17, \base, (17 * LASX_REG_WIDTH) + EX xvld $xr18, \base, (18 * LASX_REG_WIDTH) + EX xvld $xr19, \base, (19 * LASX_REG_WIDTH) + EX xvld $xr20, \base, 
(20 * LASX_REG_WIDTH) + EX xvld $xr21, \base, (21 * LASX_REG_WIDTH) + EX xvld $xr22, \base, (22 * LASX_REG_WIDTH) + EX xvld $xr23, \base, (23 * LASX_REG_WIDTH) + EX xvld $xr24, \base, (24 * LASX_REG_WIDTH) + EX xvld $xr25, \base, (25 * LASX_REG_WIDTH) + EX xvld $xr26, \base, (26 * LASX_REG_WIDTH) + EX xvld $xr27, \base, (27 * LASX_REG_WIDTH) + EX xvld $xr28, \base, (28 * LASX_REG_WIDTH) + EX xvld $xr29, \base, (29 * LASX_REG_WIDTH) + EX xvld $xr30, \base, (30 * LASX_REG_WIDTH) + EX xvld $xr31, \base, (31 * LASX_REG_WIDTH) +#endif + .endm + /* * Save a thread's fp context. */ @@ -166,6 +314,76 @@ SYM_FUNC_START(_restore_fp) jr ra SYM_FUNC_END(_restore_fp) +#ifdef CONFIG_CPU_HAS_LSX + +/* + * Save a thread's LSX vector context. + */ +SYM_FUNC_START(_save_lsx) + lsx_save_all a0 t1 t2 + jr ra +SYM_FUNC_END(_save_lsx) +EXPORT_SYMBOL(_save_lsx) + +/* + * Restore a thread's LSX vector context. + */ +SYM_FUNC_START(_restore_lsx) + lsx_restore_all a0 t1 t2 + jr ra +SYM_FUNC_END(_restore_lsx) + +SYM_FUNC_START(_save_lsx_upper) + lsx_save_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_save_lsx_upper) + +SYM_FUNC_START(_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_restore_lsx_upper) + +SYM_FUNC_START(_init_lsx_upper) + lsx_init_all_upper t1 + jr ra +SYM_FUNC_END(_init_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +/* + * Save a thread's LASX vector context. + */ +SYM_FUNC_START(_save_lasx) + lasx_save_all a0 t1 t2 + jr ra +SYM_FUNC_END(_save_lasx) +EXPORT_SYMBOL(_save_lasx) + +/* + * Restore a thread's LASX vector context. + */ +SYM_FUNC_START(_restore_lasx) + lasx_restore_all a0 t1 t2 + jr ra +SYM_FUNC_END(_restore_lasx) + +SYM_FUNC_START(_save_lasx_upper) + lasx_save_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_save_lasx_upper) + +SYM_FUNC_START(_restore_lasx_upper) + lasx_restore_all_upper a0 t0 t1 + jr ra +SYM_FUNC_END(_restore_lasx_upper) + +SYM_FUNC_START(_init_lasx_upper) + lasx_init_all_upper t1 + jr ra +SYM_FUNC_END(_init_lasx_upper) +#endif + /* * Load the FPU with signalling NANS. 
This bit pattern we're using has * the property that no matter whether considered as single or as double @@ -244,6 +462,58 @@ SYM_FUNC_START(_restore_fp_context) jr ra SYM_FUNC_END(_restore_fp_context) +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lsx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lsx a0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lsx_context) + sc_restore_lsx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lasx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lasx a0 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_save_lasx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lasx_context) + sc_restore_lasx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jr ra +SYM_FUNC_END(_restore_lasx_context) + SYM_FUNC_START(fault) li.w a0, -EFAULT # failure jr ra diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 9535a0662480..2e04eb07abb6 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -117,8 +117,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) */ preempt_disable(); - if (is_fpu_owner()) - save_fp(current); + if (is_fpu_owner()) { + if (is_lasx_enabled()) + save_lasx(current); + else if (is_lsx_enabled()) + save_lsx(current); + else + save_fp(current); + } preempt_enable(); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 5fcffb452367..a0767c3a0f0a 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -250,6 +250,90 @@ static int cfg_set(struct task_struct *target, return 0; } +#ifdef CONFIG_CPU_HAS_LSX + +static void copy_pad_fprs(struct task_struct *target, + const struct user_regset *regset, + struct membuf *to, unsigned int live_sz) +{ + int i, j; + unsigned long long fill = ~0ull; + unsigned int cp_sz, pad_sz; + + cp_sz = min(regset->size, live_sz); + pad_sz = regset->size - cp_sz; + WARN_ON(pad_sz % sizeof(fill)); + + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) { + membuf_store(to, fill); + } + } +} + +static int simd_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + + if (!tsk_used_math(target)) { + /* The task hasn't used FP or LSX, fill with 0xff */ + copy_pad_fprs(target, regset, &to, 0); + } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) { + /* Copy scalar FP context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 8); +#ifdef CONFIG_CPU_HAS_LASX + } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) { + /* Copy LSX 128 Bit context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 16); +#endif + } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + membuf_write(&to, &target->thread.fpu.fpr, wr_size); + } else { + /* Copy as much context as possible, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0])); + } + + return 0; +} + +static int simd_set(struct task_struct *target, + const struct user_regset 
*regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + unsigned int cp_sz; + int i, err, start; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr, + 0, wr_size); + } else { + /* Copy as much context as possible */ + cp_sz = min_t(unsigned int, regset->size, + sizeof(target->thread.fpu.fpr[0])); + + i = start = err = 0; + for (; i < NUM_FPU_REGS; i++, start += regset->size) { + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr[i], + start, start + cp_sz); + } + } + + return err; +} + +#endif /* CONFIG_CPU_HAS_LSX */ + #ifdef CONFIG_HAVE_HW_BREAKPOINT /* @@ -708,6 +792,12 @@ enum loongarch_regset { REGSET_GPR, REGSET_FPR, REGSET_CPUCFG, +#ifdef CONFIG_CPU_HAS_LSX + REGSET_LSX, +#endif +#ifdef CONFIG_CPU_HAS_LASX + REGSET_LASX, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT REGSET_HW_BREAK, REGSET_HW_WATCH, @@ -739,6 +829,26 @@ static const struct user_regset loongarch64_regsets[] = { .regset_get = cfg_get, .set = cfg_set, }, +#ifdef CONFIG_CPU_HAS_LSX + [REGSET_LSX] = { + .core_note_type = NT_LOONGARCH_LSX, + .n = NUM_FPU_REGS, + .size = 16, + .align = 16, + .regset_get = simd_get, + .set = simd_set, + }, +#endif +#ifdef CONFIG_CPU_HAS_LASX + [REGSET_LASX] = { + .core_note_type = NT_LOONGARCH_LASX, + .n = NUM_FPU_REGS, + .size = 32, + .align = 32, + .regset_get = simd_get, + .set = simd_set, + }, +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { .core_note_type = NT_LOONGARCH_HW_BREAK, diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 8f5b7986374b..ceb899366c0a 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -50,6 +50,14 @@ extern asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); extern asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +extern asmlinkage int +_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); struct rt_sigframe { struct siginfo rs_info; @@ -65,6 +73,8 @@ struct extctx_layout { unsigned long size; unsigned int flags; struct _ctx_layout fpu; + struct _ctx_layout lsx; + struct _ctx_layout lasx; struct _ctx_layout end; }; @@ -115,6 +125,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx) return err; } +static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 0), + ®s[2*i]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), + ®s[2*i+1]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + 
uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, ®s[2*i]); + set_fpr64(¤t->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, ®s[2*i+1]); + set_fpr64(¤t->thread.fpu.fpr[i], 1, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 0), + ®s[4*i]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), + ®s[4*i+1]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 2), + ®s[4*i+2]); + err |= __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 3), + ®s[4*i+3]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, ®s[4*i]); + set_fpr64(¤t->thread.fpu.fpr[i], 0, fpr_val); + err |= __get_user(fpr_val, ®s[4*i+1]); + set_fpr64(¤t->thread.fpu.fpr[i], 1, fpr_val); + err |= __get_user(fpr_val, ®s[4*i+2]); + set_fpr64(¤t->thread.fpu.fpr[i], 2, fpr_val); + err |= __get_user(fpr_val, ®s[4*i+3]); + set_fpr64(¤t->thread.fpu.fpr[i], 3, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + /* * Wrappers for the assembly _{save,restore}_fp_context functions. 
*/ @@ -136,6 +236,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx) return _restore_fp_context(regs, fcc, fcsr); } +static int save_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lsx_context(regs, fcc, fcsr); +} + +static int restore_hw_lsx_context(struct lsx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lsx_context(regs, fcc, fcsr); +} + +static int save_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_lasx_context(regs, fcc, fcsr); +} + +static int restore_hw_lasx_context(struct lasx_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_lasx_context(regs, fcc, fcsr); +} + static int fcsr_pending(unsigned int __user *fcsr) { int err, sig = 0; @@ -227,6 +363,162 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx) return err ?: sig; } +static int protected_save_lsx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = save_hw_lsx_context(lsx_ctx); + else { + if (is_fpu_owner()) + save_fp(current); + err = copy_lsx_to_sigcontext(lsx_ctx); + } + unlock_fpu_owner(); + + err |= __put_user(LSX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lsx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __put_user(0, ®s[0]) | + __put_user(0, ®s[32*2-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lsx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lsx.addr; + struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs; + uint64_t __user *fcc = &lsx_ctx->fcc; + uint32_t __user *fcsr = &lsx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lsx_enabled()) + err = restore_hw_lsx_context(lsx_ctx); + else { + err = copy_lsx_from_sigcontext(lsx_ctx); + if (is_fpu_owner()) + restore_fp(current); + } + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LSX context and try again */ + err = __get_user(tmp, ®s[0]) | + __get_user(tmp, ®s[32*2-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + +static int protected_save_lasx_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + 
uint32_t __user *fcsr = &lasx_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = save_hw_lasx_context(lasx_ctx); + else { + if (is_lsx_enabled()) + save_lsx(current); + else if (is_fpu_owner()) + save_fp(current); + err = copy_lasx_to_sigcontext(lasx_ctx); + } + unlock_fpu_owner(); + + err |= __put_user(LASX_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->lasx.size, &info->size); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __put_user(0, ®s[0]) | + __put_user(0, ®s[32*4-1]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_lasx_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->lasx.addr; + struct lasx_context __user *lasx_ctx = + (struct lasx_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs; + uint64_t __user *fcc = &lasx_ctx->fcc; + uint32_t __user *fcsr = &lasx_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_lasx_enabled()) + err = restore_hw_lasx_context(lasx_ctx); + else { + err = copy_lasx_from_sigcontext(lasx_ctx); + if (is_lsx_enabled()) + restore_lsx(current); + else if (is_fpu_owner()) + restore_fp(current); + } + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the LASX context and try again */ + err = __get_user(tmp, ®s[0]) | + __get_user(tmp, ®s[32*4-1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, struct extctx_layout *extctx) { @@ -240,7 +532,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx->fpu.addr) + if (extctx->lasx.addr) + err |= protected_save_lasx_context(extctx); + else if (extctx->lsx.addr) + err |= protected_save_lsx_context(extctx); + else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); /* Set the "end" magic */ @@ -274,6 +570,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout * extctx->fpu.addr = info; break; + case LSX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lsx_context))) + goto invalid; + extctx->lsx.addr = info; + break; + + case LASX_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct lasx_context))) + goto invalid; + extctx->lasx.addr = info; + break; + default: goto invalid; } @@ -319,7 +629,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc for (i = 1; i < 32; i++) err |= __get_user(regs->regs[i], &sc->sc_regs[i]); - if (extctx.fpu.addr) + if (extctx.lasx.addr) + err |= protected_restore_lasx_context(&extctx); + else if (extctx.lsx.addr) + err |= protected_restore_lsx_context(&extctx); + else if (extctx.fpu.addr) err |= protected_restore_fpu_context(&extctx); bad: @@ -375,7 +689,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon extctx->size += extctx->end.size; if (extctx->flags & SC_USED_FP) { - if (cpu_has_fpu) + if (cpu_has_lasx && thread_lasx_context_live()) + new_sp = extframe_alloc(extctx, &extctx->lasx, + sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp); + else if (cpu_has_lsx && thread_lsx_context_live()) + 
new_sp = extframe_alloc(extctx, &extctx->lsx, + sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp); + else if (cpu_has_fpu) new_sp = extframe_alloc(extctx, &extctx->fpu, sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); } diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e73d9bbe1658..e56df45f7202 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -852,12 +852,67 @@ static void init_restore_fp(void) BUG_ON(!is_fp_enabled()); } +static void init_restore_lsx(void) +{ + enable_lsx(); + + if (!thread_lsx_context_live()) { + /* First time LSX context user */ + init_restore_fp(); + init_lsx_upper(); + set_thread_flag(TIF_LSX_CTX_LIVE); + } else { + if (!is_simd_owner()) { + if (is_fpu_owner()) { + restore_lsx_upper(current); + } else { + __own_fpu(); + restore_lsx(current); + } + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); +} + +static void init_restore_lasx(void) +{ + enable_lasx(); + + if (!thread_lasx_context_live()) { + /* First time LASX context user */ + init_restore_lsx(); + init_lasx_upper(); + set_thread_flag(TIF_LASX_CTX_LIVE); + } else { + if (is_fpu_owner() || is_simd_owner()) { + init_restore_lsx(); + restore_lasx_upper(current); + } else { + __own_fpu(); + enable_lsx(); + restore_lasx(current); + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); + BUG_ON(!is_lasx_enabled()); +} + asmlinkage void noinstr do_fpu(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); die_if_kernel("do_fpu invoked from kernel context!", regs); + BUG_ON(is_lsx_enabled()); + BUG_ON(is_lasx_enabled()); preempt_disable(); init_restore_fp(); @@ -872,9 +927,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); - local_irq_disable(); + if (!cpu_has_lsx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lsx invoked from kernel context!", regs); + BUG_ON(is_lasx_enabled()); + preempt_disable(); + init_restore_lsx(); + preempt_enable(); + +out: + local_irq_disable(); irqentry_exit(regs, state); } @@ -883,9 +949,19 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); - force_sig(SIGILL); - local_irq_disable(); + if (!cpu_has_lasx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lasx invoked from kernel context!", regs); + preempt_disable(); + init_restore_lasx(); + preempt_enable(); + +out: + local_irq_disable(); irqentry_exit(regs, state); } -- cgit v1.2.3-58-ga151 From f6f0c9a74a48448583c3cb0f3f067bc3fe0f13c6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Add SMT (Simultaneous Multi-Threading) support Loongson-3A6000 has SMT (Simultaneous Multi-Threading) support: each physical core has two logical cores (threads). This patch adds SMT probing and scheduler support via ACPI PPTT. If SCHED_SMT is enabled, Loongson-3A6000 is treated as 4 cores with 8 threads; if SCHED_SMT is disabled, it is treated as 8 cores with 8 threads. Remove smp_num_siblings to support HMP (Heterogeneous Multi-Processing).
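As an illustration of what the PPTT-based topology buys us: after parse_acpi_topology(), two logical CPUs are treated as SMT siblings by set_cpu_sibling_map() exactly when they share both a physical package and a core id. Below is a minimal standalone C sketch of that predicate; the struct and the example topology are hypothetical, not kernel code:

	#include <stdio.h>

	struct cpu_topo { int package; int core; };

	/* Two logical CPUs are SMT siblings iff they share package and core. */
	static int siblings(struct cpu_topo a, struct cpu_topo b)
	{
		return a.package == b.package && a.core == b.core;
	}

	int main(void)
	{
		struct cpu_topo cpu[8];	/* 3A6000-like: 4 cores x 2 threads */

		for (int i = 0; i < 8; i++) {
			cpu[i].package = 0;
			cpu[i].core = i / 2;	/* threads 2n and 2n+1 share a core */
		}
		printf("cpu0/cpu1 siblings: %d\n", siblings(cpu[0], cpu[1]));	/* 1 */
		printf("cpu0/cpu2 siblings: %d\n", siblings(cpu[0], cpu[2]));	/* 0 */
		return 0;
	}

With SCHED_SMT this yields a two-bit sibling mask per core; when PPTT is absent, the old modulo-based core numbering is kept as a fallback, as the loongson_init_secondary() hunk below shows.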
Signed-off-by: Liupu Wang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 8 ++++++++ arch/loongarch/include/asm/acpi.h | 9 +++++++++ arch/loongarch/include/asm/cpu-info.h | 1 + arch/loongarch/kernel/acpi.c | 32 ++++++++++++++++++++++++++++++++ arch/loongarch/kernel/proc.c | 1 + arch/loongarch/kernel/smp.c | 24 ++++++++++-------------- drivers/acpi/Kconfig | 2 +- 7 files changed, 62 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 72b614429c37..e06315b706b8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -5,6 +5,7 @@ config LOONGARCH select ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_MCFG if ACPI + select ACPI_PPTT if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG @@ -376,6 +377,13 @@ config EFI_STUB This kernel feature allows the kernel to be loaded directly by EFI firmware without the use of a bootloader. +config SCHED_SMT + bool "SMT scheduler support" + default y + help + Improves scheduler's performance when there are multiple + threads in one physical core. + config SMP bool "Multi-Processing support" help diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 976a810352c6..5c78b5d2bfb7 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -13,6 +13,7 @@ extern int acpi_strict; extern int acpi_disabled; extern int acpi_pci_disabled; extern int acpi_noirq; +extern int pptt_enabled; #define acpi_os_ioremap acpi_os_ioremap void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); @@ -30,6 +31,14 @@ static inline bool acpi_has_cpu_in_madt(void) } extern struct list_head acpi_wakeup_device_list; +extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; + +extern int __init parse_acpi_topology(void); + +static inline u32 get_acpi_id_for_cpu(unsigned int cpu) +{ + return acpi_core_pic[cpu_logical_map(cpu)].processor_id; +} #endif /* !CONFIG_ACPI */ diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index cd73a6f57fe3..900589cb159d 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -54,6 +54,7 @@ struct cpuinfo_loongarch { struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ + int global_id; /* physical global thread number */ int vabits; /* Virtual Address size in bits */ int pabits; /* Physical Address size in bits */ unsigned int ksave_mask; /* Usable KSave mask. 
*/ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 98f431157e4c..9450e09073eb 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -33,6 +33,8 @@ u64 acpi_saved_sp; #define PREFIX "ACPI: " +struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; + void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size) { @@ -99,6 +101,7 @@ acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long en acpi_table_print_madt_entry(&header->common); #ifdef CONFIG_SMP + acpi_core_pic[processor->core_id] = *processor; set_processor_mask(processor->core_id, processor->flags); #endif @@ -140,6 +143,35 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } +int pptt_enabled; + +int __init parse_acpi_topology(void) +{ + int cpu, topology_id; + + for_each_possible_cpu(cpu) { + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) { + pr_warn("Invalid BIOS PPTT\n"); + return -ENOENT; + } + + if (acpi_pptt_cpu_is_thread(cpu) <= 0) + cpu_data[cpu].core = topology_id; + else { + topology_id = find_acpi_cpu_topology(cpu, 1); + if (topology_id < 0) + return -ENOENT; + + cpu_data[cpu].core = topology_id; + } + } + + pptt_enabled = 1; + + return 0; +} + #ifndef CONFIG_SUSPEND int (*acpi_suspend_lowlevel)(void); #else diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 0d82907b5404..d4b270630bb5 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -49,6 +49,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "processor\t\t: %ld\n", n); seq_printf(m, "package\t\t\t: %d\n", cpu_data[n].package); seq_printf(m, "core\t\t\t: %d\n", cpu_data[n].core); + seq_printf(m, "global_id\t\t: %d\n", cpu_data[n].global_id); seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]); seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]); seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index a858a468f746..255967ff8c36 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -8,6 +8,7 @@ * Copyright (C) 2000, 2001 Silicon Graphics, Inc. * Copyright (C) 2000, 2001, 2003 Broadcom Corporation */ +#include #include #include #include @@ -37,10 +38,6 @@ EXPORT_SYMBOL(__cpu_number_map); int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ EXPORT_SYMBOL(__cpu_logical_map); -/* Number of threads (siblings) per CPU core */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - /* Representing the threads (siblings) of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -229,9 +226,12 @@ void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; + parse_acpi_topology(); + for (i = 0; i < loongson_sysconf.nr_cpus; i++) { set_cpu_present(i, true); csr_mail_send(0, __cpu_logical_map[i], 0); + cpu_data[i].global_id = __cpu_logical_map[i]; } per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; @@ -272,10 +272,10 @@ void loongson_init_secondary(void) numa_add_cpu(cpu); #endif per_cpu(cpu_state, cpu) = CPU_ONLINE; - cpu_data[cpu].core = - cpu_logical_map(cpu) % loongson_sysconf.cores_per_package; cpu_data[cpu].package = cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; + cpu_data[cpu].core = pptt_enabled ? 
cpu_data[cpu].core : + cpu_logical_map(cpu) % loongson_sysconf.cores_per_package; } void loongson_smp_finish(void) @@ -381,14 +381,10 @@ static inline void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, &cpu_sibling_setup_map); - if (smp_num_siblings <= 1) - cpumask_set_cpu(cpu, &cpu_sibling_map[cpu]); - else { - for_each_cpu(i, &cpu_sibling_setup_map) { - if (cpus_are_siblings(cpu, i)) { - cpumask_set_cpu(i, &cpu_sibling_map[cpu]); - cpumask_set_cpu(cpu, &cpu_sibling_map[i]); - } + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_set_cpu(i, &cpu_sibling_map[cpu]); + cpumask_set_cpu(cpu, &cpu_sibling_map[i]); } } } diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index ccbeab9500ec..00dd309b6682 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -542,10 +542,10 @@ config ACPI_PFRUT if ARM64 source "drivers/acpi/arm64/Kconfig" +endif config ACPI_PPTT bool -endif config ACPI_PCC bool "ACPI PCC Address Space" -- cgit v1.2.3-58-ga151 From e031a5f3f1eddb961a6ded8a21ab8189d8760860 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Support dbar with different hints Traditionally, LoongArch uses "dbar 0" (full completion barrier) for everything. But the full completion barrier is a performance killer, so Loongson-3A6000 and newer processors have made finer-grained hints available: Bit4: ordering or completion (0: completion, 1: ordering) Bit3: barrier for previous read (0: true, 1: false) Bit2: barrier for previous write (0: true, 1: false) Bit1: barrier for succeeding read (0: true, 1: false) Bit0: barrier for succeeding write (0: true, 1: false) Hint 0x700: barrier for "read after read" from the same address, which is needed by LL-SC loops on old models (dbar 0x700 behaves the same as nop if such reordering is disabled on new models). This patch makes use of the various new hints for different kinds of memory barriers. It brings performance improvements on the Loongson-3A6000 series, while not affecting the existing models because all variants are treated as 'dbar 0' there. Why override queued_spin_unlock()? After commit 01e3b958efe85a26d9b ("drivers: Remove explicit invocations of mmiowb()") we need a completion barrier in queued_spin_unlock(), but the generic implementation uses smp_store_release(), which only provides an ordering barrier.
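To make the hint encoding concrete, here is a small userspace sketch (illustrative only, not part of the patch) that decodes a hint value according to the five-bit layout above:

	#include <stdio.h>

	/* Decode a dbar hint per the bit layout in the commit message. */
	static void decode_dbar_hint(unsigned int hint)
	{
		printf("0x%02x: %s;", hint, (hint & 0x10) ? "ordering" : "completion");
		printf(" prev-read=%d prev-write=%d succ-read=%d succ-write=%d\n",
		       !(hint & 0x08), !(hint & 0x04), !(hint & 0x02), !(hint & 0x01));
	}

	int main(void)
	{
		decode_dbar_hint(0x00);	/* crwrw: full completion barrier ("dbar 0")    */
		decode_dbar_hint(0x14);	/* or_rw: previous reads vs. all later accesses */
		decode_dbar_hint(0x12);	/* orw_w: all earlier accesses vs. later writes */
		return 0;
	}

This is also why ldacq_mb() maps to or_rw (load-acquire semantics) and strel_mb() maps to orw_w (store-release semantics) in the new barrier.h.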
Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/Kbuild | 1 - arch/loongarch/include/asm/barrier.h | 130 ++++++++++++++------------------- arch/loongarch/include/asm/io.h | 2 +- arch/loongarch/include/asm/qspinlock.h | 18 +++++ arch/loongarch/kernel/smp.c | 2 +- arch/loongarch/mm/tlbex.S | 6 +- 6 files changed, 78 insertions(+), 81 deletions(-) create mode 100644 arch/loongarch/include/asm/qspinlock.h (limited to 'arch') diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 77ad8e6f0906..6b222f227342 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h index cda977675854..4b663f197706 100644 --- a/arch/loongarch/include/asm/barrier.h +++ b/arch/loongarch/include/asm/barrier.h @@ -5,27 +5,56 @@ #ifndef __ASM_BARRIER_H #define __ASM_BARRIER_H -#define __sync() __asm__ __volatile__("dbar 0" : : : "memory") +/* + * Hint encoding: + * + * Bit4: ordering or completion (0: completion, 1: ordering) + * Bit3: barrier for previous read (0: true, 1: false) + * Bit2: barrier for previous write (0: true, 1: false) + * Bit1: barrier for succeeding read (0: true, 1: false) + * Bit0: barrier for succeeding write (0: true, 1: false) + * + * Hint 0x700: barrier for "read after read" from the same address + */ + +#define DBAR(hint) __asm__ __volatile__("dbar %0 " : : "I"(hint) : "memory") + +#define crwrw 0b00000 +#define cr_r_ 0b00101 +#define c_w_w 0b01010 -#define fast_wmb() __sync() -#define fast_rmb() __sync() -#define fast_mb() __sync() -#define fast_iob() __sync() -#define wbflush() __sync() +#define orwrw 0b10000 +#define or_r_ 0b10101 +#define o_w_w 0b11010 -#define wmb() fast_wmb() -#define rmb() fast_rmb() -#define mb() fast_mb() -#define iob() fast_iob() +#define orw_w 0b10010 +#define or_rw 0b10100 -#define __smp_mb() __asm__ __volatile__("dbar 0" : : : "memory") -#define __smp_rmb() __asm__ __volatile__("dbar 0" : : : "memory") -#define __smp_wmb() __asm__ __volatile__("dbar 0" : : : "memory") +#define c_sync() DBAR(crwrw) +#define c_rsync() DBAR(cr_r_) +#define c_wsync() DBAR(c_w_w) + +#define o_sync() DBAR(orwrw) +#define o_rsync() DBAR(or_r_) +#define o_wsync() DBAR(o_w_w) + +#define ldacq_mb() DBAR(or_rw) +#define strel_mb() DBAR(orw_w) + +#define mb() c_sync() +#define rmb() c_rsync() +#define wmb() c_wsync() +#define iob() c_sync() +#define wbflush() c_sync() + +#define __smp_mb() o_sync() +#define __smp_rmb() o_rsync() +#define __smp_wmb() o_wsync() #ifdef CONFIG_SMP -#define __WEAK_LLSC_MB " dbar 0 \n" +#define __WEAK_LLSC_MB " dbar 0x700 \n" #else -#define __WEAK_LLSC_MB " \n" +#define __WEAK_LLSC_MB " \n" #endif #define __smp_mb__before_atomic() barrier() @@ -59,68 +88,19 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, return mask; } -#define __smp_load_acquire(p) \ -({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ - unsigned long __tmp = 0; \ - compiletime_assert_atomic_type(*p); \ - switch (sizeof(*p)) { \ - case 1: \ - *(__u8 *)__u.__c = *(volatile __u8 *)p; \ - __smp_mb(); \ - break; \ - case 2: \ - *(__u16 *)__u.__c = *(volatile __u16 *)p; \ - __smp_mb(); \ - break; \ - case 4: \ - __asm__ __volatile__( \ - "amor_db.w %[val], %[tmp], %[mem] \n" \ - : [val] 
"=&r" (*(__u32 *)__u.__c) \ - : [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__( \ - "amor_db.d %[val], %[tmp], %[mem] \n" \ - : [val] "=&r" (*(__u64 *)__u.__c) \ - : [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp) \ - : "memory"); \ - break; \ - } \ - (typeof(*p))__u.__val; \ +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + ldacq_mb(); \ + ___p1; \ }) -#define __smp_store_release(p, v) \ -do { \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ - unsigned long __tmp; \ - compiletime_assert_atomic_type(*p); \ - switch (sizeof(*p)) { \ - case 1: \ - __smp_mb(); \ - *(volatile __u8 *)p = *(__u8 *)__u.__c; \ - break; \ - case 2: \ - __smp_mb(); \ - *(volatile __u16 *)p = *(__u16 *)__u.__c; \ - break; \ - case 4: \ - __asm__ __volatile__( \ - "amswap_db.w %[tmp], %[val], %[mem] \n" \ - : [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp) \ - : [val] "r" (*(__u32 *)__u.__c) \ - : ); \ - break; \ - case 8: \ - __asm__ __volatile__( \ - "amswap_db.d %[tmp], %[val], %[mem] \n" \ - : [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp) \ - : [val] "r" (*(__u64 *)__u.__c) \ - : ); \ - break; \ - } \ +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + strel_mb(); \ + WRITE_ONCE(*p, v); \ } while (0) #define __smp_store_mb(p, v) \ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 545e2708fbf7..1c9410220040 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -62,7 +62,7 @@ extern pgprot_t pgprot_wc; #define ioremap_cache(offset, size) \ ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) -#define mmiowb() asm volatile ("dbar 0" ::: "memory") +#define mmiowb() wmb() /* * String version of I/O memory access operations. 
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h new file mode 100644 index 000000000000..34f43f8ad591 --- /dev/null +++ b/arch/loongarch/include/asm/qspinlock.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_QSPINLOCK_H +#define _ASM_QSPINLOCK_H + +#include + +#define queued_spin_unlock queued_spin_unlock + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + compiletime_assert_atomic_type(lock->locked); + c_sync(); + WRITE_ONCE(lock->locked, 0); +} + +#include + +#endif /* _ASM_QSPINLOCK_H */ diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 255967ff8c36..8ea1bbcf13a7 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -115,7 +115,7 @@ static u32 ipi_read_clear(int cpu) action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS); /* Clear the ipi register to clear the interrupt */ iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR); - smp_mb(); + wbflush(); return action; } diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 244e2f5aeee5..240ced55586e 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -184,7 +184,7 @@ tlb_huge_update_load: ertn nopage_tlb_load: - dbar 0 + dbar 0x700 csrrd ra, EXCEPTION_KS2 la_abs t0, tlb_do_page_fault_0 jr t0 @@ -333,7 +333,7 @@ tlb_huge_update_store: ertn nopage_tlb_store: - dbar 0 + dbar 0x700 csrrd ra, EXCEPTION_KS2 la_abs t0, tlb_do_page_fault_1 jr t0 @@ -480,7 +480,7 @@ tlb_huge_update_modify: ertn nopage_tlb_modify: - dbar 0 + dbar 0x700 csrrd ra, EXCEPTION_KS2 la_abs t0, tlb_do_page_fault_1 jr t0 -- cgit v1.2.3-58-ga151 From 01158487af60cd3915e8c31924144caf29cb0767 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Introduce hardware page table walker Loongson-3A6000 and newer processors have hardware page table walker (PTW) support. PTW can handle all fastpaths of TLBI/TLBL/TLBS/TLBM exceptions in hardware; software only needs to handle the slowpaths (page faults). Note that PTW doesn't set _PAGE_MODIFIED on page table entries, so we change pmd_dirty() and pte_dirty() to also check _PAGE_DIRTY for the "dirty" attribute.
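One way to read the pte_dirty()/pmd_dirty() change: because PTW never sets _PAGE_MODIFIED, a page written through a PTW-installed entry may carry only _PAGE_DIRTY, so the dirty test must accept either bit. A minimal sketch of the widened check follows; the bit positions are hypothetical, chosen only for illustration:

	/* Hypothetical bit positions, for illustration only. */
	#define _PAGE_DIRTY	(1UL << 1)	/* hardware-visible write record   */
	#define _PAGE_MODIFIED	(1UL << 9)	/* set by software fault handling  */

	static inline int pte_dirty_sketch(unsigned long pteval)
	{
		/* Dirty if either the software or the hardware bit is set. */
		return !!(pteval & (_PAGE_DIRTY | _PAGE_MODIFIED));
	}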
Signed-off-by: Liang Gao Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 2 +- arch/loongarch/include/asm/cpu.h | 2 ++ arch/loongarch/include/asm/loongarch.h | 4 ++++ arch/loongarch/include/asm/pgtable.h | 4 ++-- arch/loongarch/include/asm/tlb.h | 3 +++ arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++++ arch/loongarch/kernel/proc.c | 1 + arch/loongarch/mm/tlb.c | 21 +++++++++++++++++---- arch/loongarch/mm/tlbex.S | 21 +++++++++++++++++++++ 10 files changed, 56 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index f6177f133477..2eafe6a6aca8 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -64,6 +64,6 @@ #define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE) #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) - +#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 88773d849e33..48b9f7168bcc 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -98,6 +98,7 @@ enum cpu_type_enum { #define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ #define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ #define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -125,5 +126,6 @@ enum cpu_type_enum { #define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE) #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) +#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 08c77d065a11..1ab1ed28d770 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -135,6 +135,7 @@ __asm__(".macro parse_r var r\n\t" #define CPUCFG2_MIPSBT BIT(20) #define CPUCFG2_LSPW BIT(21) #define CPUCFG2_LAM BIT(22) +#define CPUCFG2_PTW BIT(24) #define LOONGARCH_CPUCFG3 0x3 #define CPUCFG3_CCDMA BIT(0) @@ -412,6 +413,9 @@ __asm__(".macro parse_r var r\n\t" #define CSR_PWCTL0_PTBASE (_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT) #define LOONGARCH_CSR_PWCTL1 0x1d /* PWCtl1 */ +#define CSR_PWCTL1_PTW_SHIFT 24 +#define CSR_PWCTL1_PTW_WIDTH 1 +#define CSR_PWCTL1_PTW (_ULCAST_(0x1) << CSR_PWCTL1_PTW_SHIFT) #define CSR_PWCTL1_DIR3WIDTH_SHIFT 18 #define CSR_PWCTL1_DIR3WIDTH_WIDTH 5 #define CSR_PWCTL1_DIR3WIDTH (_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 9a9f9ff9b709..38afeb7dd58b 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -362,7 +362,7 @@ extern pgd_t invalid_pg_dir[]; */ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & (_PAGE_DIRTY | 
_PAGE_MODIFIED); } static inline pte_t pte_mkold(pte_t pte) { @@ -506,7 +506,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline int pmd_dirty(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_MODIFIED); + return !!(pmd_val(pmd) & (_PAGE_DIRTY | _PAGE_MODIFIED)); } static inline pmd_t pmd_mkclean(pmd_t pmd) diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index 0ad53f1ad25d..da7a3b5b9374 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -158,6 +158,9 @@ extern void handle_tlb_store(void); extern void handle_tlb_modify(void); extern void handle_tlb_refill(void); extern void handle_tlb_protect(void); +extern void handle_tlb_load_ptw(void); +extern void handle_tlb_store_ptw(void); +extern void handle_tlb_modify_ptw(void); extern void dump_tlb_all(void); extern void dump_tlb_regs(void); diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 8840b72fa8e8..6955a7cb2c65 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -16,5 +16,6 @@ #define HWCAP_LOONGARCH_LBT_X86 (1 << 10) #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) +#define HWCAP_LOONGARCH_PTW (1 << 13) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index f42acc6c8df6..e925579c7a71 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -136,6 +136,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_CRYPTO; elf_hwcap |= HWCAP_LOONGARCH_CRYPTO; } + if (config & CPUCFG2_PTW) { + c->options |= LOONGARCH_CPU_PTW; + elf_hwcap |= HWCAP_LOONGARCH_PTW; + } if (config & CPUCFG2_LVZP) { c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index d4b270630bb5..0d33cbc47e51 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -80,6 +80,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_crc32) seq_printf(m, " crc32"); if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); + if (cpu_has_ptw) seq_printf(m, " ptw"); if (cpu_has_lvz) seq_printf(m, " lvz"); if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 8bad6b0cff59..00bb563e3c89 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -167,6 +167,9 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep int idx; unsigned long flags; + if (cpu_has_ptw) + return; + /* * Handle debugger faulting in for debugee. 
*/ @@ -222,6 +225,9 @@ static void setup_ptwalker(void) pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25; pwctl1 = pgd_i | pgd_w << 6; + if (cpu_has_ptw) + pwctl1 |= CSR_PWCTL1_PTW; + csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0); csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1); csr_write64((long)swapper_pg_dir, LOONGARCH_CSR_PGDH); @@ -264,10 +270,17 @@ void setup_tlb_handler(int cpu) if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + if (!cpu_has_ptw) { + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + } else { + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE); + } set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 240ced55586e..4ad78703de6f 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -190,6 +190,13 @@ nopage_tlb_load: jr t0 SYM_FUNC_END(handle_tlb_load) +SYM_FUNC_START(handle_tlb_load_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la_abs t0, tlb_do_page_fault_0 + jr t0 +SYM_FUNC_END(handle_tlb_load_ptw) + SYM_FUNC_START(handle_tlb_store) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 @@ -339,6 +346,13 @@ nopage_tlb_store: jr t0 SYM_FUNC_END(handle_tlb_store) +SYM_FUNC_START(handle_tlb_store_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la_abs t0, tlb_do_page_fault_1 + jr t0 +SYM_FUNC_END(handle_tlb_store_ptw) + SYM_FUNC_START(handle_tlb_modify) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 @@ -486,6 +500,13 @@ nopage_tlb_modify: jr t0 SYM_FUNC_END(handle_tlb_modify) +SYM_FUNC_START(handle_tlb_modify_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la_abs t0, tlb_do_page_fault_1 + jr t0 +SYM_FUNC_END(handle_tlb_modify_ptw) + SYM_FUNC_START(handle_tlb_refill) csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD -- cgit v1.2.3-58-ga151 From 31f1a8b0ec66cf21d83807243c3a54469a7018c3 Mon Sep 17 00:00:00 2001 From: Yinbo Zhu Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Export some arch-specific pm interfaces Some PMCs (Power Management Controllers) need to support DTS and will use the suspend interfaces, so this patch exports those interfaces for their use.
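For context, a hypothetical DT-based PMC driver could consume the newly exported interfaces roughly as below. This is a sketch under assumptions: the example_pmc names are invented and error handling is omitted.

	#include <linux/suspend.h>
	#include <asm/suspend.h>

	/* Hypothetical suspend callback built on the exported interfaces. */
	static int example_pmc_suspend_enter(suspend_state_t state)
	{
		loongarch_common_suspend();	/* save counters, PGD and other CSR state */
		loongarch_suspend_enter();	/* processor-specific suspend; returns on wakeup */
		loongarch_common_resume();	/* restore saved state and flush the TLB */

		return 0;
	}

	static const struct platform_suspend_ops example_pmc_suspend_ops = {
		.valid	= suspend_valid_only_mem,
		.enter	= example_pmc_suspend_enter,
	};

The driver's probe routine would then install the ops with suspend_set_ops(&example_pmc_suspend_ops).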
Signed-off-by: Yinbo Zhu Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/acpi.h | 4 ++-- arch/loongarch/include/asm/suspend.h | 10 ++++++++++ arch/loongarch/power/suspend.c | 8 ++++---- 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 arch/loongarch/include/asm/suspend.h (limited to 'arch') diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 5c78b5d2bfb7..8de6c4b83a61 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -8,6 +8,8 @@ #ifndef _ASM_LOONGARCH_ACPI_H #define _ASM_LOONGARCH_ACPI_H +#include + #ifdef CONFIG_ACPI extern int acpi_strict; extern int acpi_disabled; @@ -46,12 +48,10 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) extern int loongarch_acpi_suspend(void); extern int (*acpi_suspend_lowlevel)(void); -extern void loongarch_suspend_enter(void); static inline unsigned long acpi_get_wakeup_address(void) { #ifdef CONFIG_SUSPEND - extern void loongarch_wakeup_start(void); return (unsigned long)loongarch_wakeup_start; #endif return 0UL; } diff --git a/arch/loongarch/include/asm/suspend.h b/arch/loongarch/include/asm/suspend.h new file mode 100644 index 000000000000..4025c9d5d7cf --- /dev/null +++ b/arch/loongarch/include/asm/suspend.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_SUSPEND_H +#define __ASM_SUSPEND_H + +void loongarch_common_suspend(void); +void loongarch_common_resume(void); +void loongarch_suspend_enter(void); +void loongarch_wakeup_start(void); + +#endif diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c index 5e19733e5e05..166d9e06a64b 100644 --- a/arch/loongarch/power/suspend.c +++ b/arch/loongarch/power/suspend.c @@ -27,7 +27,7 @@ struct saved_registers { }; static struct saved_registers saved_regs; -static void arch_common_suspend(void) +void loongarch_common_suspend(void) { save_counter(); saved_regs.pgd = csr_read64(LOONGARCH_CSR_PGDL); @@ -40,7 +40,7 @@ static void arch_common_suspend(void) loongarch_suspend_addr = loongson_sysconf.suspend_addr; } -static void arch_common_resume(void) +void loongarch_common_resume(void) { sync_counter(); local_flush_tlb_all(); @@ -62,12 +62,12 @@ int loongarch_acpi_suspend(void) enable_gpe_wakeup(); enable_pci_wakeup(); - arch_common_suspend(); + loongarch_common_suspend(); /* processor specific suspend */ loongarch_suspend_enter(); - arch_common_resume(); + loongarch_common_resume(); return 0; } -- cgit v1.2.3-58-ga151 From 5d553770409de4a98a8c4f8c014559725dcfaa37 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Select HAVE_DEBUG_KMEMLEAK to support kmemleak DEBUG_KMEMLEAK has depended on HAVE_DEBUG_KMEMLEAK since commit b69ec42b1b19 ("Kconfig: clean up the long arch list for the DEBUG_KMEMLEAK config option"), so just select HAVE_DEBUG_KMEMLEAK to support kmemleak on LoongArch.
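To see what this enables, here is a contrived in-kernel leak of the kind kmemleak reports (illustrative only; example_leak() is an invented function, not part of this patch):

	#include <linux/slab.h>

	static void *leaked_obj;

	static void example_leak(void)
	{
		leaked_obj = kmalloc(64, GFP_KERNEL);
		leaked_obj = NULL;	/* last reference lost, object never freed */
	}

With CONFIG_DEBUG_KMEMLEAK=y, writing "scan" to /sys/kernel/debug/kmemleak and then reading that file should list such unreferenced objects.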
Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- Documentation/features/debug/kmemleak/arch-support.txt | 2 +- arch/loongarch/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt index 0cfa5f0e4db1..4e205ef70363 100644 --- a/Documentation/features/debug/kmemleak/arch-support.txt +++ b/Documentation/features/debug/kmemleak/arch-support.txt @@ -13,7 +13,7 @@ | csky: | ok | | hexagon: | TODO | | ia64: | TODO | - | loongarch: | TODO | + | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | | mips: | ok | diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e06315b706b8..8c7b67eca838 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -94,6 +94,7 @@ config LOONGARCH select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE -- cgit v1.2.3-58-ga151 From f02644e32c9e4bd1a9b286dc0b84f9cbe294f4e2 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Add jump-label implementation Add support for jump labels based on the ARM64 version. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- .../features/core/jump-labels/arch-support.txt | 2 +- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/jump_label.h | 50 ++++++++++++++++++++++ arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/jump_label.c | 22 ++++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/jump_label.h create mode 100644 arch/loongarch/kernel/jump_label.c (limited to 'arch') diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt index 2328eada3a49..94d9dece580f 100644 --- a/Documentation/features/core/jump-labels/arch-support.txt +++ b/Documentation/features/core/jump-labels/arch-support.txt @@ -13,7 +13,7 @@ | csky: | ok | | hexagon: | TODO | | ia64: | TODO | - | loongarch: | TODO | + | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | | mips: | ok | diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 8c7b67eca838..64cdc6802295 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -87,6 +87,8 @@ config LOONGARCH select GPIOLIB select HAS_IOPORT select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h new file mode 100644 index 000000000000..3cea299a5ef5 --- /dev/null +++ b/arch/loongarch/include/asm/jump_label.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + * + * Based on arch/arm64/include/asm/jump_label.h + */ +#ifndef __ASM_JUMP_LABEL_H +#define __ASM_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include + +#define JUMP_LABEL_NOP_SIZE 4 + +#define JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + ".align 3 \n\t" \ + ".long 1b - ., %l[l_yes] - . \n\t" \ + ".quad %0 - . 
\n\t" \ + ".popsection \n\t" + +static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) +{ + asm_volatile_goto( + "1: nop \n\t" + JUMP_TABLE_ENTRY + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; + +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) +{ + asm_volatile_goto( + "1: b %l[l_yes] \n\t" + JUMP_TABLE_ENTRY + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; + +l_yes: + return true; +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 9a72d91cd104..64ea76f60e2c 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -54,4 +54,6 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes_trampoline.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/jump_label.c b/arch/loongarch/kernel/jump_label.c new file mode 100644 index 000000000000..31891214b767 --- /dev/null +++ b/arch/loongarch/kernel/jump_label.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + * + * Based on arch/arm64/kernel/jump_label.c + */ +#include +#include +#include + +void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) +{ + u32 insn; + void *addr = (void *)jump_entry_code(entry); + + if (type == JUMP_LABEL_JMP) + insn = larch_insn_gen_b(jump_entry_code(entry), jump_entry_target(entry)); + else + insn = larch_insn_gen_nop(); + + larch_insn_patch_text(addr, insn); +} -- cgit v1.2.3-58-ga151 From 7b0a096436c2dac6de77d132e751a8a3328798d5 Mon Sep 17 00:00:00 2001 From: Haoran Jiang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Replace kretprobe with rethook This is an adaptation of commit f3a112c0c40d ("x86,rethook,kprobes: Replace kretprobe with rethook on x86") and commit b57c2f124098 ("riscv: add riscv rethook implementation") to LoongArch. Mainly refer to commit b57c2f124098 ("riscv: add riscv rethook implementation"). Replaces the kretprobe code with rethook on LoongArch. With this patch, kretprobe on LoongArch uses the rethook instead of kretprobe specific trampoline code. 
Signed-off-by: Haoran Jiang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/kprobes.h | 3 - arch/loongarch/kernel/Makefile | 5 +- arch/loongarch/kernel/kprobes.c | 21 ------- arch/loongarch/kernel/kprobes_trampoline.S | 96 ------------------------------ arch/loongarch/kernel/rethook.c | 28 +++++++++ arch/loongarch/kernel/rethook.h | 8 +++ arch/loongarch/kernel/rethook_trampoline.S | 96 ++++++++++++++++++++++++++++++ 8 files changed, 137 insertions(+), 121 deletions(-) delete mode 100644 arch/loongarch/kernel/kprobes_trampoline.S create mode 100644 arch/loongarch/kernel/rethook.c create mode 100644 arch/loongarch/kernel/rethook.h create mode 100644 arch/loongarch/kernel/rethook_trampoline.S (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 64cdc6802295..b787f8fcbac7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -127,6 +127,7 @@ config LOONGARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RETHOOK select HAVE_RSEQ select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI diff --git a/arch/loongarch/include/asm/kprobes.h b/arch/loongarch/include/asm/kprobes.h index 798020ae02c6..7b9fc3ed71c3 100644 --- a/arch/loongarch/include/asm/kprobes.h +++ b/arch/loongarch/include/asm/kprobes.h @@ -49,9 +49,6 @@ bool kprobe_fault_handler(struct pt_regs *regs, int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_singlestep_handler(struct pt_regs *regs); -void __kretprobe_trampoline(void); -void *trampoline_probe_handler(struct pt_regs *regs); - #else /* !CONFIG_KPROBES */ static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) { return false; } diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 64ea76f60e2c..1061c36f5ad5 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -28,6 +28,8 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) endif obj-$(CONFIG_MODULES) += module.o module-sections.o @@ -52,7 +54,8 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o -obj-$(CONFIG_KPROBES) += kprobes.o kprobes_trampoline.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c index 56c8c4b09a42..83467232ca3c 100644 --- a/arch/loongarch/kernel/kprobes.c +++ b/arch/loongarch/kernel/kprobes.c @@ -378,27 +378,6 @@ int __init arch_init_kprobes(void) return 0; } -/* ASM function that handles the kretprobes must not be probed */ -NOKPROBE_SYMBOL(__kretprobe_trampoline); - -/* Called from __kretprobe_trampoline */ -void __used *trampoline_probe_handler(struct pt_regs *regs) -{ - return (void *)kretprobe_trampoline_handler(regs, NULL); -} -NOKPROBE_SYMBOL(trampoline_probe_handler); - -void arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->regs[1]; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->regs[1] = (unsigned long)&__kretprobe_trampoline; -} 
-NOKPROBE_SYMBOL(arch_prepare_kretprobe); - int arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/loongarch/kernel/kprobes_trampoline.S b/arch/loongarch/kernel/kprobes_trampoline.S deleted file mode 100644 index af94b0d213fa..000000000000 --- a/arch/loongarch/kernel/kprobes_trampoline.S +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -#include -#include - - .text - - .macro save_all_base_regs - cfi_st ra, PT_R1 - cfi_st tp, PT_R2 - cfi_st a0, PT_R4 - cfi_st a1, PT_R5 - cfi_st a2, PT_R6 - cfi_st a3, PT_R7 - cfi_st a4, PT_R8 - cfi_st a5, PT_R9 - cfi_st a6, PT_R10 - cfi_st a7, PT_R11 - cfi_st t0, PT_R12 - cfi_st t1, PT_R13 - cfi_st t2, PT_R14 - cfi_st t3, PT_R15 - cfi_st t4, PT_R16 - cfi_st t5, PT_R17 - cfi_st t6, PT_R18 - cfi_st t7, PT_R19 - cfi_st t8, PT_R20 - cfi_st u0, PT_R21 - cfi_st fp, PT_R22 - cfi_st s0, PT_R23 - cfi_st s1, PT_R24 - cfi_st s2, PT_R25 - cfi_st s3, PT_R26 - cfi_st s4, PT_R27 - cfi_st s5, PT_R28 - cfi_st s6, PT_R29 - cfi_st s7, PT_R30 - cfi_st s8, PT_R31 - csrrd t0, LOONGARCH_CSR_CRMD - andi t0, t0, 0x7 /* extract bit[1:0] PLV, bit[2] IE */ - LONG_S t0, sp, PT_CRMD - .endm - - .macro restore_all_base_regs - cfi_ld tp, PT_R2 - cfi_ld a0, PT_R4 - cfi_ld a1, PT_R5 - cfi_ld a2, PT_R6 - cfi_ld a3, PT_R7 - cfi_ld a4, PT_R8 - cfi_ld a5, PT_R9 - cfi_ld a6, PT_R10 - cfi_ld a7, PT_R11 - cfi_ld t0, PT_R12 - cfi_ld t1, PT_R13 - cfi_ld t2, PT_R14 - cfi_ld t3, PT_R15 - cfi_ld t4, PT_R16 - cfi_ld t5, PT_R17 - cfi_ld t6, PT_R18 - cfi_ld t7, PT_R19 - cfi_ld t8, PT_R20 - cfi_ld u0, PT_R21 - cfi_ld fp, PT_R22 - cfi_ld s0, PT_R23 - cfi_ld s1, PT_R24 - cfi_ld s2, PT_R25 - cfi_ld s3, PT_R26 - cfi_ld s4, PT_R27 - cfi_ld s5, PT_R28 - cfi_ld s6, PT_R29 - cfi_ld s7, PT_R30 - cfi_ld s8, PT_R31 - LONG_L t0, sp, PT_CRMD - li.d t1, 0x7 /* mask bit[1:0] PLV, bit[2] IE */ - csrxchg t0, t1, LOONGARCH_CSR_CRMD - .endm - -SYM_CODE_START(__kretprobe_trampoline) - addi.d sp, sp, -PT_SIZE - save_all_base_regs - - addi.d t0, sp, PT_SIZE - LONG_S t0, sp, PT_R3 - - move a0, sp /* pt_regs */ - - bl trampoline_probe_handler - - /* use the result as the return-address */ - move ra, a0 - - restore_all_base_regs - addi.d sp, sp, PT_SIZE - - jr ra -SYM_CODE_END(__kretprobe_trampoline) diff --git a/arch/loongarch/kernel/rethook.c b/arch/loongarch/kernel/rethook.c new file mode 100644 index 000000000000..db1c5f5024fd --- /dev/null +++ b/arch/loongarch/kernel/rethook.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic return hook for LoongArch. 
+ */ + +#include +#include +#include "rethook.h" + +/* This is called from arch_rethook_trampoline() */ +unsigned long __used arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, 0); +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount) +{ + rhn->frame = 0; + rhn->ret_addr = regs->regs[1]; + + /* replace return addr with trampoline */ + regs->regs[1] = (unsigned long)arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +/* ASM function that handles the rethook must not be probed itself */ +NOKPROBE_SYMBOL(arch_rethook_trampoline); diff --git a/arch/loongarch/kernel/rethook.h b/arch/loongarch/kernel/rethook.h new file mode 100644 index 000000000000..3f1c1edf0d0b --- /dev/null +++ b/arch/loongarch/kernel/rethook.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LOONGARCH_RETHOOK_H +#define __LOONGARCH_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount); + +#endif diff --git a/arch/loongarch/kernel/rethook_trampoline.S b/arch/loongarch/kernel/rethook_trampoline.S new file mode 100644 index 000000000000..bd5772c96338 --- /dev/null +++ b/arch/loongarch/kernel/rethook_trampoline.S @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#include +#include + + .text + + .macro save_all_base_regs + cfi_st ra, PT_R1 + cfi_st tp, PT_R2 + cfi_st a0, PT_R4 + cfi_st a1, PT_R5 + cfi_st a2, PT_R6 + cfi_st a3, PT_R7 + cfi_st a4, PT_R8 + cfi_st a5, PT_R9 + cfi_st a6, PT_R10 + cfi_st a7, PT_R11 + cfi_st t0, PT_R12 + cfi_st t1, PT_R13 + cfi_st t2, PT_R14 + cfi_st t3, PT_R15 + cfi_st t4, PT_R16 + cfi_st t5, PT_R17 + cfi_st t6, PT_R18 + cfi_st t7, PT_R19 + cfi_st t8, PT_R20 + cfi_st u0, PT_R21 + cfi_st fp, PT_R22 + cfi_st s0, PT_R23 + cfi_st s1, PT_R24 + cfi_st s2, PT_R25 + cfi_st s3, PT_R26 + cfi_st s4, PT_R27 + cfi_st s5, PT_R28 + cfi_st s6, PT_R29 + cfi_st s7, PT_R30 + cfi_st s8, PT_R31 + csrrd t0, LOONGARCH_CSR_CRMD + andi t0, t0, 0x7 /* extract bit[1:0] PLV, bit[2] IE */ + LONG_S t0, sp, PT_CRMD + .endm + + .macro restore_all_base_regs + cfi_ld tp, PT_R2 + cfi_ld a0, PT_R4 + cfi_ld a1, PT_R5 + cfi_ld a2, PT_R6 + cfi_ld a3, PT_R7 + cfi_ld a4, PT_R8 + cfi_ld a5, PT_R9 + cfi_ld a6, PT_R10 + cfi_ld a7, PT_R11 + cfi_ld t0, PT_R12 + cfi_ld t1, PT_R13 + cfi_ld t2, PT_R14 + cfi_ld t3, PT_R15 + cfi_ld t4, PT_R16 + cfi_ld t5, PT_R17 + cfi_ld t6, PT_R18 + cfi_ld t7, PT_R19 + cfi_ld t8, PT_R20 + cfi_ld u0, PT_R21 + cfi_ld fp, PT_R22 + cfi_ld s0, PT_R23 + cfi_ld s1, PT_R24 + cfi_ld s2, PT_R25 + cfi_ld s3, PT_R26 + cfi_ld s4, PT_R27 + cfi_ld s5, PT_R28 + cfi_ld s6, PT_R29 + cfi_ld s7, PT_R30 + cfi_ld s8, PT_R31 + LONG_L t0, sp, PT_CRMD + li.d t1, 0x7 /* mask bit[1:0] PLV, bit[2] IE */ + csrxchg t0, t1, LOONGARCH_CSR_CRMD + .endm + +SYM_CODE_START(arch_rethook_trampoline) + addi.d sp, sp, -PT_SIZE + save_all_base_regs + + addi.d t0, sp, PT_SIZE + LONG_S t0, sp, PT_R3 + + move a0, sp /* pt_regs */ + + bl arch_rethook_trampoline_callback + + /* use the result as the return-address */ + move ra, a0 + + restore_all_base_regs + addi.d sp, sp, PT_SIZE + + jr ra +SYM_CODE_END(arch_rethook_trampoline) -- cgit v1.2.3-58-ga151 From 3d2c3daf82544283c5597028a8a3efc9ac0fb02b Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Move three functions from kprobes.c to inst.c The three functions 
insns_not_supported(), insns_need_simulation() and arch_simulate_insn() will be used for uprobes, so move them from kprobes.c to inst.c. This is preparation for a later patch; no functional change. Tested-by: Jeff Xie Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 4 ++++ arch/loongarch/kernel/inst.c | 39 +++++++++++++++++++++++++++++++++ arch/loongarch/kernel/kprobes.c | 46 ++------------------------------------- 3 files changed, 45 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 1dc5b5802c15..985760e64f04 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -444,6 +444,10 @@ static inline bool is_self_loop_ins(union loongarch_instruction *ip, struct pt_r void simu_pc(struct pt_regs *regs, union loongarch_instruction insn); void simu_branch(struct pt_regs *regs, union loongarch_instruction insn); +bool insns_not_supported(union loongarch_instruction insn); +bool insns_need_simulation(union loongarch_instruction insn); +void arch_simulate_insn(union loongarch_instruction insn, struct pt_regs *regs); + int larch_insn_read(void *addr, u32 *insnp); int larch_insn_write(void *addr, u32 insn); int larch_insn_patch_text(void *addr, u32 insn); diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index ffe13c5ba557..18e197515d7f 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -133,6 +133,45 @@ void simu_branch(struct pt_regs *regs, union loongarch_instruction insn) } } +bool insns_not_supported(union loongarch_instruction insn) +{ + switch (insn.reg2i14_format.opcode) { + case llw_op: + case lld_op: + case scw_op: + case scd_op: + pr_notice("ll and sc instructions are not supported\n"); + return true; + } + + switch (insn.reg1i21_format.opcode) { + case bceqz_op: + pr_notice("bceqz and bcnez instructions are not supported\n"); + return true; + } + + return false; +} + +bool insns_need_simulation(union loongarch_instruction insn) +{ + if (is_pc_ins(&insn)) + return true; + + if (is_branch_ins(&insn)) + return true; + + return false; +} + +void arch_simulate_insn(union loongarch_instruction insn, struct pt_regs *regs) +{ + if (is_pc_ins(&insn)) + simu_pc(regs, insn); + else if (is_branch_ins(&insn)) + simu_branch(regs, insn); +} + int larch_insn_read(void *addr, u32 *insnp) { int ret; diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c index 83467232ca3c..4c13c4431b67 100644 --- a/arch/loongarch/kernel/kprobes.c +++ b/arch/loongarch/kernel/kprobes.c @@ -21,48 +21,6 @@ static const union loongarch_instruction singlestep_insn = { DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -static bool insns_not_supported(union loongarch_instruction insn) -{ - switch (insn.reg2i14_format.opcode) { - case llw_op: - case lld_op: - case scw_op: - case scd_op: - pr_notice("kprobe: ll and sc instructions are not supported\n"); - return true; - } - - switch (insn.reg1i21_format.opcode) { - case bceqz_op: - pr_notice("kprobe: bceqz and bcnez instructions are not supported\n"); - return true; - } - - return false; -} -NOKPROBE_SYMBOL(insns_not_supported); - -static bool insns_need_simulation(struct kprobe *p) -{ - if (is_pc_ins(&p->opcode)) - return true; - - if (is_branch_ins(&p->opcode)) - return true; - - return false; -} -NOKPROBE_SYMBOL(insns_need_simulation); - -static void arch_simulate_insn(struct kprobe *p, 
struct pt_regs *regs) -{ - if (is_pc_ins(&p->opcode)) - simu_pc(regs, p->opcode); - else if (is_branch_ins(&p->opcode)) - simu_branch(regs, p->opcode); -} -NOKPROBE_SYMBOL(arch_simulate_insn); - static void arch_prepare_ss_slot(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; @@ -89,7 +47,7 @@ int arch_prepare_kprobe(struct kprobe *p) if (insns_not_supported(p->opcode)) return -EINVAL; - if (insns_need_simulation(p)) { + if (insns_need_simulation(p->opcode)) { p->ainsn.insn = NULL; } else { p->ainsn.insn = get_insn_slot(); @@ -220,7 +178,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, regs->csr_era = (unsigned long)p->ainsn.insn; } else { /* simulate single steping */ - arch_simulate_insn(p, regs); + arch_simulate_insn(p->opcode, regs); /* now go for post processing */ post_kprobe_handler(p, kcb, regs); } -- cgit v1.2.3-58-ga151 From b82fad4d5deb2c2a15fdb581a1e6725dcea666e7 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Check for AMO instructions in insns_not_supported() Like llsc instructions, the atomic memory access instructions shouldn't be supported for probing, so check for them in insns_not_supported(). Closes: https://lore.kernel.org/all/SY4P282MB351877A70A0333C790FE85A5C09C9@SY4P282MB3518.AUSP282.PROD.OUTLOOK.COM/ Tested-by: Jeff Xie Reported-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 26 ++++++++++++++++++++++++++ arch/loongarch/kernel/inst.c | 6 ++++++ 2 files changed, 32 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 985760e64f04..a8b88a09a1b0 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -187,6 +187,32 @@ enum reg3_op { amord_op = 0x70c7, amxorw_op = 0x70c8, amxord_op = 0x70c9, + ammaxw_op = 0x70ca, + ammaxd_op = 0x70cb, + amminw_op = 0x70cc, + ammind_op = 0x70cd, + ammaxwu_op = 0x70ce, + ammaxdu_op = 0x70cf, + amminwu_op = 0x70d0, + ammindu_op = 0x70d1, + amswapdbw_op = 0x70d2, + amswapdbd_op = 0x70d3, + amadddbw_op = 0x70d4, + amadddbd_op = 0x70d5, + amanddbw_op = 0x70d6, + amanddbd_op = 0x70d7, + amordbw_op = 0x70d8, + amordbd_op = 0x70d9, + amxordbw_op = 0x70da, + amxordbd_op = 0x70db, + ammaxdbw_op = 0x70dc, + ammaxdbd_op = 0x70dd, + ammindbw_op = 0x70de, + ammindbd_op = 0x70df, + ammaxdbwu_op = 0x70e0, + ammaxdbdu_op = 0x70e1, + ammindbwu_op = 0x70e2, + ammindbdu_op = 0x70e3, fldgts_op = 0x70e8, fldgtd_op = 0x70e9, fldles_op = 0x70ea, diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 18e197515d7f..a3169cf1cc31 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -135,6 +135,12 @@ void simu_branch(struct pt_regs *regs, union loongarch_instruction insn) bool insns_not_supported(union loongarch_instruction insn) { + switch (insn.reg3_format.opcode) { + case amswapw_op ... 
ammindbdu_op: + pr_notice("atomic memory access instructions are not supported\n"); + return true; + } + switch (insn.reg2i14_format.opcode) { case llw_op: case lld_op: case scw_op: case scd_op: -- cgit v1.2.3-58-ga151 From 49ed320da5f52ec729e7f2f9edbc6e79848455bd Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Add larch_insn_gen_break() to generate break insns There exist various break insns such as BRK_KPROBE_BP, BRK_KPROBE_SSTEPBP, BRK_UPROBE_BP and BRK_UPROBE_XOLBP, so add larch_insn_gen_break() to make generating break insns simpler. This is preparation for a later patch. Tested-by: Jeff Xie Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 12 ++++++++++++ arch/loongarch/kernel/inst.c | 14 ++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index a8b88a09a1b0..71e1ed4165c8 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -482,6 +482,8 @@ u32 larch_insn_gen_nop(void); u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest); +u32 larch_insn_gen_break(int imm); + u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk); u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); @@ -500,6 +502,16 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) return val < (1UL << bit); } +#define DEF_EMIT_REG0I15_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + int imm) \ +{ \ + insn->reg0i15_format.opcode = OP; \ + insn->reg0i15_format.immediate = imm; \ +} + +DEF_EMIT_REG0I15_FORMAT(break, break_op) + #define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ int offset) \ diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index a3169cf1cc31..3050329556d1 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -253,6 +253,20 @@ u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) return insn.word; } +u32 larch_insn_gen_break(int imm) +{ + union loongarch_instruction insn; + + if (imm < 0 || imm >= SZ_32K) { + pr_warn("The generated break instruction is out of range.\n"); + return INSN_BREAK; + } + + emit_break(&insn, imm); + + return insn.word; +} + u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk) { union loongarch_instruction insn; -- cgit v1.2.3-58-ga151 From 6e320363339b585a36bf40d74592db3db021e017 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Use larch_insn_gen_break() for kprobes Now we can use larch_insn_gen_break() to define KPROBE_BP_INSN and KPROBE_SSTEPBP_INSN. Because larch_insn_gen_break() returns an instruction word, define kprobe_opcode_t as u32 and make some small type-conversion changes; no functional change intended.
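For reference, a standalone sketch of how such a break word is laid out: the reg0i15 format places a 17-bit major opcode above a 15-bit immediate. BREAK_OP and the expected word below follow the public LoongArch encoding, and gen_break() is an illustrative stand-in for larch_insn_gen_break(), not the kernel function itself.

	#include <stdint.h>
	#include <stdio.h>

	#define BREAK_OP	0x54U	/* 17-bit major opcode of break */

	static uint32_t gen_break(uint32_t imm)
	{
		return (BREAK_OP << 15) | (imm & 0x7fffU);	/* imm must fit in 15 bits */
	}

	int main(void)
	{
		printf("break 0 -> 0x%08x\n", gen_break(0));	/* expect 0x002a0000 */
		return 0;
	}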
Tested-by: Jeff Xie Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kprobes.h | 2 +- arch/loongarch/kernel/kprobes.c | 33 ++++++++++++++------------------- 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/kprobes.h b/arch/loongarch/include/asm/kprobes.h index 7b9fc3ed71c3..60fa753a010d 100644 --- a/arch/loongarch/include/asm/kprobes.h +++ b/arch/loongarch/include/asm/kprobes.h @@ -22,7 +22,7 @@ do { \ #define kretprobe_blacklist_size 0 -typedef union loongarch_instruction kprobe_opcode_t; +typedef u32 kprobe_opcode_t; /* Architecture specific copy of original instruction */ struct arch_specific_insn { diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c index 4c13c4431b67..17b040bd6067 100644 --- a/arch/loongarch/kernel/kprobes.c +++ b/arch/loongarch/kernel/kprobes.c @@ -4,19 +4,8 @@ #include #include -static const union loongarch_instruction breakpoint_insn = { - .reg0i15_format = { - .opcode = break_op, - .immediate = BRK_KPROBE_BP, - } -}; - -static const union loongarch_instruction singlestep_insn = { - .reg0i15_format = { - .opcode = break_op, - .immediate = BRK_KPROBE_SSTEPBP, - } -}; +#define KPROBE_BP_INSN larch_insn_gen_break(BRK_KPROBE_BP) +#define KPROBE_SSTEPBP_INSN larch_insn_gen_break(BRK_KPROBE_SSTEPBP) DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -24,7 +13,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void arch_prepare_ss_slot(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; - p->ainsn.insn[1] = singlestep_insn; + p->ainsn.insn[1] = KPROBE_SSTEPBP_INSN; p->ainsn.restore = (unsigned long)p->addr + LOONGARCH_INSN_SIZE; } NOKPROBE_SYMBOL(arch_prepare_ss_slot); @@ -37,17 +26,20 @@ NOKPROBE_SYMBOL(arch_prepare_simulate); int arch_prepare_kprobe(struct kprobe *p) { + union loongarch_instruction insn; + if ((unsigned long)p->addr & 0x3) return -EILSEQ; /* copy instruction */ p->opcode = *p->addr; + insn.word = p->opcode; /* decode instruction */ - if (insns_not_supported(p->opcode)) + if (insns_not_supported(insn)) return -EINVAL; - if (insns_need_simulation(p->opcode)) { + if (insns_need_simulation(insn)) { p->ainsn.insn = NULL; } else { p->ainsn.insn = get_insn_slot(); @@ -68,7 +60,7 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); /* Install breakpoint in text */ void arch_arm_kprobe(struct kprobe *p) { - *p->addr = breakpoint_insn; + *p->addr = KPROBE_BP_INSN; flush_insn_slot(p); } NOKPROBE_SYMBOL(arch_arm_kprobe); @@ -163,6 +155,8 @@ NOKPROBE_SYMBOL(post_kprobe_handler); static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) { + union loongarch_instruction insn; + if (reenter) { save_previous_kprobe(kcb); set_current_kprobe(p); @@ -178,7 +172,8 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, regs->csr_era = (unsigned long)p->ainsn.insn; } else { /* simulate single steping */ - arch_simulate_insn(p->opcode, regs); + insn.word = p->opcode; + arch_simulate_insn(insn, regs); /* now go for post processing */ post_kprobe_handler(p, kcb, regs); } @@ -253,7 +248,7 @@ bool kprobe_breakpoint_handler(struct pt_regs *regs) } } - if (addr->word != breakpoint_insn.word) { + if (*addr != KPROBE_BP_INSN) { /* * The breakpoint instruction was removed right * after we hit it. 
Another cpu has removed -- cgit v1.2.3-58-ga151 From 19bc6cb6409289106d38f9ad1b2ecf73980df6b5 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Add uprobes support Uprobes is the user-space counterpart to kprobes; this patch adds uprobes support for LoongArch. Here is a simple example with CONFIG_UPROBE_EVENTS=y: # cat test.c #include int add(int a, int b) { return a + b; } int main() { return add(2, 7); } # gcc test.c -o /tmp/test # nm /tmp/test | grep add 0000000120004194 T add # cd /sys/kernel/debug/tracing # echo > uprobe_events # echo "p:myuprobe /tmp/test:0x4194 %r4 %r5" > uprobe_events # echo "r:myuretprobe /tmp/test:0x4194 %r4" >> uprobe_events # echo 1 > events/uprobes/enable # echo 1 > tracing_on # /tmp/test # cat trace ... # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | test-1060 [001] DNZff 1015.770620: myuprobe: (0x120004194) arg1=0x2 arg2=0x7 test-1060 [001] DNZff 1015.770930: myuretprobe: (0x1200041f0 <- 0x120004194) arg1=0x9 Tested-by: Jeff Xie Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 + arch/loongarch/include/asm/uprobes.h | 36 +++++++++ arch/loongarch/kernel/Makefile | 1 + arch/loongarch/kernel/traps.c | 9 +-- arch/loongarch/kernel/uprobes.c | 153 +++++++++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 5 deletions(-) create mode 100644 arch/loongarch/include/asm/uprobes.h create mode 100644 arch/loongarch/kernel/uprobes.c (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b787f8fcbac7..94ca147981cf 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -643,6 +643,9 @@ config ARCH_MMAP_RND_BITS_MIN config ARCH_MMAP_RND_BITS_MAX default 18 +config ARCH_SUPPORTS_UPROBES + def_bool y + menu "Power management options" config ARCH_SUSPEND_POSSIBLE diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h new file mode 100644 index 000000000000..c8f59983f702 --- /dev/null +++ b/arch/loongarch/include/asm/uprobes.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_LOONGARCH_UPROBES_H +#define __ASM_LOONGARCH_UPROBES_H + +#include + +typedef u32 uprobe_opcode_t; + +#define MAX_UINSN_BYTES 8 +#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES + +#define UPROBE_SWBP_INSN larch_insn_gen_break(BRK_UPROBE_BP) +#define UPROBE_SWBP_INSN_SIZE LOONGARCH_INSN_SIZE + +#define UPROBE_XOLBP_INSN larch_insn_gen_break(BRK_UPROBE_XOLBP) + +struct arch_uprobe { + unsigned long resume_era; + u32 insn[2]; + u32 ixol[2]; + bool simulate; +}; + +struct arch_uprobe_task { + unsigned long saved_trap_nr; +}; + +#ifdef CONFIG_UPROBES +bool uprobe_breakpoint_handler(struct pt_regs *regs); +bool uprobe_singlestep_handler(struct pt_regs *regs); +#else /* !CONFIG_UPROBES */ +static inline bool uprobe_breakpoint_handler(struct pt_regs *regs) { return false; } +static inline bool uprobe_singlestep_handler(struct pt_regs *regs) { return false; } +#endif /* CONFIG_UPROBES */ + +#endif /* __ASM_LOONGARCH_UPROBES_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 1061c36f5ad5..8e279f04f9e7 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o +obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/loongarch/kernel/traps.c
b/arch/loongarch/kernel/traps.c index e56df45f7202..8fb5e7a77145 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "access-helper.h" @@ -689,7 +690,6 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) if (regs->csr_prmd & CSR_PRMD_PIE) local_irq_enable(); - current->thread.trap_nr = read_csr_excode(); if (__get_inst(&opcode, (u32 *)era, user)) goto out_sigsegv; @@ -711,18 +711,17 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) else break; case BRK_UPROBE_BP: - if (notify_die(DIE_UPROBE, "Uprobe", regs, bcode, - current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + if (uprobe_breakpoint_handler(regs)) goto out; else break; case BRK_UPROBE_XOLBP: - if (notify_die(DIE_UPROBE_XOL, "Uprobe_XOL", regs, bcode, - current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + if (uprobe_singlestep_handler(regs)) goto out; else break; default: + current->thread.trap_nr = read_csr_excode(); if (notify_die(DIE_TRAP, "Break", regs, bcode, current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) goto out; diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c new file mode 100644 index 000000000000..87abc7137b73 --- /dev/null +++ b/arch/loongarch/kernel/uprobes.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#define UPROBE_TRAP_NR UINT_MAX + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, + struct mm_struct *mm, unsigned long addr) +{ + int idx; + union loongarch_instruction insn; + + if (addr & 0x3) + return -EILSEQ; + + for (idx = ARRAY_SIZE(auprobe->insn) - 1; idx >= 0; idx--) { + insn.word = auprobe->insn[idx]; + if (insns_not_supported(insn)) + return -EINVAL; + } + + if (insns_need_simulation(insn)) { + auprobe->ixol[0] = larch_insn_gen_nop(); + auprobe->simulate = true; + } else { + auprobe->ixol[0] = auprobe->insn[0]; + auprobe->simulate = false; + } + + auprobe->ixol[1] = UPROBE_XOLBP_INSN; + + return 0; +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + utask->autask.saved_trap_nr = current->thread.trap_nr; + current->thread.trap_nr = UPROBE_TRAP_NR; + instruction_pointer_set(regs, utask->xol_vaddr); + user_enable_single_step(current); + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + current->thread.trap_nr = utask->autask.saved_trap_nr; + + if (auprobe->simulate) + instruction_pointer_set(regs, auprobe->resume_era); + else + instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); + + user_disable_single_step(current); + + return 0; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.trap_nr = utask->autask.saved_trap_nr; + instruction_pointer_set(regs, utask->vaddr); + user_disable_single_step(current); +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + if (t->thread.trap_nr != UPROBE_TRAP_NR) + return true; + + return false; +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + union loongarch_instruction insn; + + if (!auprobe->simulate) + return false; + + insn.word = auprobe->insn[0]; + arch_simulate_insn(insn, regs); + auprobe->resume_era = regs->csr_era; + + return true; +} + +unsigned long 
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, + struct pt_regs *regs) +{ + unsigned long ra = regs->regs[1]; + + regs->regs[1] = trampoline_vaddr; + + return ra; +} + +bool arch_uretprobe_is_alive(struct return_instance *ret, + enum rp_check ctx, struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return regs->regs[3] <= ret->stack; + else + return regs->regs[3] < ret->stack; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +bool uprobe_breakpoint_handler(struct pt_regs *regs) +{ + if (uprobe_pre_sstep_notifier(regs)) + return true; + + return false; +} + +bool uprobe_singlestep_handler(struct pt_regs *regs) +{ + if (uprobe_post_sstep_notifier(regs)) + return true; + + return false; +} + +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + void *kaddr = kmap_local_page(page); + void *dst = kaddr + (vaddr & ~PAGE_MASK); + + memcpy(dst, src, len); + flush_icache_range((unsigned long)dst, (unsigned long)dst + len); + kunmap_local(kaddr); +} -- cgit v1.2.3-58-ga151 From 5ee35c769663cb1c5f26e12cad84904dc3002de8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Remove five DIE_* definitions in kdebug.h For now, DIE_PAGE_FAULT, DIE_BREAK, DIE_SSTEPBP, DIE_UPROBE and DIE_UPROBE_XOL are not used by any code, so remove them. Tested-by: Jeff Xie Suggested-by: Youling Tang Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kdebug.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/kdebug.h b/arch/loongarch/include/asm/kdebug.h index d721b4b82fae..c00ed874bf06 100644 --- a/arch/loongarch/include/asm/kdebug.h +++ b/arch/loongarch/include/asm/kdebug.h @@ -13,11 +13,6 @@ enum die_val { DIE_FP, DIE_SIMD, DIE_TRAP, - DIE_PAGE_FAULT, - DIE_BREAK, - DIE_SSTEPBP, - DIE_UPROBE, - DIE_UPROBE_XOL, }; #endif /* _ASM_LOONGARCH_KDEBUG_H */ -- cgit v1.2.3-58-ga151