diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-20 11:06:04 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-20 11:06:04 -0800 |
commit | e5075d8ec5647322fb9e699bfb76331cc8ee098d (patch) | |
tree | 8ec986d65c79dbf421a318a8ec2042fe02bf20d4 | |
parent | c25b24fa72c734f8cd6c31a13548013263b26286 (diff) | |
parent | f24a70106dc1ad2a755b2d42f47cf1dcf24f0b27 (diff) |
Merge tag 'riscv-for-linus-6.8-mw4' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull more RISC-V updates from Palmer Dabbelt:
- Support for tuning for systems with fast misaligned accesses.
- Support for SBI-based suspend.
- Support for the new SBI debug console extension.
- The T-Head CMOs now use PA-based flushes.
- Support for enabling the V extension in kernel code.
- Optimized IP checksum routines.
- Various ftrace improvements.
- Support for archrandom, which depends on the Zkr extension.
- The build is no longer broken under NET=n, KUNIT=y for ports that
don't define their own ipv6 checksum.
* tag 'riscv-for-linus-6.8-mw4' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (56 commits)
lib: checksum: Fix build with CONFIG_NET=n
riscv: lib: Check if output in asm goto supported
riscv: Fix build error on rv32 + XIP
riscv: optimize ELF relocation function in riscv
RISC-V: Implement archrandom when Zkr is available
riscv: Optimize hweight API with Zbb extension
riscv: add dependency among Image(.gz), loader(.bin), and vmlinuz.efi
samples: ftrace: Add RISC-V support for SAMPLE_FTRACE_DIRECT[_MULTI]
riscv: ftrace: Add DYNAMIC_FTRACE_WITH_DIRECT_CALLS support
riscv: ftrace: Make function graph use ftrace directly
riscv: select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
lib/Kconfig.debug: Update AS_HAS_NON_CONST_LEB128 comment and name
riscv: Restrict DWARF5 when building with LLVM to known working versions
riscv: Hoist linker relaxation disabling logic into Kconfig
kunit: Add tests for csum_ipv6_magic and ip_fast_csum
riscv: Add checksum library
riscv: Add checksum header
riscv: Add static key for misaligned accesses
asm-generic: Improve csum_fold
RISC-V: selftests: cbo: Ensure asm operands match constraints
...
71 files changed, 2676 insertions, 213 deletions
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 23646b684ea2..9d8670c00e3b 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -63,8 +63,8 @@ properties: mmu-type: description: - Identifies the MMU address translation mode used on this - hart. These values originate from the RISC-V Privileged + Identifies the largest MMU address translation mode supported by + this hart. These values originate from the RISC-V Privileged Specification document, available from https://riscv.org/specifications/ $ref: /schemas/types.yaml#/definitions/string @@ -80,6 +80,11 @@ properties: description: The blocksize in bytes for the Zicbom cache operations. + riscv,cbop-block-size: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The blocksize in bytes for the Zicbop cache operations. + riscv,cboz-block-size: $ref: /schemas/types.yaml#/definitions/uint32 description: diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 27beedb98198..63d81dc895e5 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -48,7 +48,7 @@ properties: insensitive, letters in the riscv,isa string must be all lowercase. $ref: /schemas/types.yaml#/definitions/string - pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$ + pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[0-9a-z])+)?(?:_[hsxz](?:[0-9a-z])+)*$ deprecated: true riscv,isa-base: diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt index 8fd22073a847..d222bd3ee749 100644 --- a/Documentation/features/vm/TLB/arch-support.txt +++ b/Documentation/features/vm/TLB/arch-support.txt @@ -20,7 +20,7 @@ | openrisc: | .. | | parisc: | TODO | | powerpc: | TODO | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | TODO | diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b549499eb363..bffbd869a068 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,6 +53,7 @@ config RISCV select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT @@ -66,9 +67,10 @@ config RISCV select CLINT_TIMER if !MMU select CLONE_BACKWARDS select COMMON_CLK - select CPU_PM if CPU_IDLE || HIBERNATION + select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) + select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP @@ -115,6 +117,7 @@ config RISCV select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER @@ -142,6 +145,8 @@ config RISCV select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU @@ -183,6 +188,20 @@ config HAVE_SHADOW_CALL_STACK # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769 depends on $(ld-option,--no-relax-gp) +config RISCV_USE_LINKER_RELAXATION + def_bool y + # https://github.com/llvm/llvm-project/commit/6611d58f5bbcbec77262d392e2923e1d680f6985 + depends on !LD_IS_LLD || LLD_VERSION >= 150000 + +# https://github.com/llvm/llvm-project/commit/bbc0f99f3bc96f1db16f649fc21dd18e5b0918f6 +config ARCH_HAS_BROKEN_DWARF5 + def_bool y + depends on RISCV_USE_LINKER_RELAXATION + # https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a + depends on AS_IS_LLVM && AS_VERSION < 180000 + # https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77 + depends on LD_IS_LLD && LLD_VERSION < 180000 + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 @@ -529,6 +548,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE If you don't know what to do here, say Y. +config RISCV_ISA_V_UCOPY_THRESHOLD + int "Threshold size for vectorized user copies" + depends on RISCV_ISA_V + default 768 + help + Prefer using vectorized copy_to_user()/copy_from_user() when the + workload size exceeds this value. + +config RISCV_ISA_V_PREEMPTIVE + bool "Run kernel-mode Vector with kernel preemption" + depends on PREEMPTION + depends on RISCV_ISA_V + default y + help + Usually, in-kernel SIMD routines are run with preemption disabled. + Functions which envoke long running SIMD thus must yield core's + vector unit to prevent blocking other tasks for too long. + + This config allows kernel to run SIMD without explicitly disable + preemption. Enabling this config will result in higher memory + consumption due to the allocation of per-task's kernel Vector context. + config TOOLCHAIN_HAS_ZBB bool default y @@ -655,6 +696,20 @@ config RISCV_MISALIGNED load/store for both kernel and userspace. When disable, misaligned accesses will generate SIGBUS in userspace and panic in kernel. +config RISCV_EFFICIENT_UNALIGNED_ACCESS + bool "Assume the CPU supports fast unaligned memory accesses" + depends on NONPORTABLE + select DCACHE_WORD_ACCESS if MMU + select HAVE_EFFICIENT_UNALIGNED_ACCESS + help + Say Y here if you want the kernel to assume that the CPU supports + efficient unaligned memory accesses. When enabled, this option + improves the performance of the kernel on such CPUs. However, the + kernel will run much more slowly, or will not be able to run at all, + on CPUs that do not support efficient unaligned memory accesses. + + If unsure what to do here, say N. + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index f5c432b005e7..910ba8837add 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -98,6 +98,7 @@ config ERRATA_THEAD_CMO depends on ERRATA_THEAD && MMU select DMA_DIRECT_REMAP select RISCV_DMA_NONCOHERENT + select RISCV_NONSTANDARD_CACHE_OPS default y help This will apply the cache management errata to handle the diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index a74be78678eb..0b7d109258e7 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -43,8 +43,7 @@ else KBUILD_LDFLAGS += -melf32lriscv endif -ifeq ($(CONFIG_LD_IS_LLD),y) -ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y) +ifndef CONFIG_RISCV_USE_LINKER_RELAXATION KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax ifndef CONFIG_AS_IS_LLVM @@ -52,7 +51,6 @@ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS += -Wa,-mno-relax endif endif -endif ifeq ($(CONFIG_SHADOW_CALL_STACK),y) KBUILD_LDFLAGS += --no-relax-gp @@ -108,7 +106,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # unaligned accesses. While unaligned accesses are explicitly allowed in the # RISC-V ISA, they're emulated by machine mode traps on all extant # architectures. It's faster to have GCC emit only aligned accesses. +ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y) KBUILD_CFLAGS += $(call cc-option,-mstrict-align) +endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare @@ -163,6 +163,8 @@ BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi all: $(notdir $(KBUILD_IMAGE)) +loader.bin: loader +Image.gz loader vmlinuz.efi: Image $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @$(kecho) ' Kernel: $(boot)/$@ is ready' diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 905881282a7c..eaf34e871e30 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_SH_SCI=y +CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 0554ed4bf087..b1c410bbc1ae 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -12,8 +12,10 @@ #include <asm/alternative.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> +#include <asm/dma-noncoherent.h> #include <asm/errata_list.h> #include <asm/hwprobe.h> +#include <asm/io.h> #include <asm/patch.h> #include <asm/vendorid_list.h> @@ -33,6 +35,69 @@ static bool errata_probe_pbmt(unsigned int stage, return false; } +/* + * th.dcache.ipa rs1 (invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01010 rs1 000 00000 0001011 + * th.dcache.iva rs1 (invalidate, virtual address) + * 0000001 00110 rs1 000 00000 0001011 + * + * th.dcache.cpa rs1 (clean, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01001 rs1 000 00000 0001011 + * th.dcache.cva rs1 (clean, virtual address) + * 0000001 00101 rs1 000 00000 0001011 + * + * th.dcache.cipa rs1 (clean then invalidate, physical address) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000001 01011 rs1 000 00000 0001011 + * th.dcache.civa rs1 (clean then invalidate, virtual address) + * 0000001 00111 rs1 000 00000 0001011 + * + * th.sync.s (make sure all cache operations finished) + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | + * 0000000 11001 00000 000 00000 0001011 + */ +#define THEAD_INVAL_A0 ".long 0x02a5000b" +#define THEAD_CLEAN_A0 ".long 0x0295000b" +#define THEAD_FLUSH_A0 ".long 0x02b5000b" +#define THEAD_SYNC_S ".long 0x0190000b" + +#define THEAD_CMO_OP(_op, _start, _size, _cachesize) \ +asm volatile("mv a0, %1\n\t" \ + "j 2f\n\t" \ + "3:\n\t" \ + THEAD_##_op##_A0 "\n\t" \ + "add a0, a0, %0\n\t" \ + "2:\n\t" \ + "bltu a0, %2, 3b\n\t" \ + THEAD_SYNC_S \ + : : "r"(_cachesize), \ + "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ + "r"((unsigned long)(_start) + (_size)) \ + : "a0") + +static void thead_errata_cache_inv(phys_addr_t paddr, size_t size) +{ + THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size); +} + +static void thead_errata_cache_wback(phys_addr_t paddr, size_t size) +{ + THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size); +} + +static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size) +{ + THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size); +} + +static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = { + .wback = &thead_errata_cache_wback, + .inv = &thead_errata_cache_inv, + .wback_inv = &thead_errata_cache_wback_inv, +}; + static bool errata_probe_cmo(unsigned int stage, unsigned long arch_id, unsigned long impid) { @@ -48,6 +113,7 @@ static bool errata_probe_cmo(unsigned int stage, if (stage == RISCV_ALTERNATIVES_BOOT) { riscv_cbom_block_size = L1_CACHE_BYTES; riscv_noncoherent_supported(); + riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops); } return true; @@ -77,8 +143,7 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pbmt(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); - if (errata_probe_cmo(stage, archid, impid)) - cpu_req_errata |= BIT(ERRATA_THEAD_CMO); + errata_probe_cmo(stage, archid, impid); if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h new file mode 100644 index 000000000000..c20236a0725b --- /dev/null +++ b/arch/riscv/include/asm/arch_hweight.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Based on arch/x86/include/asm/arch_hweight.h + */ + +#ifndef _ASM_RISCV_HWEIGHT_H +#define _ASM_RISCV_HWEIGHT_H + +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> + +#if (BITS_PER_LONG == 64) +#define CPOPW "cpopw " +#elif (BITS_PER_LONG == 32) +#define CPOPW "cpop " +#else +#error "Unexpected BITS_PER_LONG" +#endif + +static __always_inline unsigned int __arch_hweight32(unsigned int w) +{ +#ifdef CONFIG_RISCV_ISA_ZBB + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm (".option push\n" + ".option arch,+zbb\n" + CPOPW "%0, %0\n" + ".option pop\n" + : "+r" (w) : :); + + return w; + +legacy: +#endif + return __sw_hweight32(w); +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +#if BITS_PER_LONG == 64 +static __always_inline unsigned long __arch_hweight64(__u64 w) +{ +# ifdef CONFIG_RISCV_ISA_ZBB + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm (".option push\n" + ".option arch,+zbb\n" + "cpop %0, %0\n" + ".option pop\n" + : "+r" (w) : :); + + return w; + +legacy: +# endif + return __sw_hweight64(w); +} +#else /* BITS_PER_LONG == 64 */ +static inline unsigned long __arch_hweight64(__u64 w) +{ + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +} +#endif /* !(BITS_PER_LONG == 64) */ + +#endif /* _ASM_RISCV_HWEIGHT_H */ diff --git a/arch/riscv/include/asm/archrandom.h b/arch/riscv/include/asm/archrandom.h new file mode 100644 index 000000000000..5345360adfb9 --- /dev/null +++ b/arch/riscv/include/asm/archrandom.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel interface for the RISCV arch_random_* functions + * + * Copyright (c) 2023 Rivos Inc. + * + */ + +#ifndef ASM_RISCV_ARCHRANDOM_H +#define ASM_RISCV_ARCHRANDOM_H + +#include <asm/csr.h> +#include <asm/processor.h> + +#define SEED_RETRY_LOOPS 100 + +static inline bool __must_check csr_seed_long(unsigned long *v) +{ + unsigned int retry = SEED_RETRY_LOOPS, valid_seeds = 0; + const int needed_seeds = sizeof(long) / sizeof(u16); + u16 *entropy = (u16 *)v; + + do { + /* + * The SEED CSR must be accessed with a read-write instruction. + */ + unsigned long csr_seed = csr_swap(CSR_SEED, 0); + unsigned long opst = csr_seed & SEED_OPST_MASK; + + switch (opst) { + case SEED_OPST_ES16: + entropy[valid_seeds++] = csr_seed & SEED_ENTROPY_MASK; + if (valid_seeds == needed_seeds) + return true; + break; + + case SEED_OPST_DEAD: + pr_err_once("archrandom: Unrecoverable error\n"); + return false; + + case SEED_OPST_BIST: + case SEED_OPST_WAIT: + default: + cpu_relax(); + continue; + } + } while (--retry); + + return false; +} + +static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) +{ + return 0; +} + +static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) +{ + if (!max_longs) + return 0; + + /* + * If Zkr is supported and csr_seed_long succeeds, we return one long + * worth of entropy. + */ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZKR) && csr_seed_long(v)) + return 1; + + return 0; +} + +#endif /* ASM_RISCV_ARCHRANDOM_H */ diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h index 00a96e7a9664..0c8bfd54fc4e 100644 --- a/arch/riscv/include/asm/asm-extable.h +++ b/arch/riscv/include/asm/asm-extable.h @@ -6,6 +6,7 @@ #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 #define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 #ifdef CONFIG_MMU @@ -47,6 +48,11 @@ #define EX_DATA_REG_ZERO_SHIFT 5 #define EX_DATA_REG_ZERO GENMASK(9, 5) +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + #define EX_DATA_REG(reg, gpr) \ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" @@ -62,6 +68,15 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \ + "(" \ + EX_DATA_REG(DATA, data) " | " \ + EX_DATA_REG(ADDR, addr) \ + ")") + #endif /* __ASSEMBLY__ */ #else /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 36b955c762ba..cd627ec289f1 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b); long long __ashrti3(long long a, int b); long long __ashlti3(long long a, int b); +#ifdef CONFIG_RISCV_ISA_V + +#ifdef CONFIG_MMU +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n); +#endif /* CONFIG_MMU */ + +void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2); +void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3); +void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4); +void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5); + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs); +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs); +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +#endif /* CONFIG_RISCV_ISA_V */ #define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 224b4dc02b50..9ffc35537024 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -271,7 +271,9 @@ legacy: #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/sched.h> -#include <asm-generic/bitops/hweight.h> +#include <asm/arch_hweight.h> + +#include <asm-generic/bitops/const_hweight.h> #if (BITS_PER_LONG == 64) #define __AMO(op) "amo" #op ".d" diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h new file mode 100644 index 000000000000..a5b60b54b101 --- /dev/null +++ b/arch/riscv/include/asm/checksum.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Checksum routines + * + * Copyright (C) 2023 Rivos Inc. + */ +#ifndef __ASM_RISCV_CHECKSUM_H +#define __ASM_RISCV_CHECKSUM_H + +#include <linux/in6.h> +#include <linux/uaccess.h> + +#define ip_fast_csum ip_fast_csum + +extern unsigned int do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + +/* Default version is sufficient for 32 bit */ +#ifndef CONFIG_32BIT +#define _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); +#endif + +/* Define riscv versions of functions before importing asm-generic/checksum.h */ +#include <asm-generic/checksum.h> + +/** + * Quickly compute an IP checksum with the assumption that IPv4 headers will + * always be in multiples of 32-bits, and have an ihl of at least 5. + * + * @ihl: the number of 32 bit segments and must be greater than or equal to 5. + * @iph: assumed to be word aligned given that NET_IP_ALIGN is set to 2 on + * riscv, defining IP headers to be aligned. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned long csum = 0; + int pos = 0; + + do { + csum += ((const unsigned int *)iph)[pos]; + if (IS_ENABLED(CONFIG_32BIT)) + csum += csum < ((const unsigned int *)iph)[pos]; + } while (++pos < ihl); + + /* + * ZBB only saves three instructions on 32-bit and five on 64-bit so not + * worth checking if supported without Alternatives. + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + + if (IS_ENABLED(CONFIG_32BIT)) { + asm(".option push \n\ + .option arch,+zbb \n\ + not %[fold_temp], %[csum] \n\ + rori %[csum], %[csum], 16 \n\ + sub %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)); + } else { + asm(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 32 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + srli %[csum], %[csum], 32 \n\ + not %[fold_temp], %[csum] \n\ + roriw %[csum], %[csum], 16 \n\ + subw %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)); + } + return (__force __sum16)(csum >> 16); + } +no_zbb: +#ifndef CONFIG_32BIT + csum += ror64(csum, 32); + csum >>= 32; +#endif + return csum_fold((__force __wsum)csum); +} + +#endif /* __ASM_RISCV_CHECKSUM_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index fbdde8b8a47e..5a626ed2c47a 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -135,4 +135,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); } +DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); + #endif diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 306a19a5509c..510014051f5d 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -411,6 +411,15 @@ #define CSR_VTYPE 0xc21 #define CSR_VLENB 0xc22 +/* Scalar Crypto Extension - Entropy */ +#define CSR_SEED 0x015 +#define SEED_OPST_MASK _AC(0xC0000000, UL) +#define SEED_OPST_BIST _AC(0x00000000, UL) +#define SEED_OPST_WAIT _AC(0x40000000, UL) +#define SEED_OPST_ES16 _AC(0x80000000, UL) +#define SEED_OPST_DEAD _AC(0xC0000000, UL) +#define SEED_ENTROPY_MASK _AC(0xFFFF, UL) + #ifdef CONFIG_RISCV_M_MODE # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 7ab5e34318c8..2293e535f865 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -4,6 +4,23 @@ #define _ASM_RISCV_ENTRY_COMMON_H #include <asm/stacktrace.h> +#include <asm/thread_info.h> +#include <asm/vector.h> + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) { + clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE); + /* + * We are already called with irq disabled, so go without + * keeping track of riscv_v_flags. + */ + riscv_v_vstate_restore(¤t->thread.vstate, regs); + } +} + +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 83ed25e43553..ea33288f8a25 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -24,9 +24,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_PBMT 0 -#define ERRATA_THEAD_CMO 1 -#define ERRATA_THEAD_PMU 2 -#define ERRATA_THEAD_NUMBER 3 +#define ERRATA_THEAD_PMU 1 +#define ERRATA_THEAD_NUMBER 2 #endif #ifdef __ASSEMBLY__ @@ -94,54 +93,17 @@ asm volatile(ALTERNATIVE( \ #define ALT_THEAD_PMA(_val) #endif -/* - * th.dcache.ipa rs1 (invalidate, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01010 rs1 000 00000 0001011 - * th.dache.iva rs1 (invalida, virtual address) - * 0000001 00110 rs1 000 00000 0001011 - * - * th.dcache.cpa rs1 (clean, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01001 rs1 000 00000 0001011 - * th.dcache.cva rs1 (clean, virtual address) - * 0000001 00101 rs1 000 00000 0001011 - * - * th.dcache.cipa rs1 (clean then invalidate, physical address) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000001 01011 rs1 000 00000 0001011 - * th.dcache.civa rs1 (... virtual address) - * 0000001 00111 rs1 000 00000 0001011 - * - * th.sync.s (make sure all cache operations finished) - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | - * 0000000 11001 00000 000 00000 0001011 - */ -#define THEAD_INVAL_A0 ".long 0x0265000b" -#define THEAD_CLEAN_A0 ".long 0x0255000b" -#define THEAD_FLUSH_A0 ".long 0x0275000b" -#define THEAD_SYNC_S ".long 0x0190000b" - #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ -asm volatile(ALTERNATIVE_2( \ - __nops(6), \ +asm volatile(ALTERNATIVE( \ + __nops(5), \ "mv a0, %1\n\t" \ "j 2f\n\t" \ "3:\n\t" \ CBO_##_op(a0) \ "add a0, a0, %0\n\t" \ "2:\n\t" \ - "bltu a0, %2, 3b\n\t" \ - "nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ - "mv a0, %1\n\t" \ - "j 2f\n\t" \ - "3:\n\t" \ - THEAD_##_op##_A0 "\n\t" \ - "add a0, a0, %0\n\t" \ - "2:\n\t" \ - "bltu a0, %2, 3b\n\t" \ - THEAD_SYNC_S, THEAD_VENDOR_ID, \ - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ + "bltu a0, %2, 3b\n\t", \ + 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM) \ : : "r"(_cachesize), \ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ "r"((unsigned long)(_start) + (_size)) \ diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 2b2f5df7ef2c..329172122952 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -128,7 +128,23 @@ do { \ struct dyn_ftrace; int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +struct ftrace_ops; +struct ftrace_regs; +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#define ftrace_graph_func ftrace_graph_func + +static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + regs->t1 = addr; +} +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e3ffef1c6119..0c94260b5d0c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -865,7 +865,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) #ifdef CONFIG_COMPAT -#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) +#define TASK_SIZE_32 (_AC(0x80000000, UL)) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #else diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index f19f861cda54..a8509cc31ab2 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -16,7 +16,7 @@ #ifdef CONFIG_64BIT #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) -#define STACK_TOP_MAX TASK_SIZE_64 +#define STACK_TOP_MAX TASK_SIZE #define arch_get_mmap_end(addr, len, flags) \ ({ \ @@ -73,6 +73,43 @@ struct task_struct; struct pt_regs; +/* + * We use a flag to track in-kernel Vector context. Currently the flag has the + * following meaning: + * + * - bit 0: indicates whether the in-kernel Vector context is active. The + * activation of this state disables the preemption. On a non-RT kernel, it + * also disable bh. + * - bits 8: is used for tracking preemptible kernel-mode Vector, when + * RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not + * disable the preemption if the thread's kernel_vstate.datap is allocated. + * Instead, the kernel set this bit field. Then the trap entry/exit code + * knows if we are entering/exiting the context that owns preempt_v. + * - 0: the task is not using preempt_v + * - 1: the task is actively using preempt_v. But whether does the task own + * the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK. + * - bit 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine + * when preempt_v starts: + * - 0: the task is actively using, and own preempt_v context. + * - non-zero: the task was using preempt_v, but then took a trap within. + * Thus, the task does not own preempt_v. Any use of Vector will have to + * save preempt_v, if dirty, and fallback to non-preemptible kernel-mode + * Vector. + * - bit 30: The in-kernel preempt_v context is saved, and requries to be + * restored when returning to the context that owns the preempt_v. + * - bit 31: The in-kernel preempt_v context is dirty, as signaled by the + * trap entry code. Any context switches out-of current task need to save + * it to the task's in-kernel V context. Also, any traps nesting on-top-of + * preempt_v requesting to use V needs a save. + */ +#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000 + +#define RISCV_V_CTX_UNIT_DEPTH 0x00010000 +#define RISCV_KERNEL_MODE_V 0x00000001 +#define RISCV_PREEMPT_V 0x00000100 +#define RISCV_PREEMPT_V_DIRTY 0x80000000 +#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000 + /* CPU-specific state of a task */ struct thread_struct { /* Callee-saved registers */ @@ -81,9 +118,11 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; - unsigned long vstate_ctrl; + u32 riscv_v_flags; + u32 vstate_ctrl; struct __riscv_v_ext_state vstate; unsigned long align_ctl; + struct __riscv_v_ext_state kernel_vstate; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index b6f898c56940..6e68f8dff76b 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -29,6 +29,7 @@ enum sbi_ext_id { SBI_EXT_RFENCE = 0x52464E43, SBI_EXT_HSM = 0x48534D, SBI_EXT_SRST = 0x53525354, + SBI_EXT_SUSP = 0x53555350, SBI_EXT_PMU = 0x504D55, SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, @@ -115,6 +116,14 @@ enum sbi_srst_reset_reason { SBI_SRST_RESET_REASON_SYS_FAILURE, }; +enum sbi_ext_susp_fid { + SBI_EXT_SUSP_SYSTEM_SUSPEND = 0, +}; + +enum sbi_ext_susp_sleep_type { + SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0, +}; + enum sbi_ext_pmu_fid { SBI_EXT_PMU_NUM_COUNTERS = 0, SBI_EXT_PMU_COUNTER_GET_INFO, @@ -288,8 +297,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg3, unsigned long arg4, unsigned long arg5); +#ifdef CONFIG_RISCV_SBI_V01 void sbi_console_putchar(int ch); int sbi_console_getchar(void); +#else +static inline void sbi_console_putchar(int ch) { } +static inline int sbi_console_getchar(void) { return -ENOENT; } +#endif long sbi_get_mvendorid(void); long sbi_get_marchid(void); long sbi_get_mimpid(void); @@ -346,6 +360,11 @@ static inline unsigned long sbi_mk_version(unsigned long major, } int sbi_err_map_linux_errno(int err); + +extern bool sbi_debug_console_available; +int sbi_debug_console_write(const char *bytes, unsigned int num_bytes); +int sbi_debug_console_read(char *bytes, unsigned int num_bytes); + #else /* CONFIG_RISCV_SBI */ static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h new file mode 100644 index 000000000000..54efbf523d49 --- /dev/null +++ b/arch/riscv/include/asm/simd.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_SIMD_H +#define __ASM_SIMD_H + +#include <linux/compiler.h> +#include <linux/irqflags.h> +#include <linux/percpu.h> +#include <linux/preempt.h> +#include <linux/types.h> +#include <linux/thread_info.h> + +#include <asm/vector.h> + +#ifdef CONFIG_RISCV_ISA_V +/* + * may_use_simd - whether it is allowable at this time to issue vector + * instructions or access the vector register file + * + * Callers must not assume that the result remains true beyond the next + * preempt_enable() or return from softirq context. + */ +static __must_check inline bool may_use_simd(void) +{ + /* + * RISCV_KERNEL_MODE_V is only set while preemption is disabled, + * and is clear whenever preemption is enabled. + */ + if (in_hardirq() || in_nmi()) + return false; + + /* + * Nesting is acheived in preempt_v by spreading the control for + * preemptible and non-preemptible kernel-mode Vector into two fields. + * Always try to match with prempt_v if kernel V-context exists. Then, + * fallback to check non preempt_v if nesting happens, or if the config + * is not set. + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) { + if (!riscv_preempt_v_started(current)) + return true; + } + /* + * Non-preemptible kernel-mode Vector temporarily disables bh. So we + * must not return true on irq_disabled(). Otherwise we would fail the + * lockdep check calling local_bh_enable() + */ + return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V); +} + +#else /* ! CONFIG_RISCV_ISA_V */ + +static __must_check inline bool may_use_simd(void) +{ + return false; +} + +#endif /* ! CONFIG_RISCV_ISA_V */ + +#endif diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index f90d8e42f3c7..7efdb0584d47 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev, struct pt_regs *regs; regs = task_pt_regs(prev); - if (unlikely(regs->status & SR_SD)) - fstate_save(prev, regs); + fstate_save(prev, regs); fstate_restore(next, task_pt_regs(next)); } diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 4856697c5f25..5d473343634b 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */ #define TIF_32BIT 11 /* compat-mode 32bit process */ +#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returing to user */ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_UPROBE (1 << TIF_UPROBE) +#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ diff --git a/arch/riscv/include/asm/tlbbatch.h b/arch/riscv/include/asm/tlbbatch.h new file mode 100644 index 000000000000..46014f70b9da --- /dev/null +++ b/arch/riscv/include/asm/tlbbatch.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +#ifndef _ASM_RISCV_TLBBATCH_H +#define _ASM_RISCV_TLBBATCH_H + +#include <linux/cpumask.h> + +struct arch_tlbflush_unmap_batch { + struct cpumask cpumask; +}; + +#endif /* _ASM_RISCV_TLBBATCH_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index a60416bbe190..928f096dca21 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -47,6 +47,14 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif + +bool arch_tlbbatch_should_defer(struct mm_struct *mm); +void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr); +void arch_flush_tlb_batched_pending(struct mm_struct *mm); +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + #else /* CONFIG_SMP && CONFIG_MMU */ #define flush_tlb_all() local_flush_tlb_all() diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 87aaef656257..0cd6f0a027d1 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -22,6 +22,18 @@ extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); bool riscv_v_first_use_handler(struct pt_regs *regs); +void kernel_vector_begin(void); +void kernel_vector_end(void); +void get_cpu_vector_context(void); +void put_cpu_vector_context(void); +void riscv_v_thread_free(struct task_struct *tsk); +void __init riscv_v_setup_ctx_cache(void); +void riscv_v_thread_alloc(struct task_struct *tsk); + +static inline u32 riscv_v_flags(void) +{ + return READ_ONCE(current->thread.riscv_v_flags); +} static __always_inline bool has_vector(void) { @@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs) __riscv_v_vstate_dirty(regs); } -static inline void riscv_v_vstate_save(struct task_struct *task, +static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { if ((regs->status & SR_VS) == SR_VS_DIRTY) { - struct __riscv_v_ext_state *vstate = &task->thread.vstate; - __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } -static inline void riscv_v_vstate_restore(struct task_struct *task, +static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { if ((regs->status & SR_VS) != SR_VS_OFF) { - struct __riscv_v_ext_state *vstate = &task->thread.vstate; - __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } +static inline void riscv_v_vstate_set_restore(struct task_struct *task, + struct pt_regs *regs) +{ + if ((regs->status & SR_VS) != SR_VS_OFF) { + set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); + riscv_v_vstate_on(regs); + } +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static inline bool riscv_preempt_v_dirty(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY); +} + +static inline bool riscv_preempt_v_restore(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_preempt_v_clear_dirty(struct task_struct *task) +{ + barrier(); + task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY; +} + +static inline void riscv_preempt_v_set_restore(struct task_struct *task) +{ + barrier(); + task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE; +} + +static inline bool riscv_preempt_v_started(struct task_struct *task) +{ + return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V); +} + +#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */ +static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; } +static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; } +static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; } +#define riscv_preempt_v_clear_dirty(tsk) do {} while (0) +#define riscv_preempt_v_set_restore(tsk) do {} while (0) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + static inline void __switch_to_vector(struct task_struct *prev, struct task_struct *next) { struct pt_regs *regs; - regs = task_pt_regs(prev); - riscv_v_vstate_save(prev, regs); - riscv_v_vstate_restore(next, task_pt_regs(next)); + if (riscv_preempt_v_started(prev)) { + if (riscv_preempt_v_dirty(prev)) { + __riscv_v_vstate_save(&prev->thread.kernel_vstate, + prev->thread.kernel_vstate.datap); + riscv_preempt_v_clear_dirty(prev); + } + } else { + regs = task_pt_regs(prev); + riscv_v_vstate_save(&prev->thread.vstate, regs); + } + + if (riscv_preempt_v_started(next)) + riscv_preempt_v_set_restore(next); + else + riscv_v_vstate_set_restore(next, task_pt_regs(next)); } void riscv_v_vstate_ctrl_init(struct task_struct *tsk); @@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) #define riscv_v_vstate_discard(regs) do {} while (0) -#define riscv_v_vstate_save(task, regs) do {} while (0) -#define riscv_v_vstate_restore(task, regs) do {} while (0) +#define riscv_v_vstate_save(vstate, regs) do {} while (0) +#define riscv_v_vstate_restore(vstate, regs) do {} while (0) #define __switch_to_vector(__prev, __next) do {} while (0) #define riscv_v_vstate_off(regs) do {} while (0) #define riscv_v_vstate_on(regs) do {} while (0) +#define riscv_v_thread_free(tsk) do {} while (0) +#define riscv_v_setup_ctx_cache() do {} while (0) +#define riscv_v_thread_alloc(tsk) do {} while (0) #endif /* CONFIG_RISCV_ISA_V */ diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h index 7c086ac6ecd4..f3f031e34191 100644 --- a/arch/riscv/include/asm/word-at-a-time.h +++ b/arch/riscv/include/asm/word-at-a-time.h @@ -9,6 +9,7 @@ #define _ASM_RISCV_WORD_AT_A_TIME_H +#include <asm/asm-extable.h> #include <linux/kernel.h> struct word_at_a_time { @@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +#ifdef CONFIG_DCACHE_WORD_ACCESS + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret; + + /* Load word from unaligned pointer addr */ + asm( + "1: " REG_L " %0, %2\n" + "2:\n" + _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1) + : "=&r" (ret) + : "r" (addr), "m" (*(unsigned long *)addr)); + + return ret; +} + +#endif /* CONFIG_DCACHE_WORD_ACCESS */ + #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */ diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h new file mode 100644 index 000000000000..96011861e46b --- /dev/null +++ b/arch/riscv/include/asm/xor.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ + +#include <linux/hardirq.h> +#include <asm-generic/xor.h> +#ifdef CONFIG_RISCV_ISA_V +#include <asm/vector.h> +#include <asm/switch_to.h> +#include <asm/asm-prototypes.h> + +static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2) +{ + kernel_vector_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_vector_end(); +} + +static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3) +{ + kernel_vector_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_vector_end(); +} + +static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4) +{ + kernel_vector_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_vector_end(); +} + +static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5) +{ + kernel_vector_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_vector_end(); +} + +static struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_vector_2, + .do_3 = xor_vector_3, + .do_4 = xor_vector_4, + .do_5 = xor_vector_5 +}; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (has_vector()) { \ + xor_speed(&xor_block_rvv);\ + } \ + } while (0) +#endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index c92c623b311e..f71910718053 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index e32591e9da90..89920f84d0a3 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,8 +8,10 @@ #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/cpu.h> #include <linux/cpuhotplug.h> #include <linux/ctype.h> +#include <linux/jump_label.h> #include <linux/log2.h> #include <linux/memory.h> #include <linux/module.h> @@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS]; /* Performance information */ DEFINE_PER_CPU(long, misaligned_access_speed); +static cpumask_t fast_misaligned_access; + /** * riscv_isa_extension_base() - Get base extension word * @@ -784,6 +788,16 @@ static int check_unaligned_access(void *param) (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. + */ + if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + return 0; } @@ -796,13 +810,69 @@ static void check_unaligned_access_nonboot_cpu(void *param) check_unaligned_access(pages[cpu]); } +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. + */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + static int riscv_online_cpu(unsigned int cpu) { static struct page *buf; /* We are already set since the last check */ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - return 0; + goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { @@ -812,6 +882,17 @@ static int riscv_online_cpu(unsigned int cpu) check_unaligned_access(buf); __free_pages(buf, MISALIGNED_BUFFER_ORDER); + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + return 0; } @@ -846,9 +927,12 @@ static int check_unaligned_access_all_cpus(void) /* Check core 0. */ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - /* Setup hotplug callback for any new CPUs that come online. */ + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, NULL); + riscv_online_cpu, riscv_offline_cpu); out: unaligned_emulation_finish(); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 54ca4564a926..9d1a305d5508 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception) /* Load the kernel shadow call stack pointer if coming from userspace */ scs_load_current_if_task_changed s5 +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_start +#endif move a0, sp /* pt_regs */ la ra, ret_from_exception @@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) */ csrw CSR_SCRATCH, tp 1: +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_end +#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 03a6434a8cdd..f5aa24d9e1c1 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -178,32 +178,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + struct pt_regs *regs = arch_ftrace_get_regs(fregs); + unsigned long *parent = (unsigned long *)®s->ra; + + prepare_ftrace_return(parent, ip, frame_pointer(regs)); +} +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ extern void ftrace_graph_call(void); -extern void ftrace_graph_regs_call(void); int ftrace_enable_ftrace_graph_caller(void) { - int ret; - - ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, true, true); - if (ret) - return ret; - - return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, true, true); } int ftrace_disable_ftrace_graph_caller(void) { - int ret; - - ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, false, true); - if (ret) - return ret; - - return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, false, true); } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c new file mode 100644 index 000000000000..6afe80c7f03a --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2021 SiFive + */ +#include <linux/compiler.h> +#include <linux/irqflags.h> +#include <linux/percpu.h> +#include <linux/preempt.h> +#include <linux/types.h> + +#include <asm/vector.h> +#include <asm/switch_to.h> +#include <asm/simd.h> +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +#include <asm/asm-prototypes.h> +#endif + +static inline void riscv_v_flags_set(u32 flags) +{ + WRITE_ONCE(current->thread.riscv_v_flags, flags); +} + +static inline void riscv_v_start(u32 flags) +{ + int orig; + + orig = riscv_v_flags(); + BUG_ON((orig & flags) != 0); + riscv_v_flags_set(orig | flags); + barrier(); +} + +static inline void riscv_v_stop(u32 flags) +{ + int orig; + + barrier(); + orig = riscv_v_flags(); + BUG_ON((orig & flags) == 0); + riscv_v_flags_set(orig & ~flags); +} + +/* + * Claim ownership of the CPU vector context for use by the calling context. + * + * The caller may freely manipulate the vector context metadata until + * put_cpu_vector_context() is called. + */ +void get_cpu_vector_context(void) +{ + /* + * disable softirqs so it is impossible for softirqs to nest + * get_cpu_vector_context() when kernel is actively using Vector. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); + + riscv_v_start(RISCV_KERNEL_MODE_V); +} + +/* + * Release the CPU vector context. + * + * Must be called from a context in which get_cpu_vector_context() was + * previously called, with no call to put_cpu_vector_context() in the + * meantime. + */ +void put_cpu_vector_context(void) +{ + riscv_v_stop(RISCV_KERNEL_MODE_V); + + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static __always_inline u32 *riscv_v_flags_ptr(void) +{ + return ¤t->thread.riscv_v_flags; +} + +static inline void riscv_preempt_v_set_dirty(void) +{ + *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY; +} + +static inline void riscv_preempt_v_reset_flags(void) +{ + *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_v_ctx_depth_inc(void) +{ + *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH; +} + +static inline void riscv_v_ctx_depth_dec(void) +{ + *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH; +} + +static inline u32 riscv_v_ctx_get_depth(void) +{ + return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK; +} + +static int riscv_v_stop_kernel_context(void) +{ + if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current)) + return 1; + + riscv_preempt_v_clear_dirty(current); + riscv_v_stop(RISCV_PREEMPT_V); + return 0; +} + +static int riscv_v_start_kernel_context(bool *is_nested) +{ + struct __riscv_v_ext_state *kvstate, *uvstate; + + kvstate = ¤t->thread.kernel_vstate; + if (!kvstate->datap) + return -ENOENT; + + if (riscv_preempt_v_started(current)) { + WARN_ON(riscv_v_ctx_get_depth() == 0); + *is_nested = true; + get_cpu_vector_context(); + if (riscv_preempt_v_dirty(current)) { + __riscv_v_vstate_save(kvstate, kvstate->datap); + riscv_preempt_v_clear_dirty(current); + } + riscv_preempt_v_set_restore(current); + return 0; + } + + /* Transfer the ownership of V from user to kernel, then save */ + riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); + if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + uvstate = ¤t->thread.vstate; + __riscv_v_vstate_save(uvstate, uvstate->datap); + } + riscv_preempt_v_clear_dirty(current); + return 0; +} + +/* low-level V context handling code, called with irq disabled */ +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) +{ + int depth; + + if (!riscv_preempt_v_started(current)) + return; + + depth = riscv_v_ctx_get_depth(); + if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + riscv_preempt_v_set_dirty(); + + riscv_v_ctx_depth_inc(); +} + +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs) +{ + struct __riscv_v_ext_state *vstate = ¤t->thread.kernel_vstate; + u32 depth; + + WARN_ON(!irqs_disabled()); + + if (!riscv_preempt_v_started(current)) + return; + + riscv_v_ctx_depth_dec(); + depth = riscv_v_ctx_get_depth(); + if (depth == 0) { + if (riscv_preempt_v_restore(current)) { + __riscv_v_vstate_restore(vstate, vstate->datap); + __riscv_v_vstate_clean(regs); + riscv_preempt_v_reset_flags(); + } + } +} +#else +#define riscv_v_start_kernel_context(nested) (-ENOENT) +#define riscv_v_stop_kernel_context() (-ENOENT) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +/* + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the vector registers is saved back to memory as necessary. + * + * A matching call to kernel_vector_end() must be made before returning from the + * calling context. + * + * The caller may freely use the vector registers until kernel_vector_end() is + * called. + */ +void kernel_vector_begin(void) +{ + bool nested = false; + + if (WARN_ON(!has_vector())) + return; + + BUG_ON(!may_use_simd()); + + if (riscv_v_start_kernel_context(&nested)) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + } + + if (!nested) + riscv_v_vstate_set_restore(current, task_pt_regs(current)); + + riscv_v_enable(); +} +EXPORT_SYMBOL_GPL(kernel_vector_begin); + +/* + * kernel_vector_end(): give the CPU vector registers back to the current task + * + * Must be called from a context in which kernel_vector_begin() was previously + * called, with no call to kernel_vector_end() in the meantime. + * + * The caller must not use the vector registers after this function is called, + * unless kernel_vector_begin() is called again in the meantime. + */ +void kernel_vector_end(void) +{ + if (WARN_ON(!has_vector())) + return; + + riscv_v_disable(); + + if (riscv_v_stop_kernel_context()) + put_cpu_vector_context(); +} +EXPORT_SYMBOL_GPL(kernel_vector_end); diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 79dc81223238..b7561288e8da 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -57,31 +57,150 @@ .endm #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - .macro SAVE_ALL + +/** +* SAVE_ABI_REGS - save regs against the pt_regs struct +* +* @all: tell if saving all the regs +* +* If all is set, all the regs will be saved, otherwise only ABI +* related regs (a0-a7,epc,ra and optional s0) will be saved. +* +* After the stack is established, +* +* 0(sp) stores the PC of the traced function which can be accessed +* by &(fregs)->regs->epc in tracing function. Note that the real +* function entry address should be computed with -FENTRY_RA_OFFSET. +* +* 8(sp) stores the function return address (i.e. parent IP) that +* can be accessed by &(fregs)->regs->ra in tracing function. +* +* The other regs are saved at the respective localtion and accessed +* by the respective pt_regs member. +* +* Here is the layout of stack for your reference. +* +* PT_SIZE_ON_STACK -> +++++++++ +* + ..... + +* + t3-t6 + +* + s2-s11+ +* + a0-a7 + --++++-> ftrace_caller saved +* + s1 + + +* + s0 + --+ +* + t0-t2 + + +* + tp + + +* + gp + + +* + sp + + +* + ra + --+ // parent IP +* sp -> + epc + --+ // PC +* +++++++++ +**/ + .macro SAVE_ABI_REGS, all=0 addi sp, sp, -PT_SIZE_ON_STACK - REG_S t0, PT_EPC(sp) - REG_S x1, PT_RA(sp) - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - save_from_x6_to_x31 + REG_S t0, PT_EPC(sp) + REG_S x1, PT_RA(sp) + + // save the ABI regs + + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + + // save the leftover regs + + .if \all == 1 + REG_S x2, PT_SP(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + + // save s0 if FP_TEST defined + + .else +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_S x8, PT_S0(sp) +#endif + .endif .endm - .macro RESTORE_ALL - REG_L x1, PT_RA(sp) - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - /* Restore t0 with PT_EPC */ - REG_L x5, PT_EPC(sp) - restore_from_x6_to_x31 + .macro RESTORE_ABI_REGS, all=0 + REG_L t0, PT_EPC(sp) + REG_L x1, PT_RA(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + .if \all == 1 + REG_L x2, PT_SP(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + .else +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_L x8, PT_S0(sp) +#endif + .endif addi sp, sp, PT_SIZE_ON_STACK .endm + + .macro PREPARE_ARGS + addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) + mv a1, ra + mv a3, sp + .endm + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS SYM_FUNC_START(ftrace_caller) SAVE_ABI @@ -105,34 +224,39 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI - jr t0 + jr t0 SYM_FUNC_END(ftrace_caller) -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ SYM_FUNC_START(ftrace_regs_caller) - SAVE_ALL - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp + mv t1, zero + SAVE_ABI_REGS 1 + PREPARE_ARGS SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, PT_RA - REG_L a1, PT_EPC(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 -#endif -SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) + RESTORE_ABI_REGS 1 + bnez t1, .Ldirect + jr t0 +.Ldirect: + jr t1 +SYM_FUNC_END(ftrace_regs_caller) + +SYM_FUNC_START(ftrace_caller) + SAVE_ABI_REGS 0 + PREPARE_ARGS + +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub -#endif - RESTORE_ALL - jr t0 -SYM_FUNC_END(ftrace_regs_caller) + RESTORE_ABI_REGS 0 + jr t0 +SYM_FUNC_END(ftrace_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_CODE_START(ftrace_stub_direct_tramp) + jr t0 +SYM_CODE_END(ftrace_stub_direct_tramp) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 862834bb1d64..5e5a82644451 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -723,8 +723,8 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head); kfree(rel_head->rel_entry); + kfree(rel_head); return -ENOMEM; } @@ -747,6 +747,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + /* + * When hashtable_size == 1, hashtable_bits == 0. + * This is valid because the hashing algorithm returns 0 in this case. + */ unsigned int hashtable_bits = ilog2(hashtable_size); /* @@ -760,10 +764,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, hashtable_size <<= should_double_size; *relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), + sizeof(**relocation_hashtable), GFP_KERNEL); if (!*relocation_hashtable) - return -ENOMEM; + return 0; __hash_init(*relocation_hashtable, hashtable_size); @@ -779,6 +783,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Sym *sym; void *location; unsigned int i, type; + unsigned int j_idx = 0; Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); @@ -789,8 +794,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, hashtable_bits = initialize_relocation_hashtable(num_relocations, &relocation_hashtable); - if (hashtable_bits < 0) - return hashtable_bits; + if (!relocation_hashtable) + return -ENOMEM; INIT_LIST_HEAD(&used_buckets_list); @@ -829,9 +834,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, v = sym->st_value + rel[i].r_addend; if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) { - unsigned int j; + unsigned int j = j_idx; + bool found = false; - for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) { + do { unsigned long hi20_loc = sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[j].r_offset; @@ -860,16 +866,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, hi20 = (offset + 0x800) & 0xfffff000; lo12 = offset - hi20; v = lo12; + found = true; break; } - } - if (j == sechdrs[relsec].sh_size / sizeof(*rel)) { + + j++; + if (j > sechdrs[relsec].sh_size / sizeof(*rel)) + j = 0; + + } while (j_idx != j); + + if (!found) { pr_err( "%s: Can not find HI20 relocation information\n", me->name); return -EINVAL; } + + /* Record the previous j-loop end index */ + j_idx = j; } if (reloc_handlers[type].accumulate_handler) diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c index 68e786c84c94..f6d4dedffb84 100644 --- a/arch/riscv/kernel/pi/cmdline_early.c +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa) if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || IS_ENABLED(CONFIG_CMDLINE_FORCE) || fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) { - strncat(early_cmdline, CONFIG_CMDLINE, - COMMAND_LINE_SIZE - fdt_cmdline_size); + strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); } return early_cmdline; diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 4f21d970a129..92922dbd5b5c 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -171,6 +171,7 @@ void flush_thread(void) riscv_v_vstate_off(task_pt_regs(current)); kfree(current->thread.vstate.datap); memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif } @@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ if (has_vector()) - kfree(tsk->thread.vstate.datap); + riscv_v_thread_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; /* clear entire V context, including datap for a new task */ memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE); return 0; } @@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->a0 = 0; /* Return value of fork() */ p->thread.s[0] = 0; } + p->thread.riscv_v_flags = 0; + if (has_vector()) + riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } + +void __init arch_task_cache_init(void) +{ + riscv_v_setup_ctx_cache(); +} diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2afe460de16a..e8515aa9d80b 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target, * Ensure the vector registers have been saved to the memory before * copying them to membuf. */ - if (target == current) - riscv_v_vstate_save(current, task_pt_regs(current)); + if (target == current) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + put_cpu_vector_context(); + } ptrace_vstate.vstart = vstate->vstart; ptrace_vstate.vl = vstate->vl; diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 5a62ed1da453..e66e0999a800 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -7,6 +7,7 @@ #include <linux/bits.h> #include <linux/init.h> +#include <linux/mm.h> #include <linux/pm.h> #include <linux/reboot.h> #include <asm/sbi.h> @@ -571,6 +572,66 @@ long sbi_get_mimpid(void) } EXPORT_SYMBOL_GPL(sbi_get_mimpid); +bool sbi_debug_console_available; + +int sbi_debug_console_write(const char *bytes, unsigned int num_bytes) +{ + phys_addr_t base_addr; + struct sbiret ret; + + if (!sbi_debug_console_available) + return -EOPNOTSUPP; + + if (is_vmalloc_addr(bytes)) + base_addr = page_to_phys(vmalloc_to_page(bytes)) + + offset_in_page(bytes); + else + base_addr = __pa(bytes); + if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes)) + num_bytes = PAGE_SIZE - offset_in_page(bytes); + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, + num_bytes, lower_32_bits(base_addr), + upper_32_bits(base_addr), 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, + num_bytes, base_addr, 0, 0, 0, 0); + + if (ret.error == SBI_ERR_FAILURE) + return -EIO; + return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value; +} + +int sbi_debug_console_read(char *bytes, unsigned int num_bytes) +{ + phys_addr_t base_addr; + struct sbiret ret; + + if (!sbi_debug_console_available) + return -EOPNOTSUPP; + + if (is_vmalloc_addr(bytes)) + base_addr = page_to_phys(vmalloc_to_page(bytes)) + + offset_in_page(bytes); + else + base_addr = __pa(bytes); + if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes)) + num_bytes = PAGE_SIZE - offset_in_page(bytes); + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ, + num_bytes, lower_32_bits(base_addr), + upper_32_bits(base_addr), 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ, + num_bytes, base_addr, 0, 0, 0, 0); + + if (ret.error == SBI_ERR_FAILURE) + return -EIO; + return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value; +} + void __init sbi_init(void) { int ret; @@ -612,6 +673,11 @@ void __init sbi_init(void) sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); } + if ((sbi_spec_version >= sbi_mk_version(2, 0)) && + (sbi_probe_extension(SBI_EXT_DBCN) > 0)) { + pr_info("SBI DBCN extension detected\n"); + sbi_debug_console_available = true; + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 33dfb5078301..501e66debf69 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) /* datap is designed to be 16 byte aligned for better performance */ WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); - riscv_v_vstate_save(current, regs); + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, regs); + put_cpu_vector_context(); + /* Copy everything of vstate but datap. */ err = __copy_to_user(&state->v_state, ¤t->thread.vstate, offsetof(struct __riscv_v_ext_state, datap)); @@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) if (unlikely(err)) return err; - riscv_v_vstate_restore(current, regs); + riscv_v_vstate_set_restore(current, regs); return err; } diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 3c89b8ec69c4..239509367e42 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -4,8 +4,12 @@ * Copyright (c) 2022 Ventana Micro Systems Inc. */ +#define pr_fmt(fmt) "suspend: " fmt + #include <linux/ftrace.h> +#include <linux/suspend.h> #include <asm/csr.h> +#include <asm/sbi.h> #include <asm/suspend.h> void suspend_save_csrs(struct suspend_context *context) @@ -85,3 +89,43 @@ int cpu_suspend(unsigned long arg, return rc; } + +#ifdef CONFIG_RISCV_SBI +static int sbi_system_suspend(unsigned long sleep_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND, + sleep_type, resume_addr, opaque, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + + return ret.value; +} + +static int sbi_system_suspend_enter(suspend_state_t state) +{ + return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend); +} + +static const struct platform_suspend_ops sbi_system_suspend_ops = { + .valid = suspend_valid_only_mem, + .enter = sbi_system_suspend_enter, +}; + +static int __init sbi_system_suspend_init(void) +{ + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_SUSP) > 0) { + pr_info("SBI SUSP extension detected\n"); + if (IS_ENABLED(CONFIG_SUSPEND)) + suspend_set_ops(&sbi_system_suspend_ops); + } + + return 0; +} + +arch_initcall(sbi_system_suspend_init); +#endif /* CONFIG_RISCV_SBI */ diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 578b6292487e..6727d1d3b8f2 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -21,6 +21,10 @@ #include <asm/bug.h> static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); +static struct kmem_cache *riscv_v_user_cachep; +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static struct kmem_cache *riscv_v_kernel_cachep; +#endif unsigned long riscv_v_vsize __read_mostly; EXPORT_SYMBOL_GPL(riscv_v_vsize); @@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void) return 0; } +void __init riscv_v_setup_ctx_cache(void) +{ + if (!has_vector()) + return; + + riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", + riscv_v_vsize, 16, SLAB_PANIC, + 0, riscv_v_vsize, NULL); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx", + riscv_v_vsize, 16, + SLAB_PANIC, NULL); +#endif +} + static bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; @@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf) return false; } -static int riscv_v_thread_zalloc(void) +static int riscv_v_thread_zalloc(struct kmem_cache *cache, + struct __riscv_v_ext_state *ctx) { void *datap; - datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + datap = kmem_cache_zalloc(cache, GFP_KERNEL); if (!datap) return -ENOMEM; - current->thread.vstate.datap = datap; - memset(¤t->thread.vstate, 0, offsetof(struct __riscv_v_ext_state, - datap)); + ctx->datap = datap; + memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap)); return 0; } +void riscv_v_thread_alloc(struct task_struct *tsk) +{ +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate); +#endif +} + +void riscv_v_thread_free(struct task_struct *tsk) +{ + if (tsk->thread.vstate.datap) + kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + if (tsk->thread.kernel_vstate.datap) + kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap); +#endif +} + #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2) #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) @@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt, ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt); if (inherit) ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT; - tsk->thread.vstate_ctrl = ctrl; + tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK; + tsk->thread.vstate_ctrl |= ctrl; } bool riscv_v_vstate_ctrl_user_allowed(void) @@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) * context where VS has been off. So, try to allocate the user's V * context and resume execution. */ - if (riscv_v_thread_zalloc()) { + if (riscv_v_thread_zalloc(riscv_v_user_cachep, ¤t->thread.vstate)) { force_sig(SIGBUS); return true; } riscv_v_vstate_on(regs); - riscv_v_vstate_restore(current, regs); + riscv_v_vstate_set_restore(current, regs); return true; } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 26cb2502ecf8..bd6e6c1b0497 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -6,8 +6,14 @@ lib-y += memmove.o lib-y += strcmp.o lib-y += strlen.o lib-y += strncmp.o +lib-y += csum.o +ifeq ($(CONFIG_MMU), y) +lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o +endif lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +lib-$(CONFIG_RISCV_ISA_V) += xor.o +lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c new file mode 100644 index 000000000000..af3df5274ccb --- /dev/null +++ b/arch/riscv/lib/csum.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Checksum library + * + * Influenced by arch/arm64/lib/csum.c + * Copyright (C) 2023 Rivos Inc. + */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/jump_label.h> +#include <linux/kasan-checks.h> +#include <linux/kernel.h> + +#include <asm/cpufeature.h> + +#include <net/checksum.h> + +/* Default version is sufficient for 32 bit */ +#ifndef CONFIG_32BIT +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + unsigned int ulen, uproto; + unsigned long sum = (__force unsigned long)csum; + + sum += (__force unsigned long)saddr->s6_addr32[0]; + sum += (__force unsigned long)saddr->s6_addr32[1]; + sum += (__force unsigned long)saddr->s6_addr32[2]; + sum += (__force unsigned long)saddr->s6_addr32[3]; + + sum += (__force unsigned long)daddr->s6_addr32[0]; + sum += (__force unsigned long)daddr->s6_addr32[1]; + sum += (__force unsigned long)daddr->s6_addr32[2]; + sum += (__force unsigned long)daddr->s6_addr32[3]; + + ulen = (__force unsigned int)htonl((unsigned int)len); + sum += ulen; + + uproto = (__force unsigned int)htonl(proto); + sum += uproto; + + /* + * Zbb support saves 4 instructions, so not worth checking without + * alternatives if supported + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + /* + * Zbb is likely available when the kernel is compiled with Zbb + * support, so nop when Zbb is available and jump when Zbb is + * not available. + */ + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + asm(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[sum], 32 \n\ + add %[sum], %[fold_temp], %[sum] \n\ + srli %[sum], %[sum], 32 \n\ + not %[fold_temp], %[sum] \n\ + roriw %[sum], %[sum], 16 \n\ + subw %[sum], %[fold_temp], %[sum] \n\ + .option pop" + : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp)); + return (__force __sum16)(sum >> 16); + } +no_zbb: + sum += ror64(sum, 32); + sum >>= 32; + return csum_fold((__force __wsum)sum); +} +EXPORT_SYMBOL(csum_ipv6_magic); +#endif /* !CONFIG_32BIT */ + +#ifdef CONFIG_32BIT +#define OFFSET_MASK 3 +#elif CONFIG_64BIT +#define OFFSET_MASK 7 +#endif + +static inline __no_sanitize_address unsigned long +do_csum_common(const unsigned long *ptr, const unsigned long *end, + unsigned long data) +{ + unsigned int shift; + unsigned long csum = 0, carry = 0; + + /* + * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be + * faster than doing 32-bit reads on architectures that support larger + * reads. + */ + while (ptr < end) { + csum += data; + carry += csum < data; + data = *(ptr++); + } + + /* + * Perform alignment (and over-read) bytes on the tail if any bytes + * leftover. + */ + shift = ((long)ptr - (long)end) * 8; +#ifdef __LITTLE_ENDIAN + data = (data << shift) >> shift; +#else + data = (data >> shift) << shift; +#endif + csum += data; + carry += csum < data; + csum += carry; + csum += csum < carry; + + return csum; +} + +/* + * Algorithm accounts for buff being misaligned. + * If buff is not aligned, will over-read bytes but not use the bytes that it + * shouldn't. The same thing will occur on the tail-end of the read. + */ +static inline __no_sanitize_address unsigned int +do_csum_with_alignment(const unsigned char *buff, int len) +{ + unsigned int offset, shift; + unsigned long csum, data; + const unsigned long *ptr, *end; + + /* + * Align address to closest word (double word on rv64) that comes before + * buff. This should always be in the same page and cache line. + * Directly call KASAN with the alignment we will be using. + */ + offset = (unsigned long)buff & OFFSET_MASK; + kasan_check_read(buff, len); + ptr = (const unsigned long *)(buff - offset); + + /* + * Clear the most significant bytes that were over-read if buff was not + * aligned. + */ + shift = offset * 8; + data = *(ptr++); +#ifdef __LITTLE_ENDIAN + data = (data >> shift) << shift; +#else + data = (data << shift) >> shift; +#endif + end = (const unsigned long *)(buff + len); + csum = do_csum_common(ptr, end, data); + +#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT + /* + * Zbb support saves 6 instructions, so not worth checking without + * alternatives if supported + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + /* + * Zbb is likely available when the kernel is compiled with Zbb + * support, so nop when Zbb is available and jump when Zbb is + * not available. + */ + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + +#ifdef CONFIG_32BIT + asm_volatile_goto(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 16 \n\ + andi %[offset], %[offset], 1 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + beq %[offset], zero, %l[end] \n\ + rev8 %[csum], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : [offset] "r" (offset) + : + : end); + + return (unsigned short)csum; +#else /* !CONFIG_32BIT */ + asm_volatile_goto(".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 32 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + srli %[csum], %[csum], 32 \n\ + roriw %[fold_temp], %[csum], 16 \n\ + addw %[csum], %[fold_temp], %[csum] \n\ + andi %[offset], %[offset], 1 \n\ + beq %[offset], zero, %l[end] \n\ + rev8 %[csum], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : [offset] "r" (offset) + : + : end); + + return (csum << 16) >> 48; +#endif /* !CONFIG_32BIT */ +end: + return csum >> 16; + } +no_zbb: +#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */ +#ifndef CONFIG_32BIT + csum += ror64(csum, 32); + csum >>= 32; +#endif + csum = (u32)csum + ror32((u32)csum, 16); + if (offset & 1) + return (u16)swab32(csum); + return csum >> 16; +} + +/* + * Does not perform alignment, should only be used if machine has fast + * misaligned accesses, or when buff is known to be aligned. + */ +static inline __no_sanitize_address unsigned int +do_csum_no_alignment(const unsigned char *buff, int len) +{ + unsigned long csum, data; + const unsigned long *ptr, *end; + + ptr = (const unsigned long *)(buff); + data = *(ptr++); + + kasan_check_read(buff, len); + + end = (const unsigned long *)(buff + len); + csum = do_csum_common(ptr, end, data); + + /* + * Zbb support saves 6 instructions, so not worth checking without + * alternatives if supported + */ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + unsigned long fold_temp; + + /* + * Zbb is likely available when the kernel is compiled with Zbb + * support, so nop when Zbb is available and jump when Zbb is + * not available. + */ + asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : + : + : + : no_zbb); + +#ifdef CONFIG_32BIT + asm (".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 16 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : + : ); + +#else /* !CONFIG_32BIT */ + asm (".option push \n\ + .option arch,+zbb \n\ + rori %[fold_temp], %[csum], 32 \n\ + add %[csum], %[fold_temp], %[csum] \n\ + srli %[csum], %[csum], 32 \n\ + roriw %[fold_temp], %[csum], 16 \n\ + addw %[csum], %[fold_temp], %[csum] \n\ + .option pop" + : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp) + : + : ); +#endif /* !CONFIG_32BIT */ + return csum >> 16; + } +no_zbb: +#ifndef CONFIG_32BIT + csum += ror64(csum, 32); + csum >>= 32; +#endif + csum = (u32)csum + ror32((u32)csum, 16); + return csum >> 16; +} + +/* + * Perform a checksum on an arbitrary memory address. + * Will do a light-weight address alignment if buff is misaligned, unless + * cpu supports fast misaligned accesses. + */ +unsigned int do_csum(const unsigned char *buff, int len) +{ + if (unlikely(len <= 0)) + return 0; + + /* + * Significant performance gains can be seen by not doing alignment + * on machines with fast misaligned accesses. + * + * There is some duplicate code between the "with_alignment" and + * "no_alignment" implmentations, but the overlap is too awkward to be + * able to fit in one function without introducing multiple static + * branches. The largest chunk of overlap was delegated into the + * do_csum_common function. + */ + if (static_branch_likely(&fast_misaligned_access_speed_key)) + return do_csum_no_alignment(buff, len); + + if (((unsigned long)buff & OFFSET_MASK) == 0) + return do_csum_no_alignment(buff, len); + + return do_csum_with_alignment(buff, len); +} diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c new file mode 100644 index 000000000000..be38a93cedae --- /dev/null +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SiFive + * Author: Andy Chiu <andy.chiu@sifive.com> + */ +#include <linux/linkage.h> +#include <asm/asm.h> + +#include <asm/vector.h> +#include <asm/simd.h> + +#ifdef CONFIG_MMU +#include <asm/asm-prototypes.h> +#endif + +#ifdef CONFIG_MMU +size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; +int __asm_vector_usercopy(void *dst, void *src, size_t n); +int fallback_scalar_usercopy(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +{ + size_t remain, copied; + + /* skip has_vector() check because it has been done by the asm */ + if (!may_use_simd()) + goto fallback; + + kernel_vector_begin(); + remain = __asm_vector_usercopy(dst, src, n); + kernel_vector_end(); + + if (remain) { + copied = n - remain; + dst += copied; + src += copied; + n = remain; + goto fallback; + } + + return remain; + +fallback: + return fallback_scalar_usercopy(dst, src, n); +} +#endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index a9d356d6c03c..bc22c078aba8 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -3,6 +3,8 @@ #include <asm/asm.h> #include <asm/asm-extable.h> #include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> .macro fixup op reg addr lbl 100: @@ -11,6 +13,13 @@ .endm SYM_FUNC_START(__asm_copy_to_user) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy + tail enter_vector_usercopy +#endif +SYM_FUNC_START(fallback_scalar_usercopy) /* Enable access to user memory */ li t6, SR_SUM @@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user) sub a0, t5, a0 ret SYM_FUNC_END(__asm_copy_to_user) +SYM_FUNC_END(fallback_scalar_usercopy) EXPORT_SYMBOL(__asm_copy_to_user) SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S new file mode 100644 index 000000000000..51ab5588e9ff --- /dev/null +++ b/arch/riscv/lib/uaccess_vector.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm-generic/export.h> +#include <asm/asm.h> +#include <asm/asm-extable.h> +#include <asm/csr.h> + +#define pDst a0 +#define pSrc a1 +#define iNum a2 + +#define iVL a3 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + + .macro fixup op reg addr lbl +100: + \op \reg, \addr + _asm_extable 100b, \lbl + .endm + +SYM_FUNC_START(__asm_vector_usercopy) + /* Enable access to user memory */ + li t6, SR_SUM + csrs CSR_STATUS, t6 + +loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + fixup vle8.v vData, (pSrc), 10f + sub iNum, iNum, iVL + add pSrc, pSrc, iVL + fixup vse8.v vData, (pDst), 11f + add pDst, pDst, iVL + bnez iNum, loop + + /* Exception fixup for vector load is shared with normal exit */ +10: + /* Disable access to user memory */ + csrc CSR_STATUS, t6 + mv a0, iNum + ret + + /* Exception fixup code for vector store. */ +11: + /* Undo the subtraction after vle8.v */ + add iNum, iNum, iVL + /* Make sure the scalar fallback skip already processed bytes */ + csrr t2, CSR_VSTART + sub iNum, iNum, t2 + j 10b +SYM_FUNC_END(__asm_vector_usercopy) diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 000000000000..b28f2430e52f --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ +#include <linux/linkage.h> +#include <linux/export.h> +#include <asm/asm.h> + +SYM_FUNC_START(xor_regs_2_) + vsetvli a3, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +SYM_FUNC_END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +SYM_FUNC_START(xor_regs_3_) + vsetvli a4, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +SYM_FUNC_END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +SYM_FUNC_START(xor_regs_4_) + vsetvli a5, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +SYM_FUNC_END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +SYM_FUNC_START(xor_regs_5_) + vsetvli a6, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + ret +SYM_FUNC_END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_) diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 35484d830fd6..dd1530af3ef1 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset) +{ + if (unlikely(!offset || offset > MAX_REG_OFFSET)) + return 0; + + return *(unsigned long *)((unsigned long)regs + offset); +} + static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset, unsigned long val) { @@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, return true; } +static bool +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); + unsigned long data, addr, offset; + + addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long)); + + offset = addr & 0x7UL; + addr &= ~0x7UL; + + data = *(unsigned long *)addr >> (offset * 8); + + regs_set_gpr(regs, reg_data * sizeof(unsigned long), data); + + regs->epc = get_ex_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_bpf(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: + return ex_handler_load_unaligned_zeropad(ex, regs); } BUG(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a65937336cdc..32cad6a65ccd 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1060,7 +1060,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset; #ifdef CONFIG_XIP_KERNEL +#ifdef CONFIG_64BIT kernel_map.page_offset = PAGE_OFFSET_L3; +#else + kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); +#endif kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); @@ -1387,10 +1391,29 @@ void __init misc_mem_init(void) } #ifdef CONFIG_SPARSEMEM_VMEMMAP +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) +{ + pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, + unsigned long addr, unsigned long next) +{ + vmemmap_verify((pte_t *)pmdp, node, addr, next); + return 1; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - return vmemmap_populate_basepages(start, end, node, NULL); + /* + * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we + * can't use hugepage mappings for 2-level page table because in case of + * memory hotplug, we are not able to update all the page tables with + * the new PMDs. + */ + return vmemmap_populate_hugepages(start, end, node, NULL); } #endif diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 8aadc5f71c93..8d12b26f5ac3 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -98,29 +98,23 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, - unsigned long size, unsigned long stride) +static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, + unsigned long start, unsigned long size, + unsigned long stride) { struct flush_tlb_range_data ftd; - const struct cpumask *cmask; - unsigned long asid = FLUSH_TLB_NO_ASID; bool broadcast; - if (mm) { - unsigned int cpuid; + if (cpumask_empty(cmask)) + return; - cmask = mm_cpumask(mm); - if (cpumask_empty(cmask)) - return; + if (cmask != cpu_online_mask) { + unsigned int cpuid; cpuid = get_cpu(); /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - - if (static_branch_unlikely(&use_asid_allocator)) - asid = atomic_long_read(&mm->context.id) & asid_mask; } else { - cmask = cpu_online_mask; broadcast = true; } @@ -140,25 +134,34 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, local_flush_tlb_range_asid(start, size, stride, asid); } - if (mm) + if (cmask != cpu_online_mask) put_cpu(); } +static inline unsigned long get_mm_asid(struct mm_struct *mm) +{ + return static_branch_unlikely(&use_asid_allocator) ? + atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; +} + void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), + 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int page_size) { - __flush_tlb_range(mm, start, end - start, page_size); + __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm), + start, end - start, page_size); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + addr, PAGE_SIZE, PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -190,18 +193,44 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } } - __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + start, end - start, stride_size); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); + __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID, + start, end - start, PAGE_SIZE); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE); + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + start, end - start, PMD_SIZE); } #endif + +bool arch_tlbbatch_should_defer(struct mm_struct *mm) +{ + return true; +} + +void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr) +{ + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); +} + +void arch_flush_tlb_batched_pending(struct mm_struct *mm) +{ + flush_tlb_mm(mm); +} + +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, + FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 4f9264d005c0..6e05c5c7bca1 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP config HVC_RISCV_SBI bool "RISC-V SBI console support" - depends on RISCV_SBI_V01 + depends on RISCV_SBI select HVC_DRIVER help This enables support for console output via RISC-V SBI calls, which diff --git a/drivers/tty/hvc/hvc_riscv_sbi.c b/drivers/tty/hvc/hvc_riscv_sbi.c index a72591279f86..cede8a572594 100644 --- a/drivers/tty/hvc/hvc_riscv_sbi.c +++ b/drivers/tty/hvc/hvc_riscv_sbi.c @@ -40,21 +40,44 @@ static ssize_t hvc_sbi_tty_get(uint32_t vtermno, u8 *buf, size_t count) return i; } -static const struct hv_ops hvc_sbi_ops = { +static const struct hv_ops hvc_sbi_v01_ops = { .get_chars = hvc_sbi_tty_get, .put_chars = hvc_sbi_tty_put, }; -static int __init hvc_sbi_init(void) +static ssize_t hvc_sbi_dbcn_tty_put(uint32_t vtermno, const u8 *buf, size_t count) { - return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16)); + return sbi_debug_console_write(buf, count); } -device_initcall(hvc_sbi_init); -static int __init hvc_sbi_console_init(void) +static ssize_t hvc_sbi_dbcn_tty_get(uint32_t vtermno, u8 *buf, size_t count) { - hvc_instantiate(0, 0, &hvc_sbi_ops); + return sbi_debug_console_read(buf, count); +} + +static const struct hv_ops hvc_sbi_dbcn_ops = { + .put_chars = hvc_sbi_dbcn_tty_put, + .get_chars = hvc_sbi_dbcn_tty_get, +}; + +static int __init hvc_sbi_init(void) +{ + int err; + + if (sbi_debug_console_available) { + err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_dbcn_ops, 256)); + if (err) + return err; + hvc_instantiate(0, 0, &hvc_sbi_dbcn_ops); + } else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) { + err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_v01_ops, 256)); + if (err) + return err; + hvc_instantiate(0, 0, &hvc_sbi_v01_ops); + } else { + return -ENODEV; + } return 0; } -console_initcall(hvc_sbi_console_init); +device_initcall(hvc_sbi_init); diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 8b1f5756002f..ffcf4882b25f 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST config SERIAL_EARLYCON_RISCV_SBI bool "Early console using RISC-V SBI" - depends on RISCV_SBI_V01 + depends on RISCV_SBI select SERIAL_CORE select SERIAL_CORE_CONSOLE select SERIAL_EARLYCON diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c b/drivers/tty/serial/earlycon-riscv-sbi.c index 27afb0b74ea7..0162155f0c83 100644 --- a/drivers/tty/serial/earlycon-riscv-sbi.c +++ b/drivers/tty/serial/earlycon-riscv-sbi.c @@ -15,17 +15,38 @@ static void sbi_putc(struct uart_port *port, unsigned char c) sbi_console_putchar(c); } -static void sbi_console_write(struct console *con, - const char *s, unsigned n) +static void sbi_0_1_console_write(struct console *con, + const char *s, unsigned int n) { struct earlycon_device *dev = con->data; uart_console_write(&dev->port, s, n, sbi_putc); } +static void sbi_dbcn_console_write(struct console *con, + const char *s, unsigned int n) +{ + int ret; + + while (n) { + ret = sbi_debug_console_write(s, n); + if (ret < 0) + break; + + s += ret; + n -= ret; + } +} + static int __init early_sbi_setup(struct earlycon_device *device, const char *opt) { - device->con->write = sbi_console_write; + if (sbi_debug_console_available) + device->con->write = sbi_dbcn_console_write; + else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) + device->con->write = sbi_0_1_console_write; + else + return -ENODEV; + return 0; } EARLYCON_DECLARE(sbi, early_sbi_setup); diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h index 43e18db89c14..ad928cce268b 100644 --- a/include/asm-generic/checksum.h +++ b/include/asm-generic/checksum.h @@ -2,6 +2,8 @@ #ifndef __ASM_GENERIC_CHECKSUM_H #define __ASM_GENERIC_CHECKSUM_H +#include <linux/bitops.h> + /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) @@ -31,9 +33,7 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); static inline __sum16 csum_fold(__wsum csum) { u32 sum = (__force u32)csum; - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; + return (__force __sum16)((~sum - ror32(sum, 16)) >> 16); } #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba25129563ad..975a07f9f1cc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -231,9 +231,10 @@ config DEBUG_INFO in the "Debug information" choice below, indicating that debug information will be generated for build targets. -# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which -# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215 -config AS_HAS_NON_CONST_LEB128 +# Clang generates .uleb128 with label differences for DWARF v5, a feature that +# older binutils ports do not support when utilizing RISC-V style linker +# relaxation: https://sourceware.org/bugzilla/show_bug.cgi?id=27215 +config AS_HAS_NON_CONST_ULEB128 def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:) choice @@ -258,7 +259,7 @@ config DEBUG_INFO_NONE config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT bool "Rely on the toolchain's implicit default DWARF version" select DEBUG_INFO - depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) + depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128) help The implicit default version of DWARF debug info produced by a toolchain changes over time. @@ -282,7 +283,8 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" select DEBUG_INFO - depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) + depends on !ARCH_HAS_BROKEN_DWARF5 + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128) help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc 5.0+ accepts the -gdwarf-5 flag but only had partial support for some diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 0eed92b77ba3..225bb7701460 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -1,15 +1,21 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Test cases csum_partial and csum_fold + * Test cases csum_partial, csum_fold, ip_fast_csum, csum_ipv6_magic */ #include <kunit/test.h> #include <asm/checksum.h> +#include <net/ip6_checksum.h> #define MAX_LEN 512 #define MAX_ALIGN 64 #define TEST_BUFLEN (MAX_LEN + MAX_ALIGN) +#define IPv4_MIN_WORDS 5 +#define IPv4_MAX_WORDS 15 +#define NUM_IPv6_TESTS 200 +#define NUM_IP_FAST_CSUM_TESTS 181 + /* Values for a little endian CPU. Byte swap each half on big endian CPU. */ static const u32 random_init_sum = 0x2847aab; static const u8 random_buf[] = { @@ -209,6 +215,237 @@ static const u32 init_sums_no_overflow[] = { 0xffff0000, 0xfffffffb, }; +static const __sum16 expected_csum_ipv6_magic[] = { + 0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f, 0x1034, 0x8422, 0x6fc0, + 0xd2f6, 0xbeb5, 0x9d3, 0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e, + 0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d, + 0xdf81, 0x8fd5, 0x3b5d, 0x8324, 0xf471, 0x83be, 0x1daf, 0x8c46, 0xe682, + 0xd1fb, 0x6b2e, 0xe687, 0x2a33, 0x4833, 0x2d67, 0x660f, 0x2e79, 0xd65e, + 0x6b62, 0x6672, 0x5dbd, 0x8680, 0xbaa5, 0x2229, 0x2125, 0x2d01, 0x1cc0, + 0x6d36, 0x33c0, 0xee36, 0xd832, 0x9820, 0x8a31, 0x53c5, 0x2e2, 0xdb0e, + 0x49ed, 0x17a7, 0x77a0, 0xd72e, 0x3d72, 0x7dc8, 0x5b17, 0xf55d, 0xa4d9, + 0x1446, 0x5d56, 0x6b2e, 0x69a5, 0xadb6, 0xff2a, 0x92e, 0xe044, 0x3402, + 0xbb60, 0xec7f, 0xe7e6, 0x1986, 0x32f4, 0x8f8, 0x5e00, 0x47c6, 0x3059, + 0x3969, 0xe957, 0x4388, 0x2854, 0x3334, 0xea71, 0xa6de, 0x33f9, 0x83fc, + 0x37b4, 0x5531, 0x3404, 0x1010, 0xed30, 0x610a, 0xc95, 0x9aed, 0x6ff, + 0x5136, 0x2741, 0x660e, 0x8b80, 0xf71, 0xa263, 0x88af, 0x7a73, 0x3c37, + 0x1908, 0x6db5, 0x2e92, 0x1cd2, 0x70c8, 0xee16, 0xe80, 0xcd55, 0x6e6, + 0x6434, 0x127, 0x655d, 0x2ea0, 0xb4f4, 0xdc20, 0x5671, 0xe462, 0xe52b, + 0xdb44, 0x3589, 0xc48f, 0xe60b, 0xd2d2, 0x66ad, 0x498, 0x436, 0xb917, + 0xf0ca, 0x1a6e, 0x1cb7, 0xbf61, 0x2870, 0xc7e8, 0x5b30, 0xe4a5, 0x168, + 0xadfc, 0xd035, 0xe690, 0xe283, 0xfb27, 0xe4ad, 0xb1a5, 0xf2d5, 0xc4b6, + 0x8a30, 0xd7d5, 0x7df9, 0x91d5, 0x63ed, 0x2d21, 0x312b, 0xab19, 0xa632, + 0x8d2e, 0xef06, 0x57b9, 0xc373, 0xbd1f, 0xa41f, 0x8444, 0x9975, 0x90cb, + 0xc49c, 0xe965, 0x4eff, 0x5a, 0xef6d, 0xe81a, 0xe260, 0x853a, 0xff7a, + 0x99aa, 0xb06b, 0xee19, 0xcc2c, 0xf34c, 0x7c49, 0xdac3, 0xa71e, 0xc988, + 0x3845, 0x1014 +}; + +static const __sum16 expected_fast_csum[] = { + 0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e, 0xe902, 0xa5e9, 0x87a5, 0x7187, + 0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a, + 0xedbe, 0xabee, 0x6aac, 0xe6b, 0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a, + 0x3a70, 0x9f3a, 0xe89e, 0x75e8, 0x7976, 0xfa79, 0x2cfa, 0x3c2c, 0x463c, + 0x7146, 0x7a71, 0x547a, 0xfd53, 0x99fc, 0xb699, 0x92b6, 0xdb91, 0xe8da, + 0x5fe9, 0x1e60, 0xae1d, 0x39ae, 0xf439, 0xa1f4, 0xdda1, 0xede, 0x790f, + 0x579, 0x1206, 0x9012, 0x2490, 0xd224, 0x5cd2, 0xa65d, 0xca7, 0x220d, + 0xf922, 0xbf9, 0x920b, 0x1b92, 0x361c, 0x2e36, 0x4d2e, 0x24d, 0x2, + 0xcfff, 0x90cf, 0xa591, 0x93a5, 0x7993, 0x9579, 0xc894, 0x50c8, 0x5f50, + 0xd55e, 0xcad5, 0xf3c9, 0x8f4, 0x4409, 0x5043, 0x5b50, 0x55b, 0x2205, + 0x1e22, 0x801e, 0x3780, 0xe137, 0x7ee0, 0xf67d, 0x3cf6, 0xa53c, 0x2ea5, + 0x472e, 0x5147, 0xcf51, 0x1bcf, 0x951c, 0x1e95, 0xc71e, 0xe4c7, 0xc3e4, + 0x3dc3, 0xee3d, 0xa4ed, 0xf9a4, 0xcbf8, 0x75cb, 0xb375, 0x50b4, 0x3551, + 0xf835, 0x19f8, 0x8c1a, 0x538c, 0xad52, 0xa3ac, 0xb0a3, 0x5cb0, 0x6c5c, + 0x5b6c, 0xc05a, 0x92c0, 0x4792, 0xbe47, 0x53be, 0x1554, 0x5715, 0x4b57, + 0xe54a, 0x20e5, 0x21, 0xd500, 0xa1d4, 0xa8a1, 0x57a9, 0xca57, 0x5ca, + 0x1c06, 0x4f1c, 0xe24e, 0xd9e2, 0xf0d9, 0x4af1, 0x474b, 0x8146, 0xe81, + 0xfd0e, 0x84fd, 0x7c85, 0xba7c, 0x17ba, 0x4a17, 0x964a, 0xf595, 0xff5, + 0x5310, 0x3253, 0x6432, 0x4263, 0x2242, 0xe121, 0x32e1, 0xf632, 0xc5f5, + 0x21c6, 0x7d22, 0x8e7c, 0x418e, 0x5641, 0x3156, 0x7c31, 0x737c, 0x373, + 0x2503, 0xc22a, 0x3c2, 0x4a04, 0x8549, 0x5285, 0xa352, 0xe8a3, 0x6fe8, + 0x1a6f, 0x211a, 0xe021, 0x38e0, 0x7638, 0xf575, 0x9df5, 0x169e, 0xf116, + 0x23f1, 0xcd23, 0xece, 0x660f, 0x4866, 0x6a48, 0x716a, 0xee71, 0xa2ee, + 0xb8a2, 0x61b9, 0xa361, 0xf7a2, 0x26f7, 0x1127, 0x6611, 0xe065, 0x36e0, + 0x1837, 0x3018, 0x1c30, 0x721b, 0x3e71, 0xe43d, 0x99e4, 0x9e9a, 0xb79d, + 0xa9b7, 0xcaa, 0xeb0c, 0x4eb, 0x1305, 0x8813, 0xb687, 0xa9b6, 0xfba9, + 0xd7fb, 0xccd8, 0x2ecd, 0x652f, 0xae65, 0x3fae, 0x3a40, 0x563a, 0x7556, + 0x2776, 0x1228, 0xef12, 0xf9ee, 0xcef9, 0x56cf, 0xa956, 0x24a9, 0xba24, + 0x5fba, 0x665f, 0xf465, 0x8ff4, 0x6d8f, 0x346d, 0x5f34, 0x385f, 0xd137, + 0xb8d0, 0xacb8, 0x55ac, 0x7455, 0xe874, 0x89e8, 0xd189, 0xa0d1, 0xb2a0, + 0xb8b2, 0x36b8, 0x5636, 0xd355, 0x8d3, 0x1908, 0x2118, 0xc21, 0x990c, + 0x8b99, 0x158c, 0x7815, 0x9e78, 0x6f9e, 0x4470, 0x1d44, 0x341d, 0x2634, + 0x3f26, 0x793e, 0xc79, 0xcc0b, 0x26cc, 0xd126, 0x1fd1, 0xb41f, 0xb6b4, + 0x22b7, 0xa122, 0xa1, 0x7f01, 0x837e, 0x3b83, 0xaf3b, 0x6fae, 0x916f, + 0xb490, 0xffb3, 0xceff, 0x50cf, 0x7550, 0x7275, 0x1272, 0x2613, 0xaa26, + 0xd5aa, 0x7d5, 0x9607, 0x96, 0xb100, 0xf8b0, 0x4bf8, 0xdd4c, 0xeddd, + 0x98ed, 0x2599, 0x9325, 0xeb92, 0x8feb, 0xcc8f, 0x2acd, 0x392b, 0x3b39, + 0xcb3b, 0x6acb, 0xd46a, 0xb8d4, 0x6ab8, 0x106a, 0x2f10, 0x892f, 0x789, + 0xc806, 0x45c8, 0x7445, 0x3c74, 0x3a3c, 0xcf39, 0xd7ce, 0x58d8, 0x6e58, + 0x336e, 0x1034, 0xee10, 0xe9ed, 0xc2e9, 0x3fc2, 0xd53e, 0xd2d4, 0xead2, + 0x8fea, 0x2190, 0x1162, 0xbe11, 0x8cbe, 0x6d8c, 0xfb6c, 0x6dfb, 0xd36e, + 0x3ad3, 0xf3a, 0x870e, 0xc287, 0x53c3, 0xc54, 0x5b0c, 0x7d5a, 0x797d, + 0xec79, 0x5dec, 0x4d5e, 0x184e, 0xd618, 0x60d6, 0xb360, 0x98b3, 0xf298, + 0xb1f2, 0x69b1, 0xf969, 0xef9, 0xab0e, 0x21ab, 0xe321, 0x24e3, 0x8224, + 0x5481, 0x5954, 0x7a59, 0xff7a, 0x7dff, 0x1a7d, 0xa51a, 0x46a5, 0x6b47, + 0xe6b, 0x830e, 0xa083, 0xff9f, 0xd0ff, 0xffd0, 0xe6ff, 0x7de7, 0xc67d, + 0xd0c6, 0x61d1, 0x3a62, 0xc3b, 0x150c, 0x1715, 0x4517, 0x5345, 0x3954, + 0xdd39, 0xdadd, 0x32db, 0x6a33, 0xd169, 0x86d1, 0xb687, 0x3fb6, 0x883f, + 0xa487, 0x39a4, 0x2139, 0xbe20, 0xffbe, 0xedfe, 0x8ded, 0x368e, 0xc335, + 0x51c3, 0x9851, 0xf297, 0xd6f2, 0xb9d6, 0x95ba, 0x2096, 0xea1f, 0x76e9, + 0x4e76, 0xe04d, 0xd0df, 0x80d0, 0xa280, 0xfca2, 0x75fc, 0xef75, 0x32ef, + 0x6833, 0xdf68, 0xc4df, 0x76c4, 0xb77, 0xb10a, 0xbfb1, 0x58bf, 0x5258, + 0x4d52, 0x6c4d, 0x7e6c, 0xb67e, 0xccb5, 0x8ccc, 0xbe8c, 0xc8bd, 0x9ac8, + 0xa99b, 0x52a9, 0x2f53, 0xc30, 0x3e0c, 0xb83d, 0x83b7, 0x5383, 0x7e53, + 0x4f7e, 0xe24e, 0xb3e1, 0x8db3, 0x618e, 0xc861, 0xfcc8, 0x34fc, 0x9b35, + 0xaa9b, 0xb1aa, 0x5eb1, 0x395e, 0x8639, 0xd486, 0x8bd4, 0x558b, 0x2156, + 0xf721, 0x4ef6, 0x14f, 0x7301, 0xdd72, 0x49de, 0x894a, 0x9889, 0x8898, + 0x7788, 0x7b77, 0x637b, 0xb963, 0xabb9, 0x7cab, 0xc87b, 0x21c8, 0xcb21, + 0xdfca, 0xbfdf, 0xf2bf, 0x6af2, 0x626b, 0xb261, 0x3cb2, 0xc63c, 0xc9c6, + 0xc9c9, 0xb4c9, 0xf9b4, 0x91f9, 0x4091, 0x3a40, 0xcc39, 0xd1cb, 0x7ed1, + 0x537f, 0x6753, 0xa167, 0xba49, 0x88ba, 0x7789, 0x3877, 0xf037, 0xd3ef, + 0xb5d4, 0x55b6, 0xa555, 0xeca4, 0xa1ec, 0xb6a2, 0x7b7, 0x9507, 0xfd94, + 0x82fd, 0x5c83, 0x765c, 0x9676, 0x3f97, 0xda3f, 0x6fda, 0x646f, 0x3064, + 0x5e30, 0x655e, 0x6465, 0xcb64, 0xcdca, 0x4ccd, 0x3f4c, 0x243f, 0x6f24, + 0x656f, 0x6065, 0x3560, 0x3b36, 0xac3b, 0x4aac, 0x714a, 0x7e71, 0xda7e, + 0x7fda, 0xda7f, 0x6fda, 0xff6f, 0xc6ff, 0xedc6, 0xd4ed, 0x70d5, 0xeb70, + 0xa3eb, 0x80a3, 0xca80, 0x3fcb, 0x2540, 0xf825, 0x7ef8, 0xf87e, 0x73f8, + 0xb474, 0xb4b4, 0x92b5, 0x9293, 0x93, 0x3500, 0x7134, 0x9071, 0xfa8f, + 0x51fa, 0x1452, 0xba13, 0x7ab9, 0x957a, 0x8a95, 0x6e8a, 0x6d6e, 0x7c6d, + 0x447c, 0x9744, 0x4597, 0x8945, 0xef88, 0x8fee, 0x3190, 0x4831, 0x8447, + 0xa183, 0x1da1, 0xd41d, 0x2dd4, 0x4f2e, 0xc94e, 0xcbc9, 0xc9cb, 0x9ec9, + 0x319e, 0xd531, 0x20d5, 0x4021, 0xb23f, 0x29b2, 0xd828, 0xecd8, 0x5ded, + 0xfc5d, 0x4dfc, 0xd24d, 0x6bd2, 0x5f6b, 0xb35e, 0x7fb3, 0xee7e, 0x56ee, + 0xa657, 0x68a6, 0x8768, 0x7787, 0xb077, 0x4cb1, 0x764c, 0xb175, 0x7b1, + 0x3d07, 0x603d, 0x3560, 0x3e35, 0xb03d, 0xd6b0, 0xc8d6, 0xd8c8, 0x8bd8, + 0x3e8c, 0x303f, 0xd530, 0xf1d4, 0x42f1, 0xca42, 0xddca, 0x41dd, 0x3141, + 0x132, 0xe901, 0x8e9, 0xbe09, 0xe0bd, 0x2ce0, 0x862d, 0x3986, 0x9139, + 0x6d91, 0x6a6d, 0x8d6a, 0x1b8d, 0xac1b, 0xedab, 0x54ed, 0xc054, 0xcebf, + 0xc1ce, 0x5c2, 0x3805, 0x6038, 0x5960, 0xd359, 0xdd3, 0xbe0d, 0xafbd, + 0x6daf, 0x206d, 0x2c20, 0x862c, 0x8e86, 0xec8d, 0xa2ec, 0xa3a2, 0x51a3, + 0x8051, 0xfd7f, 0x91fd, 0xa292, 0xaf14, 0xeeae, 0x59ef, 0x535a, 0x8653, + 0x3986, 0x9539, 0xb895, 0xa0b8, 0x26a0, 0x2227, 0xc022, 0x77c0, 0xad77, + 0x46ad, 0xaa46, 0x60aa, 0x8560, 0x4785, 0xd747, 0x45d7, 0x2346, 0x5f23, + 0x25f, 0x1d02, 0x71d, 0x8206, 0xc82, 0x180c, 0x3018, 0x4b30, 0x4b, + 0x3001, 0x1230, 0x2d12, 0x8c2d, 0x148d, 0x4015, 0x5f3f, 0x3d5f, 0x6b3d, + 0x396b, 0x473a, 0xf746, 0x44f7, 0x8945, 0x3489, 0xcb34, 0x84ca, 0xd984, + 0xf0d9, 0xbcf0, 0x63bd, 0x3264, 0xf332, 0x45f3, 0x7346, 0x5673, 0xb056, + 0xd3b0, 0x4ad4, 0x184b, 0x7d18, 0x6c7d, 0xbb6c, 0xfeba, 0xe0fe, 0x10e1, + 0x5410, 0x2954, 0x9f28, 0x3a9f, 0x5a3a, 0xdb59, 0xbdc, 0xb40b, 0x1ab4, + 0x131b, 0x5d12, 0x6d5c, 0xe16c, 0xb0e0, 0x89b0, 0xba88, 0xbb, 0x3c01, + 0xe13b, 0x6fe1, 0x446f, 0xa344, 0x81a3, 0xfe81, 0xc7fd, 0x38c8, 0xb38, + 0x1a0b, 0x6d19, 0xf36c, 0x47f3, 0x6d48, 0xb76d, 0xd3b7, 0xd8d2, 0x52d9, + 0x4b53, 0xa54a, 0x34a5, 0xc534, 0x9bc4, 0xed9b, 0xbeed, 0x3ebe, 0x233e, + 0x9f22, 0x4a9f, 0x774b, 0x4577, 0xa545, 0x64a5, 0xb65, 0x870b, 0x487, + 0x9204, 0x5f91, 0xd55f, 0x35d5, 0x1a35, 0x71a, 0x7a07, 0x4e7a, 0xfc4e, + 0x1efc, 0x481f, 0x7448, 0xde74, 0xa7dd, 0x1ea7, 0xaa1e, 0xcfaa, 0xfbcf, + 0xedfb, 0x6eee, 0x386f, 0x4538, 0x6e45, 0xd96d, 0x11d9, 0x7912, 0x4b79, + 0x494b, 0x6049, 0xac5f, 0x65ac, 0x1366, 0x5913, 0xe458, 0x7ae4, 0x387a, + 0x3c38, 0xb03c, 0x76b0, 0x9376, 0xe193, 0x42e1, 0x7742, 0x6476, 0x3564, + 0x3c35, 0x6a3c, 0xcc69, 0x94cc, 0x5d95, 0xe5e, 0xee0d, 0x4ced, 0xce4c, + 0x52ce, 0xaa52, 0xdaaa, 0xe4da, 0x1de5, 0x4530, 0x5445, 0x3954, 0xb639, + 0x81b6, 0x7381, 0x1574, 0xc215, 0x10c2, 0x3f10, 0x6b3f, 0xe76b, 0x7be7, + 0xbc7b, 0xf7bb, 0x41f7, 0xcc41, 0x38cc, 0x4239, 0xa942, 0x4a9, 0xc504, + 0x7cc4, 0x437c, 0x6743, 0xea67, 0x8dea, 0xe88d, 0xd8e8, 0xdcd8, 0x17dd, + 0x5718, 0x958, 0xa609, 0x41a5, 0x5842, 0x159, 0x9f01, 0x269f, 0x5a26, + 0x405a, 0xc340, 0xb4c3, 0xd4b4, 0xf4d3, 0xf1f4, 0x39f2, 0xe439, 0x67e4, + 0x4168, 0xa441, 0xdda3, 0xdedd, 0x9df, 0xab0a, 0xa5ab, 0x9a6, 0xba09, + 0x9ab9, 0xad9a, 0x5ae, 0xe205, 0xece2, 0xecec, 0x14ed, 0xd614, 0x6bd5, + 0x916c, 0x3391, 0x6f33, 0x206f, 0x8020, 0x780, 0x7207, 0x2472, 0x8a23, + 0xb689, 0x3ab6, 0xf739, 0x97f6, 0xb097, 0xa4b0, 0xe6a4, 0x88e6, 0x2789, + 0xb28, 0x350b, 0x1f35, 0x431e, 0x1043, 0xc30f, 0x79c3, 0x379, 0x5703, + 0x3256, 0x4732, 0x7247, 0x9d72, 0x489d, 0xd348, 0xa4d3, 0x7ca4, 0xbf7b, + 0x45c0, 0x7b45, 0x337b, 0x4034, 0x843f, 0xd083, 0x35d0, 0x6335, 0x4d63, + 0xe14c, 0xcce0, 0xfecc, 0x35ff, 0x5636, 0xf856, 0xeef8, 0x2def, 0xfc2d, + 0x4fc, 0x6e04, 0xb66d, 0x78b6, 0xbb78, 0x3dbb, 0x9a3d, 0x839a, 0x9283, + 0x593, 0xd504, 0x23d5, 0x5424, 0xd054, 0x61d0, 0xdb61, 0x17db, 0x1f18, + 0x381f, 0x9e37, 0x679e, 0x1d68, 0x381d, 0x8038, 0x917f, 0x491, 0xbb04, + 0x23bb, 0x4124, 0xd41, 0xa30c, 0x8ba3, 0x8b8b, 0xc68b, 0xd2c6, 0xebd2, + 0x93eb, 0xbd93, 0x99bd, 0x1a99, 0xea19, 0x58ea, 0xcf58, 0x73cf, 0x1073, + 0x9e10, 0x139e, 0xea13, 0xcde9, 0x3ecd, 0x883f, 0xf89, 0x180f, 0x2a18, + 0x212a, 0xce20, 0x73ce, 0xf373, 0x60f3, 0xad60, 0x4093, 0x8e40, 0xb98e, + 0xbfb9, 0xf1bf, 0x8bf1, 0x5e8c, 0xe95e, 0x14e9, 0x4e14, 0x1c4e, 0x7f1c, + 0xe77e, 0x6fe7, 0xf26f, 0x13f2, 0x8b13, 0xda8a, 0x5fda, 0xea5f, 0x4eea, + 0xa84f, 0x88a8, 0x1f88, 0x2820, 0x9728, 0x5a97, 0x3f5b, 0xb23f, 0x70b2, + 0x2c70, 0x232d, 0xf623, 0x4f6, 0x905, 0x7509, 0xd675, 0x28d7, 0x9428, + 0x3794, 0xf036, 0x2bf0, 0xba2c, 0xedb9, 0xd7ed, 0x59d8, 0xed59, 0x4ed, + 0xe304, 0x18e3, 0x5c19, 0x3d5c, 0x753d, 0x6d75, 0x956d, 0x7f95, 0xc47f, + 0x83c4, 0xa84, 0x2e0a, 0x5f2e, 0xb95f, 0x77b9, 0x6d78, 0xf46d, 0x1bf4, + 0xed1b, 0xd6ed, 0xe0d6, 0x5e1, 0x3905, 0x5638, 0xa355, 0x99a2, 0xbe99, + 0xb4bd, 0x85b4, 0x2e86, 0x542e, 0x6654, 0xd765, 0x73d7, 0x3a74, 0x383a, + 0x2638, 0x7826, 0x7677, 0x9a76, 0x7e99, 0x2e7e, 0xea2d, 0xa6ea, 0x8a7, + 0x109, 0x3300, 0xad32, 0x5fad, 0x465f, 0x2f46, 0xc62f, 0xd4c5, 0xad5, + 0xcb0a, 0x4cb, 0xb004, 0x7baf, 0xe47b, 0x92e4, 0x8e92, 0x638e, 0x1763, + 0xc17, 0xf20b, 0x1ff2, 0x8920, 0x5889, 0xcb58, 0xf8cb, 0xcaf8, 0x84cb, + 0x9f84, 0x8a9f, 0x918a, 0x4991, 0x8249, 0xff81, 0x46ff, 0x5046, 0x5f50, + 0x725f, 0xf772, 0x8ef7, 0xe08f, 0xc1e0, 0x1fc2, 0x9e1f, 0x8b9d, 0x108b, + 0x411, 0x2b04, 0xb02a, 0x1fb0, 0x1020, 0x7a0f, 0x587a, 0x8958, 0xb188, + 0xb1b1, 0x49b2, 0xb949, 0x7ab9, 0x917a, 0xfc91, 0xe6fc, 0x47e7, 0xbc47, + 0x8fbb, 0xea8e, 0x34ea, 0x2635, 0x1726, 0x9616, 0xc196, 0xa6c1, 0xf3a6, + 0x11f3, 0x4811, 0x3e48, 0xeb3e, 0xf7ea, 0x1bf8, 0xdb1c, 0x8adb, 0xe18a, + 0x42e1, 0x9d42, 0x5d9c, 0x6e5d, 0x286e, 0x4928, 0x9a49, 0xb09c, 0xa6b0, + 0x2a7, 0xe702, 0xf5e6, 0x9af5, 0xf9b, 0x810f, 0x8080, 0x180, 0x1702, + 0x5117, 0xa650, 0x11a6, 0x1011, 0x550f, 0xd554, 0xbdd5, 0x6bbe, 0xc66b, + 0xfc7, 0x5510, 0x5555, 0x7655, 0x177, 0x2b02, 0x6f2a, 0xb70, 0x9f0b, + 0xcf9e, 0xf3cf, 0x3ff4, 0xcb40, 0x8ecb, 0x768e, 0x5277, 0x8652, 0x9186, + 0x9991, 0x5099, 0xd350, 0x93d3, 0x6d94, 0xe6d, 0x530e, 0x3153, 0xa531, + 0x64a5, 0x7964, 0x7c79, 0x467c, 0x1746, 0x3017, 0x3730, 0x538, 0x5, + 0x1e00, 0x5b1e, 0x955a, 0xae95, 0x3eaf, 0xff3e, 0xf8ff, 0xb2f9, 0xa1b3, + 0xb2a1, 0x5b2, 0xad05, 0x7cac, 0x2d7c, 0xd32c, 0x80d2, 0x7280, 0x8d72, + 0x1b8e, 0x831b, 0xac82, 0xfdac, 0xa7fd, 0x15a8, 0xd614, 0xe0d5, 0x7be0, + 0xb37b, 0x61b3, 0x9661, 0x9d95, 0xc79d, 0x83c7, 0xd883, 0xead7, 0xceb, + 0xf60c, 0xa9f5, 0x19a9, 0xa019, 0x8f9f, 0xd48f, 0x3ad5, 0x853a, 0x985, + 0x5309, 0x6f52, 0x1370, 0x6e13, 0xa96d, 0x98a9, 0x5198, 0x9f51, 0xb69f, + 0xa1b6, 0x2ea1, 0x672e, 0x2067, 0x6520, 0xaf65, 0x6eaf, 0x7e6f, 0xee7e, + 0x17ef, 0xa917, 0xcea8, 0x9ace, 0xff99, 0x5dff, 0xdf5d, 0x38df, 0xa39, + 0x1c0b, 0xe01b, 0x46e0, 0xcb46, 0x90cb, 0xba90, 0x4bb, 0x9104, 0x9d90, + 0xc89c, 0xf6c8, 0x6cf6, 0x886c, 0x1789, 0xbd17, 0x70bc, 0x7e71, 0x17e, + 0x1f01, 0xa01f, 0xbaa0, 0x14bb, 0xfc14, 0x7afb, 0xa07a, 0x3da0, 0xbf3d, + 0x48bf, 0x8c48, 0x968b, 0x9d96, 0xfd9d, 0x96fd, 0x9796, 0x6b97, 0xd16b, + 0xf4d1, 0x3bf4, 0x253c, 0x9125, 0x6691, 0xc166, 0x34c1, 0x5735, 0x1a57, + 0xdc19, 0x77db, 0x8577, 0x4a85, 0x824a, 0x9182, 0x7f91, 0xfd7f, 0xb4c3, + 0xb5b4, 0xb3b5, 0x7eb3, 0x617e, 0x4e61, 0xa4f, 0x530a, 0x3f52, 0xa33e, + 0x34a3, 0x9234, 0xf091, 0xf4f0, 0x1bf5, 0x311b, 0x9631, 0x6a96, 0x386b, + 0x1d39, 0xe91d, 0xe8e9, 0x69e8, 0x426a, 0xee42, 0x89ee, 0x368a, 0x2837, + 0x7428, 0x5974, 0x6159, 0x1d62, 0x7b1d, 0xf77a, 0x7bf7, 0x6b7c, 0x696c, + 0xf969, 0x4cf9, 0x714c, 0x4e71, 0x6b4e, 0x256c, 0x6e25, 0xe96d, 0x94e9, + 0x8f94, 0x3e8f, 0x343e, 0x4634, 0xb646, 0x97b5, 0x8997, 0xe8a, 0x900e, + 0x8090, 0xfd80, 0xa0fd, 0x16a1, 0xf416, 0xebf4, 0x95ec, 0x1196, 0x8911, + 0x3d89, 0xda3c, 0x9fd9, 0xd79f, 0x4bd7, 0x214c, 0x3021, 0x4f30, 0x994e, + 0x5c99, 0x6f5d, 0x326f, 0xab31, 0x6aab, 0xe969, 0x90e9, 0x1190, 0xff10, + 0xa2fe, 0xe0a2, 0x66e1, 0x4067, 0x9e3f, 0x2d9e, 0x712d, 0x8170, 0xd180, + 0xffd1, 0x25ff, 0x3826, 0x2538, 0x5f24, 0xc45e, 0x1cc4, 0xdf1c, 0x93df, + 0xc793, 0x80c7, 0x2380, 0xd223, 0x7ed2, 0xfc7e, 0x22fd, 0x7422, 0x1474, + 0xb714, 0x7db6, 0x857d, 0xa85, 0xa60a, 0x88a6, 0x4289, 0x7842, 0xc278, + 0xf7c2, 0xcdf7, 0x84cd, 0xae84, 0x8cae, 0xb98c, 0x1aba, 0x4d1a, 0x884c, + 0x4688, 0xcc46, 0xd8cb, 0x2bd9, 0xbe2b, 0xa2be, 0x72a2, 0xf772, 0xd2f6, + 0x75d2, 0xc075, 0xa3c0, 0x63a3, 0xae63, 0x8fae, 0x2a90, 0x5f2a, 0xef5f, + 0x5cef, 0xa05c, 0x89a0, 0x5e89, 0x6b5e, 0x736b, 0x773, 0x9d07, 0xe99c, + 0x27ea, 0x2028, 0xc20, 0x980b, 0x4797, 0x2848, 0x9828, 0xc197, 0x48c2, + 0x2449, 0x7024, 0x570, 0x3e05, 0xd3e, 0xf60c, 0xbbf5, 0x69bb, 0x3f6a, + 0x740, 0xf006, 0xe0ef, 0xbbe0, 0xadbb, 0x56ad, 0xcf56, 0xbfce, 0xa9bf, + 0x205b, 0x6920, 0xae69, 0x50ae, 0x2050, 0xf01f, 0x27f0, 0x9427, 0x8993, + 0x8689, 0x4087, 0x6e40, 0xb16e, 0xa1b1, 0xe8a1, 0x87e8, 0x6f88, 0xfe6f, + 0x4cfe, 0xe94d, 0xd5e9, 0x47d6, 0x3148, 0x5f31, 0xc35f, 0x13c4, 0xa413, + 0x5a5, 0x2405, 0xc223, 0x66c2, 0x3667, 0x5e37, 0x5f5e, 0x2f5f, 0x8c2f, + 0xe48c, 0xd0e4, 0x4d1, 0xd104, 0xe4d0, 0xcee4, 0xfcf, 0x480f, 0xa447, + 0x5ea4, 0xff5e, 0xbefe, 0x8dbe, 0x1d8e, 0x411d, 0x1841, 0x6918, 0x5469, + 0x1155, 0xc611, 0xaac6, 0x37ab, 0x2f37, 0xca2e, 0x87ca, 0xbd87, 0xabbd, + 0xb3ab, 0xcb4, 0xce0c, 0xfccd, 0xa5fd, 0x72a5, 0xf072, 0x83f0, 0xfe83, + 0x97fd, 0xc997, 0xb0c9, 0xadb0, 0xe6ac, 0x88e6, 0x1088, 0xbe10, 0x16be, + 0xa916, 0xa3a8, 0x46a3, 0x5447, 0xe953, 0x84e8, 0x2085, 0xa11f, 0xfa1, + 0xdd0f, 0xbedc, 0x5abe, 0x805a, 0xc97f, 0x6dc9, 0x826d, 0x4a82, 0x934a, + 0x5293, 0xd852, 0xd3d8, 0xadd3, 0xf4ad, 0xf3f4, 0xfcf3, 0xfefc, 0xcafe, + 0xb7ca, 0x3cb8, 0xa13c, 0x18a1, 0x1418, 0xea13, 0x91ea, 0xf891, 0x53f8, + 0xa254, 0xe9a2, 0x87ea, 0x4188, 0x1c41, 0xdc1b, 0xf5db, 0xcaf5, 0x45ca, + 0x6d45, 0x396d, 0xde39, 0x90dd, 0x1e91, 0x1e, 0x7b00, 0x6a7b, 0xa46a, + 0xc9a3, 0x9bc9, 0x389b, 0x1139, 0x5211, 0x1f52, 0xeb1f, 0xabeb, 0x48ab, + 0x9348, 0xb392, 0x17b3, 0x1618, 0x5b16, 0x175b, 0xdc17, 0xdedb, 0x1cdf, + 0xeb1c, 0xd1ea, 0x4ad2, 0xd4b, 0xc20c, 0x24c2, 0x7b25, 0x137b, 0x8b13, + 0x618b, 0xa061, 0xff9f, 0xfffe, 0x72ff, 0xf572, 0xe2f5, 0xcfe2, 0xd2cf, + 0x75d3, 0x6a76, 0xc469, 0x1ec4, 0xfc1d, 0x59fb, 0x455a, 0x7a45, 0xa479, + 0xb7a4 +}; + static u8 tmp_buf[TEST_BUFLEN]; #define full_csum(buff, len, sum) csum_fold(csum_partial(buff, len, sum)) @@ -338,10 +575,57 @@ static void test_csum_no_carry_inputs(struct kunit *test) } } +static void test_ip_fast_csum(struct kunit *test) +{ + __sum16 csum_result, expected; + + for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) { + for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) { + csum_result = ip_fast_csum(random_buf + index, len); + expected = + expected_fast_csum[(len - IPv4_MIN_WORDS) * + NUM_IP_FAST_CSUM_TESTS + + index]; + CHECK_EQ(expected, csum_result); + } + } +} + +static void test_csum_ipv6_magic(struct kunit *test) +{ +#if defined(CONFIG_NET) + const struct in6_addr *saddr; + const struct in6_addr *daddr; + unsigned int len; + unsigned char proto; + unsigned int csum; + + const int daddr_offset = sizeof(struct in6_addr); + const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr); + const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) + + sizeof(int); + const int csum_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) + + sizeof(int) + sizeof(char); + + for (int i = 0; i < NUM_IPv6_TESTS; i++) { + saddr = (const struct in6_addr *)(random_buf + i); + daddr = (const struct in6_addr *)(random_buf + i + + daddr_offset); + len = *(unsigned int *)(random_buf + i + len_offset); + proto = *(random_buf + i + proto_offset); + csum = *(unsigned int *)(random_buf + i + csum_offset); + CHECK_EQ(expected_csum_ipv6_magic[i], + csum_ipv6_magic(saddr, daddr, len, proto, csum)); + } +#endif /* !CONFIG_NET */ +} + static struct kunit_case __refdata checksum_test_cases[] = { KUNIT_CASE(test_csum_fixed_random_inputs), KUNIT_CASE(test_csum_all_carry_inputs), KUNIT_CASE(test_csum_no_carry_inputs), + KUNIT_CASE(test_ip_fast_csum), + KUNIT_CASE(test_csum_ipv6_magic), {} }; diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index e2a6a69352df..81220390851a 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -24,6 +24,41 @@ extern void my_tramp2(void *); static unsigned long my_ip = (unsigned long)schedule; +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi sp,sp,-2*"SZREG"\n" +" "REG_S" t0,0*"SZREG"(sp)\n" +" "REG_S" ra,1*"SZREG"(sp)\n" +" call my_direct_func1\n" +" "REG_L" t0,0*"SZREG"(sp)\n" +" "REG_L" ra,1*"SZREG"(sp)\n" +" addi sp,sp,2*"SZREG"\n" +" jr t0\n" +" .size my_tramp1, .-my_tramp1\n" +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" + +" my_tramp2:\n" +" addi sp,sp,-2*"SZREG"\n" +" "REG_S" t0,0*"SZREG"(sp)\n" +" "REG_S" ra,1*"SZREG"(sp)\n" +" call my_direct_func2\n" +" "REG_L" t0,0*"SZREG"(sp)\n" +" "REG_L" ra,1*"SZREG"(sp)\n" +" addi sp,sp,2*"SZREG"\n" +" jr t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 2e349834d63c..f943e40d57fd 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -22,6 +22,47 @@ void my_direct_func2(unsigned long ip) extern void my_tramp1(void *); extern void my_tramp2(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func1\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func2\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index 9243dbfe4d0c..aed6df2927ce 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -17,6 +17,31 @@ void my_direct_func(unsigned long ip) extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" mv a0,t0\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index e39c3563ae4e..6ff546a5d7eb 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -19,6 +19,34 @@ void my_direct_func(struct vm_area_struct *vma, unsigned long address, extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-5*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" a1,1*"SZREG"(sp)\n" +" "REG_S" a2,2*"SZREG"(sp)\n" +" "REG_S" t0,3*"SZREG"(sp)\n" +" "REG_S" ra,4*"SZREG"(sp)\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" a1,1*"SZREG"(sp)\n" +" "REG_L" a2,2*"SZREG"(sp)\n" +" "REG_L" t0,3*"SZREG"(sp)\n" +" "REG_L" ra,4*"SZREG"(sp)\n" +" addi sp,sp,5*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 32c477da1e9a..ef0945670e1e 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -16,6 +16,30 @@ void my_direct_func(struct task_struct *p) extern void my_tramp(void *); +#ifdef CONFIG_RISCV +#include <asm/asm.h> + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" +" addi sp,sp,-3*"SZREG"\n" +" "REG_S" a0,0*"SZREG"(sp)\n" +" "REG_S" t0,1*"SZREG"(sp)\n" +" "REG_S" ra,2*"SZREG"(sp)\n" +" call my_direct_func\n" +" "REG_L" a0,0*"SZREG"(sp)\n" +" "REG_L" t0,1*"SZREG"(sp)\n" +" "REG_L" ra,2*"SZREG"(sp)\n" +" addi sp,sp,3*"SZREG"\n" +" jr t0\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_RISCV */ + #ifdef CONFIG_X86_64 #include <asm/ibt.h> diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index 50a2cc8aef38..c537d52fafc5 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -36,16 +36,14 @@ static void sigill_handler(int sig, siginfo_t *info, void *context) regs[0] += 4; } -static void cbo_insn(char *base, int fn) -{ - uint32_t insn = MK_CBO(fn); - - asm volatile( - "mv a0, %0\n" - "li a1, %1\n" - ".4byte %2\n" - : : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory"); -} +#define cbo_insn(base, fn) \ +({ \ + asm volatile( \ + "mv a0, %0\n" \ + "li a1, %1\n" \ + ".4byte %2\n" \ + : : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \ +}) static void cbo_inval(char *base) { cbo_insn(base, 0); } static void cbo_clean(char *base) { cbo_insn(base, 1); } @@ -97,7 +95,7 @@ static void test_zicboz(void *arg) block_size = pair.value; ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE && is_power_of_2(block_size), "Zicboz block size\n"); - ksft_print_msg("Zicboz block size: %ld\n", block_size); + ksft_print_msg("Zicboz block size: %llu\n", block_size); illegal_insn = false; cbo_zero(&mem[block_size]); @@ -121,7 +119,7 @@ static void test_zicboz(void *arg) for (j = 0; j < block_size; ++j) { if (mem[i * block_size + j] != expected) { ksft_test_result_fail("cbo.zero check\n"); - ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n", + ksft_print_msg("cbo.zero check: mem[%llu] != 0x%x\n", i * block_size + j, expected); return; } @@ -201,7 +199,7 @@ int main(int argc, char **argv) pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0); if (rc < 0) - ksft_exit_fail_msg("hwprobe() failed with %d\n", rc); + ksft_exit_fail_msg("hwprobe() failed with %ld\n", rc); assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) { diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c index d53e0889b59e..fd73c87804f3 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) /* Fail if the kernel claims not to recognize a base key. */ if ((i < 4) && (pairs[i].key != i)) ksft_exit_fail_msg("Failed to recognize base key: key != i, " - "key=%ld, i=%ld\n", pairs[i].key, i); + "key=%lld, i=%ld\n", pairs[i].key, i); if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR) continue; @@ -37,7 +37,7 @@ int main(int argc, char **argv) if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) continue; - ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value); + ksft_exit_fail_msg("Unexpected pair: (%lld, %llu)\n", pairs[i].key, pairs[i].value); } out = riscv_hwprobe(pairs, 8, 0, 0, 0); diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 9b8434f62f57..2e0db9c5be6c 100644 --- a/tools/testing/selftests/riscv/mm/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -18,6 +18,8 @@ struct addresses { int *on_56_addr; }; +// Only works on 64 bit +#if __riscv_xlen == 64 static inline void do_mmaps(struct addresses *mmap_addresses) { /* @@ -50,6 +52,7 @@ static inline void do_mmaps(struct addresses *mmap_addresses) mmap_addresses->on_56_addr = mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); } +#endif /* __riscv_xlen == 64 */ static inline int memory_layout(void) { diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c index 66764edb0d52..1dd94197da30 100644 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c @@ -27,7 +27,7 @@ int main(void) datap = malloc(MAX_VSIZE); if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE); + ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); exit(-1); } diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 2c0d2b1126c1..1f9969bed235 100644 --- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include <linux/wait.h> + #define THIS_PROGRAM "./vstate_exec_nolibc" int main(int argc, char **argv) diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 8dcd399ef7fc..27668fb3b6d0 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -60,7 +60,7 @@ int test_and_compare_child(long provided, long expected, int inherit) } rc = launch_test(inherit); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %d\n", rc, + ksft_test_result_fail("Test failed, check %d != %ld\n", rc, expected); return -2; } @@ -79,7 +79,7 @@ int main(void) pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %d\n", rc); + ksft_test_result_fail("hwprobe() failed with %ld\n", rc); return -1; } |