author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2023-05-17 10:34:59 -0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2023-05-17 10:42:19 -0300 |
commit | 7f02ce62a6cfc0feb164bb7eb36c6647c00b43c8 (patch) | |
tree | 59bdbf9179e39dd1a2077d22df3b05cf4c924570 /tools/arch/x86/lib/memcpy_64.S | |
parent | 9bc83d6e3824f09cef73b01256962d950965ddc4 (diff) | |
tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
This is to get the changes from:
68674f94ffc9dddc ("x86: don't use REP_GOOD or ERMS for small memory copies")
20f3337d350c4e1b ("x86: don't use REP_GOOD or ERMS for small memory clearing")
This also makes the 'perf bench mem' files stop referring to the erms
versions that went away with the above patches.
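For context, the removed REP_GOOD path split each copy into a qword
phase and a byte-tail phase. Below is a rough user-space C sketch of
that strategy (rep_good_copy is a hypothetical name, not a kernel or
perf symbol); it mirrors the "shrq $3, %rcx; andl $7, %edx; rep movsq;
movl %edx, %ecx; rep movsb" sequence deleted in the diff further down.

#include <assert.h>
#include <stddef.h>
#include <string.h>

/*
 * Sketch of the removed REP_GOOD strategy: copy len/8 qwords first,
 * then the len%8 leftover bytes. Illustrative only; the real code is
 * the assembly in the diff below.
 */
static void *rep_good_copy(void *dst, const void *src, size_t len)
{
	unsigned long long *d8 = dst;
	const unsigned long long *s8 = src;
	size_t qwords = len >> 3;		/* shrq $3, %rcx */
	size_t tail = len & 7;			/* andl $7, %edx */
	unsigned char *d1;
	const unsigned char *s1;

	while (qwords--)			/* rep movsq */
		*d8++ = *s8++;

	d1 = (unsigned char *)d8;
	s1 = (const unsigned char *)s8;
	while (tail--)				/* rep movsb */
		*d1++ = *s1++;

	return dst;				/* rax = original destination */
}

int main(void)
{
	_Alignas(8) char src[] = "REP_GOOD path";	/* 14 bytes: 1 qword + 6 tail */
	_Alignas(8) char dst[sizeof(src)];

	rep_good_copy(dst, src, sizeof(src));
	assert(memcmp(dst, src, sizeof(src)) == 0);
	return 0;
}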
Syncing the copies addresses these perf tools build warnings:
Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/arch/x86/lib/memcpy_64.S')
-rw-r--r-- | tools/arch/x86/lib/memcpy_64.S | 34 |
1 file changed, 10 insertions, 24 deletions
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index a91ac666f758..d055b82d22cc 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -10,13 +10,6 @@
 .section .noinstr.text, "ax"
 
 /*
- * We build a jump to memcpy_orig by default which gets NOPped out on
- * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
- * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
- * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
- */
-
-/*
  * memcpy - Copy a memory block.
  *
  * Input:
@@ -26,17 +19,21 @@
  *
  * Output:
  * rax original destination
+ *
+ * The FSRM alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep movsb' itself is small enough to replace the call, but the
+ * two register moves blow up the code. And one of them is "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
  */
 SYM_TYPED_FUNC_START(__memcpy)
-	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
-		      "jmp memcpy_erms", X86_FEATURE_ERMS
+	ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
 
 	movq %rdi, %rax
 	movq %rdx, %rcx
-	shrq $3, %rcx
-	andl $7, %edx
-	rep movsq
-	movl %edx, %ecx
 	rep movsb
 	RET
 SYM_FUNC_END(__memcpy)
@@ -45,17 +42,6 @@ EXPORT_SYMBOL(__memcpy)
 SYM_FUNC_ALIAS(memcpy, __memcpy)
 EXPORT_SYMBOL(memcpy)
 
-/*
- * memcpy_erms() - enhanced fast string memcpy. This is faster and
- * simpler than memcpy. Use memcpy_erms when possible.
- */
-SYM_FUNC_START_LOCAL(memcpy_erms)
-	movq %rdi, %rax
-	movq %rdx, %rcx
-	rep movsb
-	RET
-SYM_FUNC_END(memcpy_erms)
-
 SYM_FUNC_START_LOCAL(memcpy_orig)
 	movq %rdi, %rax
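For comparison, the fast path that remains after this change is just
the three instructions kept above. Here is a minimal user-space sketch
of the same 'rep movsb' idea (x86-64 only, GCC/Clang inline asm;
rep_movsb_memcpy is a made-up name, not a kernel or perf symbol):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/*
 * Same shape as the new __memcpy fast path: keep the original
 * destination for the return value, put the byte count in rcx, and
 * let a single 'rep movsb' move everything.
 */
static void *rep_movsb_memcpy(void *dst, const void *src, size_t len)
{
	void *ret = dst;		/* movq %rdi, %rax */

	asm volatile("rep movsb"	/* rcx bytes from (rsi) to (rdi) */
		     : "+D" (dst), "+S" (src), "+c" (len)
		     : : "memory");
	return ret;
}

int main(void)
{
	char src[] = "perf bench mem memcpy";
	char dst[sizeof(src)] = { 0 };

	rep_movsb_memcpy(dst, src, sizeof(src));
	printf("%s\n", dst);
	return memcmp(dst, src, sizeof(src)) != 0;
}

On CPUs with X86_FEATURE_FSRM ("Fast Short REP MOVSB"), 'rep movsb' is
fast even for small lengths, which is what lets the REP_GOOD and ERMS
variants go away; older CPUs still take the "jmp memcpy_orig"
alternative.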