x86: don't use REP_GOOD or ERMS for user memory copies
author: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Apr 2023 20:14:59 +0000 (13:14 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Wed, 19 Apr 2023 00:05:28 +0000 (17:05 -0700)
The modern target to use is FSRM (Fast Short REP MOVS), and the other
cases should only be used for bigger areas (ie mainly things like page
clearing).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/x86/include/asm/uaccess_64.h
arch/x86/lib/copy_user_64.S

index d13d71a..c697cf1 100644 (file)
@@ -18,9 +18,7 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
+copy_user_fast_string(void *to, const void *from, unsigned len);
 __must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
 
@@ -30,15 +28,12 @@ copy_user_generic(void *to, const void *from, unsigned len)
        unsigned ret;
 
        /*
-        * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
-        * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+        * If CPU has FSRM feature, use 'rep movs'.
         * Otherwise, use copy_user_generic_unrolled.
         */
-       alternative_call_2(copy_user_generic_unrolled,
-                        copy_user_generic_string,
-                        X86_FEATURE_REP_GOOD,
-                        copy_user_enhanced_fast_string,
-                        X86_FEATURE_ERMS,
+       alternative_call(copy_user_generic_unrolled,
+                        copy_user_fast_string,
+                        X86_FEATURE_FSRM,
                         ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
                                     "=d" (len)),
                         "1" (to), "2" (from), "3" (len)
index 9dec1b3..d0283bc 100644 (file)
@@ -104,8 +104,8 @@ SYM_FUNC_START(copy_user_generic_unrolled)
 SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
-/* Some CPUs run faster using the string copy instructions.
- * This is also a lot simpler. Use them when possible.
+/*
+ * Some CPUs support FSRM for Fast Short REP MOVS.
  *
  * Only 4GB of copy is supported. This shouldn't be a problem
  * because the kernel normally only writes from/to page sized chunks
@@ -122,58 +122,21 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-SYM_FUNC_START(copy_user_generic_string)
+SYM_FUNC_START(copy_user_fast_string)
        ASM_STAC
-       cmpl $8,%edx
-       jb 2f           /* less than 8 bytes, go to byte copy loop */
-       ALIGN_DESTINATION
        movl %edx,%ecx
-       shrl $3,%ecx
-       andl $7,%edx
-1:     rep movsq
-2:     movl %edx,%ecx
-3:     rep movsb
+1:     rep movsb
        xorl %eax,%eax
        ASM_CLAC
        RET
 
-11:    leal (%rdx,%rcx,8),%ecx
-12:    movl %ecx,%edx          /* ecx is zerorest also */
-       jmp .Lcopy_user_handle_tail
-
-       _ASM_EXTABLE_CPY(1b, 11b)
-       _ASM_EXTABLE_CPY(3b, 12b)
-SYM_FUNC_END(copy_user_generic_string)
-EXPORT_SYMBOL(copy_user_generic_string)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
- * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_enhanced_fast_string)
-       ASM_STAC
-       /* CPUs without FSRM should avoid rep movsb for short copies */
-       ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
-       movl %edx,%ecx
-1:     rep movsb
-       xorl %eax,%eax
+12:    movl %ecx,%eax          /* ecx is zerorest also */
        ASM_CLAC
        RET
 
-12:    movl %ecx,%edx          /* ecx is zerorest also */
-       jmp .Lcopy_user_handle_tail
-
        _ASM_EXTABLE_CPY(1b, 12b)
-SYM_FUNC_END(copy_user_enhanced_fast_string)
-EXPORT_SYMBOL(copy_user_enhanced_fast_string)
+SYM_FUNC_END(copy_user_fast_string)
+EXPORT_SYMBOL(copy_user_fast_string)
 
 /*
  * Try to copy last bytes and clear the rest if needed.