From 46a13a0ef847dbe3226f4b3d8726ef25c8909293 Mon Sep 17 00:00:00 2001
From: Clement Courbet
Date: Fri, 15 Apr 2022 11:11:26 +0200
Subject: [PATCH] [ExpandMemCmp] Properly expand `bcmp` to an equality pattern.

Before this change, constant-size `bcmp` would miss the opportunity to
generate a more efficient equality pattern and would generate a -1/0/1
pattern instead.

Differential Revision: https://reviews.llvm.org/D123849
---
 llvm/lib/CodeGen/ExpandMemCmp.cpp             |  7 ++++---
 llvm/test/CodeGen/X86/memcmp-optsize-x32.ll   | 11 ++++-------
 llvm/test/CodeGen/X86/memcmp-optsize.ll       | 11 ++++-------
 llvm/test/CodeGen/X86/memcmp-pgso-x32.ll      | 11 ++++-------
 llvm/test/CodeGen/X86/memcmp-pgso.ll          | 11 ++++-------
 llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll | 11 +++--------
 6 files changed, 23 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index bd6486b..17bf6b0 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -740,7 +740,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                          const TargetLowering *TLI, const DataLayout *DL,
                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
-                         DomTreeUpdater *DTU) {
+                         DomTreeUpdater *DTU, const bool IsBCmp) {
   NumMemCmpCalls++;
 
   // Early exit from expansion if -Oz.
@@ -760,7 +760,8 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   }
   // TTI call to check if target would like to expand memcmp. Also, get the
   // available load sizes.
-  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  const bool IsUsedForZeroCmp =
+      IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
   bool OptForSize = CI->getFunction()->hasOptSize() ||
                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
   auto Options = TTI->enableMemCmpExpansion(OptForSize,
@@ -864,7 +865,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
     LibFunc Func;
     if (TLI->getLibFunc(*CI, Func) &&
         (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
-        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
+        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
       return true;
     }
   }
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 2c45b85..db205e5 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -569,15 +569,12 @@ define i1 @length64_eq_const(i8* %X) nounwind optsize {
 define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize {
 ; X86-LABEL: bcmp_length2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl (%ecx), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl (%eax), %edx
-; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    rolw $8, %dx
-; X86-NEXT:    movzwl %cx, %eax
-; X86-NEXT:    movzwl %dx, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpw (%ecx), %dx
+; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @bcmp(i8* %X, i8* %Y, i32 2) nounwind
   ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index 4c5b339..ef0be61 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -584,13 +584,10 @@ define i1 @length64_eq_const(i8* %X) nounwind optsize {
 define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize {
 ; X64-LABEL: bcmp_length2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    movzwl (%rsi), %ecx
-; X64-NEXT:    rolw $8, %ax
-; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    movzwl (%rdi), %ecx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpw (%rsi), %cx
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
   ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
index 0953e35..85290c3 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
@@ -569,15 +569,12 @@ define i1 @length64_eq_const(i8* %X) nounwind !prof !14 {
 define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
 ; X86-LABEL: bcmp_length2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl (%ecx), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl (%eax), %edx
-; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    rolw $8, %dx
-; X86-NEXT:    movzwl %cx, %eax
-; X86-NEXT:    movzwl %dx, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpw (%ecx), %dx
+; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @bcmp(i8* %X, i8* %Y, i32 2) nounwind
   ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
index f763d91..28e9f36 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -584,13 +584,10 @@ define i1 @length64_eq_const(i8* %X) nounwind !prof !14 {
 define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
 ; X64-LABEL: bcmp_length2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    movzwl (%rsi), %ecx
-; X64-NEXT:    rolw $8, %ax
-; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    movzwl (%rdi), %ecx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpw (%rsi), %cx
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
   ret i32 %m
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll
index 72d049d..6f29dc7 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/bcmp.ll
@@ -9,14 +9,9 @@ define i32 @bcmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
 ; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
-; X64-NEXT:    [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; X64-NEXT:    [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
-; X64-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
-; X64-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
-; X64-NEXT:    [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
-; X64-NEXT:    [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
-; X64-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
-; X64-NEXT:    ret i32 [[TMP11]]
+; X64-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT:    ret i32 [[TMP6]]
 ;
   %call = tail call i32 @bcmp(i8* %x, i8* %y, i64 8)
   ret i32 %call
--
2.7.4
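
Note (not part of the patch): a minimal C++ sketch of the equality
pattern the expansion now emits for a constant-size `bcmp`; the function
name is hypothetical. It mirrors the new IR in bcmp.ll above (two wide
loads, one `icmp ne`, one `zext`) and is valid because bcmp's contract
only distinguishes zero from non-zero:

    #include <cstdint>
    #include <cstring>

    // Equality-only expansion of an 8-byte bcmp: one wide load per side
    // and a single comparison. Returns 0 iff the two blocks are equal.
    int bcmp8_equality_pattern(const void *x, const void *y) {
      uint64_t a, b;
      std::memcpy(&a, x, sizeof(a)); // unaligned-safe 8-byte loads,
      std::memcpy(&b, y, sizeof(b)); // like the `load i64, ... align 1` pairs
      return a != b;                 // icmp ne + zext; no bswap needed
    }

By contrast, memcmp must order its operands lexicographically, which is
why the previous expansion byte-swapped each load (to compare in
big-endian byte order) before computing the -1/0/1 result.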