From 8b6434bbb9f9414e88c462c0761b2bd50ef65d92 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 11 Dec 2018 13:38:43 +0000 Subject: [PATCH] Revert r348843 "[CodeGen] Allow mempcy/memset to generate small overlapping stores." Breaks ARM/memcpy-inline.ll llvm-svn: 348844 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 ++- llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll | 12 ++-- llvm/test/CodeGen/PowerPC/jaggedstructs.ll | 6 +- llvm/test/CodeGen/PowerPC/structsinmem.ll | 3 +- llvm/test/CodeGen/PowerPC/structsinregs.ll | 3 +- llvm/test/CodeGen/X86/memcpy-from-string.ll | 3 +- llvm/test/CodeGen/X86/memset-2.ll | 3 +- llvm/test/CodeGen/X86/memset-zero.ll | 42 +++++++++----- llvm/test/CodeGen/X86/unaligned-load.ll | 73 +++--------------------- 9 files changed, 61 insertions(+), 92 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b9afcc1..0136494 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5395,10 +5395,12 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. + // FIXME: Only does this for 64-bit or more since we don't have proper + // cost model for unaligned load / store. bool Fast; - if (NumMemOps && AllowOverlap && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && - Fast) + if (NumMemOps && AllowOverlap && + VTSize >= 8 && NewVTSize < Size && + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; diff --git a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll index 629cf37..951076c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll @@ -16,8 +16,10 @@ define i32 @t0() { entry: ; CHECK-LABEL: t0: -; CHECK: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7] -; CHECK: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7] +; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10] +; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10] +; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8] +; CHECK: strh [[REG1]], [x[[BASEREG2]], #8] ; CHECK: ldr [[REG2:x[0-9]+]], ; CHECK: str [[REG2]], call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) @@ -72,9 +74,9 @@ entry: define void @t5(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t5: -; CHECK: mov [[REG7:w[0-9]+]], #21337 -; CHECK: movk [[REG7]], -; CHECK: stur [[REG7]], [x0, #3] +; CHECK: strb wzr, [x0, #6] +; CHECK: mov [[REG7:w[0-9]+]], #21587 +; CHECK: strh [[REG7]], [x0, #4] ; CHECK: mov [[REG8:w[0-9]+]], ; CHECK: movk [[REG8]], ; CHECK: str [[REG8]], [x0] diff --git a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll index 45e043a..6128316 100644 --- a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll @@ -34,9 +34,11 @@ entry: ; CHECK-DAG: lwz {{[0-9]+}}, 178(1) ; CHECK-DAG: sth {{[0-9]+}}, 70(1) ; CHECK-DAG: stw {{[0-9]+}}, 66(1) -; CHECK-DAG: lwz {{[0-9]+}}, 188(1) +; CHECK-DAG: lbz {{[0-9]+}}, 191(1) +; CHECK-DAG: lhz {{[0-9]+}}, 189(1) ; CHECK-DAG: lwz {{[0-9]+}}, 185(1) -; CHECK-DAG: stw {{[0-9]+}}, 76(1) +; CHECK-DAG: stb {{[0-9]+}}, 79(1) +; CHECK-DAG: sth {{[0-9]+}}, 77(1) ; CHECK-DAG: stw {{[0-9]+}}, 73(1) ; CHECK-DAG: ld 6, 72(1) ; CHECK-DAG: ld 5, 64(1) diff --git a/llvm/test/CodeGen/PowerPC/structsinmem.ll b/llvm/test/CodeGen/PowerPC/structsinmem.ll index bbe8289..c8ea3be 100644 --- a/llvm/test/CodeGen/PowerPC/structsinmem.ll +++ b/llvm/test/CodeGen/PowerPC/structsinmem.ll @@ -157,7 +157,8 @@ entry: ; CHECK: stw {{[0-9]+}}, 147(1) ; CHECK: sth {{[0-9]+}}, 158(1) ; CHECK: stw {{[0-9]+}}, 154(1) -; CHECK: stw {{[0-9]+}}, 164(1) +; CHECK: stb {{[0-9]+}}, 167(1) +; CHECK: sth {{[0-9]+}}, 165(1) ; CHECK: stw {{[0-9]+}}, 161(1) } diff --git a/llvm/test/CodeGen/PowerPC/structsinregs.ll b/llvm/test/CodeGen/PowerPC/structsinregs.ll index 52976ca..d8afc8f 100644 --- a/llvm/test/CodeGen/PowerPC/structsinregs.ll +++ b/llvm/test/CodeGen/PowerPC/structsinregs.ll @@ -148,7 +148,8 @@ entry: ; CHECK: stw {{[0-9]+}}, 83(1) ; CHECK: sth {{[0-9]+}}, 94(1) ; CHECK: stw {{[0-9]+}}, 90(1) -; CHECK: stw {{[0-9]+}}, 100(1) +; CHECK: stb {{[0-9]+}}, 103(1) +; CHECK: sth {{[0-9]+}}, 101(1) ; CHECK: stw {{[0-9]+}}, 97(1) ; CHECK: ld 9, 96(1) ; CHECK: ld 8, 88(1) diff --git a/llvm/test/CodeGen/X86/memcpy-from-string.ll b/llvm/test/CodeGen/X86/memcpy-from-string.ll index af88ffa..4546672 100644 --- a/llvm/test/CodeGen/X86/memcpy-from-string.ll +++ b/llvm/test/CodeGen/X86/memcpy-from-string.ll @@ -16,7 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(i8* %tmp2) { ; X86-LABEL: foo: ; X86: # %bb.0: -; X86-NEXT: movl $3894379, 3(%rdi) # imm = 0x3B6C6B +; X86-NEXT: movb $0, 6(%rdi) +; X86-NEXT: movw $15212, 4(%rdi) # imm = 0x3B6C ; X86-NEXT: movl $1802117222, (%rdi) # imm = 0x6B6A2066 ; X86-NEXT: retq call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 3), i64 7, i1 false) diff --git a/llvm/test/CodeGen/X86/memset-2.ll b/llvm/test/CodeGen/X86/memset-2.ll index 0b91628..33a2227 100644 --- a/llvm/test/CodeGen/X86/memset-2.ll +++ b/llvm/test/CodeGen/X86/memset-2.ll @@ -51,10 +51,11 @@ define void @t4(i8* nocapture %s, i8 %a) nounwind { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: imull $16843009, %ecx, %ecx ## imm = 0x1010101 -; CHECK-NEXT: movl %ecx, 11(%eax) ; CHECK-NEXT: movl %ecx, 8(%eax) ; CHECK-NEXT: movl %ecx, 4(%eax) ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movw %cx, 12(%eax) +; CHECK-NEXT: movb %cl, 14(%eax) ; CHECK-NEXT: retl entry: tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i1 false) diff --git a/llvm/test/CodeGen/X86/memset-zero.ll b/llvm/test/CodeGen/X86/memset-zero.ll index f2f0d07..220968d 100644 --- a/llvm/test/CodeGen/X86/memset-zero.ll +++ b/llvm/test/CodeGen/X86/memset-zero.ll @@ -71,19 +71,22 @@ define void @memset_7(i8* %a) nounwind { ; X86-LABEL: memset_7: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 3(%eax) +; X86-NEXT: movb $0, 6(%eax) +; X86-NEXT: movw $0, 4(%eax) ; X86-NEXT: movl $0, (%eax) ; X86-NEXT: retl ; ; CORE2-LABEL: memset_7: ; CORE2: # %bb.0: # %entry -; CORE2-NEXT: movl $0, 3(%rdi) +; CORE2-NEXT: movb $0, 6(%rdi) +; CORE2-NEXT: movw $0, 4(%rdi) ; CORE2-NEXT: movl $0, (%rdi) ; CORE2-NEXT: retq ; ; NEHALEM-LABEL: memset_7: ; NEHALEM: # %bb.0: # %entry -; NEHALEM-NEXT: movl $0, 3(%rdi) +; NEHALEM-NEXT: movb $0, 6(%rdi) +; NEHALEM-NEXT: movw $0, 4(%rdi) ; NEHALEM-NEXT: movl $0, (%rdi) ; NEHALEM-NEXT: retq entry: @@ -117,20 +120,23 @@ define void @memset_11(i8* %a) nounwind { ; X86-LABEL: memset_11: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 7(%eax) +; X86-NEXT: movb $0, 10(%eax) +; X86-NEXT: movw $0, 8(%eax) ; X86-NEXT: movl $0, 4(%eax) ; X86-NEXT: movl $0, (%eax) ; X86-NEXT: retl ; ; CORE2-LABEL: memset_11: ; CORE2: # %bb.0: # %entry -; CORE2-NEXT: movl $0, 7(%rdi) +; CORE2-NEXT: movb $0, 10(%rdi) +; CORE2-NEXT: movw $0, 8(%rdi) ; CORE2-NEXT: movq $0, (%rdi) ; CORE2-NEXT: retq ; ; NEHALEM-LABEL: memset_11: ; NEHALEM: # %bb.0: # %entry -; NEHALEM-NEXT: movl $0, 7(%rdi) +; NEHALEM-NEXT: movb $0, 10(%rdi) +; NEHALEM-NEXT: movw $0, 8(%rdi) ; NEHALEM-NEXT: movq $0, (%rdi) ; NEHALEM-NEXT: retq entry: @@ -168,7 +174,8 @@ define void @memset_15(i8* %a) nounwind { ; X86-LABEL: memset_15: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 11(%eax) +; X86-NEXT: movb $0, 14(%eax) +; X86-NEXT: movw $0, 12(%eax) ; X86-NEXT: movl $0, 8(%eax) ; X86-NEXT: movl $0, 4(%eax) ; X86-NEXT: movl $0, (%eax) @@ -249,7 +256,8 @@ define void @memset_19(i8* %a) nounwind { ; X86-LABEL: memset_19: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 15(%eax) +; X86-NEXT: movb $0, 18(%eax) +; X86-NEXT: movw $0, 16(%eax) ; X86-NEXT: movl $0, 12(%eax) ; X86-NEXT: movl $0, 8(%eax) ; X86-NEXT: movl $0, 4(%eax) @@ -258,7 +266,8 @@ define void @memset_19(i8* %a) nounwind { ; ; CORE2-LABEL: memset_19: ; CORE2: # %bb.0: # %entry -; CORE2-NEXT: movl $0, 15(%rdi) +; CORE2-NEXT: movb $0, 18(%rdi) +; CORE2-NEXT: movw $0, 16(%rdi) ; CORE2-NEXT: movq $0, 8(%rdi) ; CORE2-NEXT: movq $0, (%rdi) ; CORE2-NEXT: retq @@ -267,7 +276,8 @@ define void @memset_19(i8* %a) nounwind { ; NEHALEM: # %bb.0: # %entry ; NEHALEM-NEXT: xorps %xmm0, %xmm0 ; NEHALEM-NEXT: movups %xmm0, (%rdi) -; NEHALEM-NEXT: movl $0, 15(%rdi) +; NEHALEM-NEXT: movb $0, 18(%rdi) +; NEHALEM-NEXT: movw $0, 16(%rdi) ; NEHALEM-NEXT: retq entry: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 19, i1 false) @@ -278,7 +288,8 @@ define void @memset_31(i8* %a) nounwind { ; X86-LABEL: memset_31: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 27(%eax) +; X86-NEXT: movb $0, 30(%eax) +; X86-NEXT: movw $0, 28(%eax) ; X86-NEXT: movl $0, 24(%eax) ; X86-NEXT: movl $0, 20(%eax) ; X86-NEXT: movl $0, 16(%eax) @@ -311,7 +322,8 @@ define void @memset_35(i8* %a) nounwind { ; X86-LABEL: memset_35: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 31(%eax) +; X86-NEXT: movb $0, 34(%eax) +; X86-NEXT: movw $0, 32(%eax) ; X86-NEXT: movl $0, 28(%eax) ; X86-NEXT: movl $0, 24(%eax) ; X86-NEXT: movl $0, 20(%eax) @@ -324,7 +336,8 @@ define void @memset_35(i8* %a) nounwind { ; ; CORE2-LABEL: memset_35: ; CORE2: # %bb.0: # %entry -; CORE2-NEXT: movl $0, 31(%rdi) +; CORE2-NEXT: movb $0, 34(%rdi) +; CORE2-NEXT: movw $0, 32(%rdi) ; CORE2-NEXT: movq $0, 24(%rdi) ; CORE2-NEXT: movq $0, 16(%rdi) ; CORE2-NEXT: movq $0, 8(%rdi) @@ -336,7 +349,8 @@ define void @memset_35(i8* %a) nounwind { ; NEHALEM-NEXT: xorps %xmm0, %xmm0 ; NEHALEM-NEXT: movups %xmm0, 16(%rdi) ; NEHALEM-NEXT: movups %xmm0, (%rdi) -; NEHALEM-NEXT: movl $0, 31(%rdi) +; NEHALEM-NEXT: movb $0, 34(%rdi) +; NEHALEM-NEXT: movw $0, 32(%rdi) ; NEHALEM-NEXT: retq entry: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 35, i1 false) diff --git a/llvm/test/CodeGen/X86/unaligned-load.ll b/llvm/test/CodeGen/X86/unaligned-load.ll index be8ca01..980fca2 100644 --- a/llvm/test/CodeGen/X86/unaligned-load.ll +++ b/llvm/test/CodeGen/X86/unaligned-load.ll @@ -6,23 +6,21 @@ @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8 -; This can be improved; see PR39952. - define void @func() nounwind ssp { ; I386-LABEL: func: ; I386: ## %bb.0: ## %entry -; I386-NEXT: subl $32, %esp +; I386-NEXT: pushl %esi +; I386-NEXT: subl $40, %esp +; I386-NEXT: leal {{[0-9]+}}(%esp), %esi ; I386-NEXT: .p2align 4, 0x90 ; I386-NEXT: LBB0_1: ## %bb ; I386-NEXT: ## =>This Inner Loop Header: Depth=1 -; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49 -; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453 -; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27 -; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D -; I386-NEXT: movl $1095911247, {{[0-9]+}}(%esp) ## imm = 0x4152474F -; I386-NEXT: movl $1380982853, {{[0-9]+}}(%esp) ## imm = 0x52502045 -; I386-NEXT: movl $1313821779, {{[0-9]+}}(%esp) ## imm = 0x4E4F5453 -; I386-NEXT: movl $1498564676, (%esp) ## imm = 0x59524844 +; I386-NEXT: subl $4, %esp +; I386-NEXT: pushl $31 +; I386-NEXT: pushl $_.str3 +; I386-NEXT: pushl %esi +; I386-NEXT: calll _memcpy +; I386-NEXT: addl $16, %esp ; I386-NEXT: jmp LBB0_1 ; ; CORE2-LABEL: func: @@ -63,57 +61,4 @@ return: ; No predecessors! ret void } -define void @func_aligned() nounwind ssp { -; I386-LABEL: func_aligned: -; I386: ## %bb.0: ## %entry -; I386-NEXT: subl $44, %esp -; I386-NEXT: movaps {{.*#+}} xmm0 = [1498564676,1313821779,1380982853,1095911247] -; I386-NEXT: .p2align 4, 0x90 -; I386-NEXT: LBB1_1: ## %bb -; I386-NEXT: ## =>This Inner Loop Header: Depth=1 -; I386-NEXT: movaps %xmm0, (%esp) -; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49 -; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453 -; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27 -; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D -; I386-NEXT: jmp LBB1_1 -; -; CORE2-LABEL: func_aligned: -; CORE2: ## %bb.0: ## %entry -; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320 -; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D -; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045 -; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844 -; CORE2-NEXT: .p2align 4, 0x90 -; CORE2-NEXT: LBB1_1: ## %bb -; CORE2-NEXT: ## =>This Inner Loop Header: Depth=1 -; CORE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; CORE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; CORE2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; CORE2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; CORE2-NEXT: jmp LBB1_1 -; -; COREI7-LABEL: func_aligned: -; COREI7: ## %bb.0: ## %entry -; COREI7-NEXT: movups _.str3+{{.*}}(%rip), %xmm0 -; COREI7-NEXT: movups {{.*}}(%rip), %xmm1 -; COREI7-NEXT: .p2align 4, 0x90 -; COREI7-NEXT: LBB1_1: ## %bb -; COREI7-NEXT: ## =>This Inner Loop Header: Depth=1 -; COREI7-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) -; COREI7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; COREI7-NEXT: jmp LBB1_1 -entry: - %String2Loc = alloca [31 x i8], align 16 - br label %bb - -bb: ; preds = %bb, %entry - %String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false) - br label %bb - -return: ; No predecessors! - ret void -} - declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind -- 2.7.4