[X86] Add i32->float and i64->double bitcast pseudo instructions to store folding table.

author Craig Topper <craig.topper@gmail.com>

Mon, 19 Oct 2020 18:50:47 +0000 (11:50 -0700)

committer Craig Topper <craig.topper@gmail.com>

Mon, 19 Oct 2020 19:53:14 +0000 (12:53 -0700)
author Craig Topper <craig.topper@gmail.com>
Mon, 19 Oct 2020 18:50:47 +0000 (11:50 -0700)
committer Craig Topper <craig.topper@gmail.com>
Mon, 19 Oct 2020 19:53:14 +0000 (12:53 -0700)
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp

index e16382e..41ea3a9 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -300,11 +300,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
    { X86::MOV32rr,             X86::MOV32mr,             TB_FOLDED_STORE },
    { X86::MOV64ri32,           X86::MOV64mi32,           TB_FOLDED_STORE },
    { X86::MOV64rr,             X86::MOV64mr,             TB_FOLDED_STORE },
+  { X86::MOV64toSDrr,         X86::MOV64mr,             TB_FOLDED_STORE | TB_NO_REVERSE },
    { X86::MOV8ri,              X86::MOV8mi,              TB_FOLDED_STORE },
    { X86::MOV8rr,              X86::MOV8mr,              TB_FOLDED_STORE },
    { X86::MOV8rr_NOREX,        X86::MOV8mr_NOREX,        TB_FOLDED_STORE },
    { X86::MOVAPDrr,            X86::MOVAPDmr,            TB_FOLDED_STORE | TB_ALIGN_16 },
    { X86::MOVAPSrr,            X86::MOVAPSmr,            TB_FOLDED_STORE | TB_ALIGN_16 },
+  { X86::MOVDI2SSrr,          X86::MOV32mr,             TB_FOLDED_STORE | TB_NO_REVERSE },
    { X86::MOVDQArr,            X86::MOVDQAmr,            TB_FOLDED_STORE | TB_ALIGN_16 },
    { X86::MOVDQUrr,            X86::MOVDQUmr,            TB_FOLDED_STORE },
    { X86::MOVPDI2DIrr,         X86::MOVPDI2DImr,         TB_FOLDED_STORE },
@@ -357,6 +359,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
    { X86::VEXTRACTI64x4Zrr,    X86::VEXTRACTI64x4Zmr,    TB_FOLDED_STORE },
    { X86::VEXTRACTPSZrr,       X86::VEXTRACTPSZmr,       TB_FOLDED_STORE },
    { X86::VEXTRACTPSrr,        X86::VEXTRACTPSmr,        TB_FOLDED_STORE },
+  { X86::VMOV64toSDZrr,       X86::MOV64mr,             TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOV64toSDrr,        X86::MOV64mr,             TB_FOLDED_STORE | TB_NO_REVERSE },
    { X86::VMOVAPDYrr,          X86::VMOVAPDYmr,          TB_FOLDED_STORE | TB_ALIGN_32 },
    { X86::VMOVAPDZ128rr,       X86::VMOVAPDZ128mr,       TB_FOLDED_STORE | TB_ALIGN_16 },
    { X86::VMOVAPDZ256rr,       X86::VMOVAPDZ256mr,       TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -367,6 +371,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
    { X86::VMOVAPSZ256rr,       X86::VMOVAPSZ256mr,       TB_FOLDED_STORE | TB_ALIGN_32 },
    { X86::VMOVAPSZrr,          X86::VMOVAPSZmr,          TB_FOLDED_STORE | TB_ALIGN_64 },
    { X86::VMOVAPSrr,           X86::VMOVAPSmr,           TB_FOLDED_STORE | TB_ALIGN_16 },
+  { X86::VMOVDI2SSZrr,        X86::MOV32mr,             TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOVDI2SSrr,         X86::MOV32mr,             TB_FOLDED_STORE | TB_NO_REVERSE },
    { X86::VMOVDQA32Z128rr,     X86::VMOVDQA32Z128mr,     TB_FOLDED_STORE | TB_ALIGN_16 },
    { X86::VMOVDQA32Z256rr,     X86::VMOVDQA32Z256mr,     TB_FOLDED_STORE | TB_ALIGN_32 },
    { X86::VMOVDQA32Zrr,        X86::VMOVDQA32Zmr,        TB_FOLDED_STORE | TB_ALIGN_64 },
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp

index 56226bf..45fef99 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5526,6 +5526,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
  
    if (I != nullptr) {
      unsigned Opcode = I->DstOp;
+    bool FoldedLoad =
+        isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_LOAD) || OpNum > 0;
+    bool FoldedStore =
+        isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_STORE);
      MaybeAlign MinAlign =
          decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
      if (MinAlign && Alignment < *MinAlign)
@@ -5536,20 +5540,25 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
        const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum,
                                                    &RI, MF);
        unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
-      if (Size < RCSize) {
-        // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
-        // Check if it's safe to fold the load. If the size of the object is
-        // narrower than the load width, then it's not.
-        if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
-          return nullptr;
+      // Check if it's safe to fold the load. If the size of the object is
+      // narrower than the load width, then it's not.
+      // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
+      if (FoldedLoad && Size < RCSize) {
          // If this is a 64-bit load, but the spill slot is 32, then we can do
          // a 32-bit load which is implicitly zero-extended. This likely is
          // due to live interval analysis remat'ing a load from stack slot.
+        if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+          return nullptr;
          if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
            return nullptr;
          Opcode = X86::MOV32rm;
          NarrowToMOV32rm = true;
        }
+      // For stores, make sure the size of the object is equal to the size of
+      // the store. If the object is larger, the extra bits would be garbage. If
+      // the object is smaller we might overwrite another object or fault.
+      if (FoldedStore && Size != RCSize)
+        return nullptr;
      }
  
      if (isTwoAddrFold)
diff --git a/llvm/test/CodeGen/X86/pr47874.ll b/llvm/test/CodeGen/X86/pr47874.ll

index 76bfd46..9d628b1 100644 (file)
--- a/llvm/test/CodeGen/X86/pr47874.ll
+++ b/llvm/test/CodeGen/X86/pr47874.ll
@@ -9,8 +9,7 @@ define void @a(float* %arg, i32 %arg1) {
  ; SSE2-NEXT:    testl %esi, %esi
  ; SSE2-NEXT:    jle LBB0_3
  ; SSE2-NEXT:  ## %bb.1: ## %bb2
-; SSE2-NEXT:    movd %esi, %xmm0
-; SSE2-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; SSE2-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
  ; SSE2-NEXT:    movl %esi, %eax
  ; SSE2-NEXT:    .p2align 4, 0x90
  ; SSE2-NEXT:  LBB0_2: ## %bb6
@@ -31,8 +30,7 @@ define void @a(float* %arg, i32 %arg1) {
  ; AVX-NEXT:    testl %esi, %esi
  ; AVX-NEXT:    jle LBB0_3
  ; AVX-NEXT:  ## %bb.1: ## %bb2
-; AVX-NEXT:    vmovd %esi, %xmm0
-; AVX-NEXT:    vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; AVX-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
  ; AVX-NEXT:    movl %esi, %eax
  ; AVX-NEXT:    .p2align 4, 0x90
  ; AVX-NEXT:  LBB0_2: ## %bb6
@@ -78,8 +76,7 @@ define void @b(double* %arg, i64 %arg1) {
  ; SSE2-NEXT:    testq %rsi, %rsi
  ; SSE2-NEXT:    jle LBB1_3
  ; SSE2-NEXT:  ## %bb.1: ## %bb2
-; SSE2-NEXT:    movq %rsi, %xmm0
-; SSE2-NEXT:    movq %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill
+; SSE2-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
  ; SSE2-NEXT:    .p2align 4, 0x90
  ; SSE2-NEXT:  LBB1_2: ## %bb6
  ; SSE2-NEXT:    ## =>This Inner Loop Header: Depth=1
@@ -99,8 +96,7 @@ define void @b(double* %arg, i64 %arg1) {
  ; AVX-NEXT:    testq %rsi, %rsi
  ; AVX-NEXT:    jle LBB1_3
  ; AVX-NEXT:  ## %bb.1: ## %bb2
-; AVX-NEXT:    vmovq %rsi, %xmm0
-; AVX-NEXT:    vmovq %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill
+; AVX-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
  ; AVX-NEXT:    .p2align 4, 0x90
  ; AVX-NEXT:  LBB1_2: ## %bb6
  ; AVX-NEXT:    ## =>This Inner Loop Header: Depth=1
author	Craig Topper <craig.topper@gmail.com>
	Mon, 19 Oct 2020 18:50:47 +0000 (11:50 -0700)
committer	Craig Topper <craig.topper@gmail.com>
	Mon, 19 Oct 2020 19:53:14 +0000 (12:53 -0700)
llvm/lib/Target/X86/X86InstrFoldTables.cpp		patch | blob | history
llvm/lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
llvm/test/CodeGen/X86/pr47874.ll		patch | blob | history