return None;
}
+// Returns a boolean that represents whether there exists a register
+// from FirstMI to the beginning of the block that can be renamed. If
+// one exists, we update Flags with its value.
+static bool updateFlagsWithRenameReg(
+ Optional<bool> MaybeCanRename, LdStPairFlags &Flags, MachineInstr &FirstMI,
+ MachineInstr &MI, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
+ SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+ const TargetRegisterInfo *TRI) {
+ if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+ if (!MaybeCanRename)
+ MaybeCanRename = {
+ canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
+
+ if (*MaybeCanRename) {
+ Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+ FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI);
+ if (MaybeRenameReg) {
+ Flags.setRenameReg(*MaybeRenameReg);
+ Flags.setMergeForward(true);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
continue;
}
}
+ // If the load/store pattern has been optimized and reordered
+ // into the following:
+ // ldr q0, [x1, #16]
+ // str q0, [x0, #16]
+ // ldr q0, [x1]
+ // str q0, [x0]
+ // and the destination register of the load/store instruction is
+ // the same register as or a sub/super register of the other
+ // load/store, it will not generate an LDP/STP, so we attempt to
+ // rename the register so that it can be recognised as a pair.
+ // TODO: This is currently supported for STPs, LDPs are not
+ // being generated yet.
+ if (TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
+ bool flagsHaveRenameReg = updateFlagsWithRenameReg(
+ MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween,
+ RequiredClasses, TRI);
+ if (flagsHaveRenameReg) {
+ MBBIWithRenameReg = MBBI;
+ continue;
+ }
+ }
// If the destination register of one load is the same register or a
// sub/super register of the other load, bail and keep looking. A
// load-pair instruction with both destination registers the same is
Flags.clearRenameReg();
return MBBI;
}
-
- if (DebugCounter::shouldExecute(RegRenamingCounter)) {
- if (!MaybeCanRename)
- MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
- RequiredClasses, TRI)};
-
- if (*MaybeCanRename) {
- Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
- FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
- TRI);
- if (MaybeRenameReg) {
- Flags.setRenameReg(*MaybeRenameReg);
- Flags.setMergeForward(true);
- MBBIWithRenameReg = MBBI;
- }
- }
+ bool flagsHaveRenameReg = updateFlagsWithRenameReg(
+ MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween,
+ RequiredClasses, TRI);
+ if (flagsHaveRenameReg) {
+ MBBIWithRenameReg = MBBI;
}
}
// Unable to combine these instructions due to interference in between.
; CHECK-NEXT: .cfi_offset w28, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: str q0, [sp, #16]
-; CHECK-NEXT: ldr q0, [x0, #32]
-; CHECK-NEXT: str q0, [sp, #32]
+; CHECK-NEXT: stp q1, q0, [sp]
+; CHECK-NEXT: ldr q1, [x0, #32]
; CHECK-NEXT: ldr q0, [x0, #48]
-; CHECK-NEXT: str q0, [sp, #48]
-; CHECK-NEXT: ldr q0, [x0, #64]
-; CHECK-NEXT: str q0, [sp, #64]
+; CHECK-NEXT: stp q1, q0, [sp, #32]
+; CHECK-NEXT: ldr q1, [x0, #64]
; CHECK-NEXT: ldr q0, [x0, #80]
-; CHECK-NEXT: str q0, [sp, #80]
-; CHECK-NEXT: ldr q0, [x0, #96]
-; CHECK-NEXT: str q0, [sp, #96]
+; CHECK-NEXT: stp q1, q0, [sp, #64]
+; CHECK-NEXT: ldr q1, [x0, #96]
; CHECK-NEXT: ldr q0, [x0, #112]
-; CHECK-NEXT: str q0, [sp, #112]
-; CHECK-NEXT: ldr q0, [x0, #128]
-; CHECK-NEXT: str q0, [sp, #128]
+; CHECK-NEXT: stp q1, q0, [sp, #96]
+; CHECK-NEXT: ldr q1, [x0, #128]
; CHECK-NEXT: ldr q0, [x0, #144]
-; CHECK-NEXT: str q0, [sp, #144]
-; CHECK-NEXT: ldr q0, [x0, #160]
-; CHECK-NEXT: str q0, [sp, #160]
+; CHECK-NEXT: stp q1, q0, [sp, #128]
+; CHECK-NEXT: ldr q1, [x0, #160]
; CHECK-NEXT: ldr q0, [x0, #176]
-; CHECK-NEXT: str q0, [sp, #176]
-; CHECK-NEXT: ldr q0, [x0, #192]
-; CHECK-NEXT: str q0, [sp, #192]
+; CHECK-NEXT: stp q1, q0, [sp, #160]
+; CHECK-NEXT: ldr q1, [x0, #192]
; CHECK-NEXT: ldr q0, [x0, #208]
-; CHECK-NEXT: str q0, [sp, #208]
-; CHECK-NEXT: ldr q0, [x0, #224]
-; CHECK-NEXT: str q0, [sp, #224]
+; CHECK-NEXT: stp q1, q0, [sp, #192]
+; CHECK-NEXT: ldr q1, [x0, #224]
; CHECK-NEXT: ldr q0, [x0, #240]
-; CHECK-NEXT: str q0, [sp, #240]
+; CHECK-NEXT: stp q1, q0, [sp, #224]
; CHECK-NEXT: bl byval_a64i32
; CHECK-NEXT: ldr x28, [sp, #272] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload
; CHECK-NOT: adrp x10, global+332
; CHECK-NOT: add x10, x10, :lo12:global+332
; CHECK: adrp x10, global+528
+; CHECK-NEXT: and w12, w8, #0xffffff
+; CHECK-NEXT: ldr w8, [x11]
; CHECK-NEXT: add x10, x10, :lo12:global+528
%struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot }
define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
-; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
+; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #-4]
br label %for.body
for.body:
%phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
-; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
+; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0, #-8]
br label %for.body
for.body:
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
-; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
+; CHECK: stp q{{[0-9]+}}, q{{[0-9]+}}, [x0, #-16]
br label %for.body
for.body:
%phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
-; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
+; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [x0, #-4]
br label %for.body
for.body:
%phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
-; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
+; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [x0, #-8]
br label %for.body
for.body:
%phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
-; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
+; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #-256
br label %for.body
for.body:
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
-; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
-; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
+; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #256
br label %for.body
for.body:
RET undef $lr
...
+
+# During ISel, the order of load/store pairs can be optimized and changed
+# which prevents STPs from being generated. These tests check that an STP
+# instruction is generated after register renaming.
+...
+---
+#
+# CHECK-LABEL: name: memcpy32
+# CHECK: liveins: $x0, $x1
+# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
+# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
+# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 32)
+# CHECK: RET undef $lr
+#
+name: memcpy32
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$x0', virtual-reg: '' }
+ - { reg: '$x1', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+ renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
+ STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32)
+ renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
+ STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32)
+ RET undef $lr
+
+...
+---
+#
+# CHECK-LABEL: name: memcpy64
+# CHECK: liveins: $x0, $x1
+# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
+# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
+# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 64)
+# CHECK: RET undef $lr
+#
+name: memcpy64
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$x0', virtual-reg: '' }
+ - { reg: '$x1', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+ renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
+ STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 64)
+ renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
+ STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 64)
+ RET undef $lr
+
+...
+---
+#
+# CHECK-LABEL: name: memcpy128
+# CHECK: liveins: $x0, $x1
+# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
+# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
+# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 128)
+# CHECK: RET undef $lr
+#
+name: memcpy128
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$x0', virtual-reg: '' }
+ - { reg: '$x1', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+ renamable $q0 = LDRQui renamable $x1, 1 :: (load 16)
+ STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 128)
+ renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
+ STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 128)
+ RET undef $lr