return Legalized;
}
case TargetOpcode::G_LOAD: {
  // Narrow a wide scalar/vector G_LOAD into NumParts loads of NarrowTy,
  // merged back into the original destination register. If the destination
  // size is not an exact multiple of NarrowSize, one extra smaller load
  // handles the leftover piece, which is combined in with G_INSERT.
  const auto &MMO = **MI.memoperands_begin();
  unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  int NumParts = SizeOp0 / NarrowSize;
  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
  unsigned LeftoverBits = DstTy.getSizeInBits() - HandledSize;

  // The leftover path below builds scalar G_INSERTs, which does not apply to
  // vector destinations.
  if (DstTy.isVector() && LeftoverBits != 0)
    return UnableToLegalize;

  // Extending load: the memory size is narrower than the destination, so
  // load the narrow value and any-extend it to the full width.
  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
    unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildAnyExt(DstReg, TmpReg);
    MI.eraseFromParent();
    return Legalized;
  }

  // Splitting would break atomicity; give up instead of emitting something
  // invalid.
  if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
      MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  LLT OffsetTy = LLT::scalar(
      MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());

  SmallVector<unsigned, 2> DstRegs;
  for (int i = 0; i < NumParts; ++i) {
    unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
    unsigned SrcReg = 0;
    unsigned Adjustment = i * NarrowSize / 8;
    unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);

    // NOTE(review): this MMO creation was missing from the garbled hunk;
    // reconstructed from the use of SplitMMO and Alignment below — confirm
    // against upstream.
    MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
        MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
        NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
        MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());

    MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                              Adjustment);
    MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO);
    DstRegs.push_back(PartDstReg);
  }

  // If there is a leftover piece, merge the handled parts into a temporary
  // of the handled size and insert it into the full result below.
  unsigned MergeResultReg = LeftoverBits == 0 ? DstReg :
    MRI.createGenericVirtualRegister(LLT::scalar(HandledSize));

  // For the leftover piece, still create the merge and insert it.
  // TODO: Would it be better to directly insert the intermediate pieces?
  if (DstTy.isVector())
    MIRBuilder.buildBuildVector(MergeResultReg, DstRegs);
  else
    MIRBuilder.buildMerge(MergeResultReg, DstRegs);

  if (LeftoverBits == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  // Load the leftover (sub-NarrowSize) piece and insert it after the merged
  // parts, starting from an implicit def of the full destination type.
  unsigned ImpDefReg = MRI.createGenericVirtualRegister(DstTy);
  unsigned Insert0Reg = MRI.createGenericVirtualRegister(DstTy);
  MIRBuilder.buildUndef(ImpDefReg);
  MIRBuilder.buildInsert(Insert0Reg, ImpDefReg, MergeResultReg, 0);

  unsigned PartDstReg
    = MRI.createGenericVirtualRegister(LLT::scalar(LeftoverBits));
  unsigned Offset = HandledSize / 8;

  MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
      &MMO, Offset, LeftoverBits / 8);

  unsigned SrcReg = 0;
  MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
                            Offset);
  MIRBuilder.buildLoad(PartDstReg, SrcReg, *SplitMMO);
  MIRBuilder.buildInsert(DstReg, Insert0Reg, PartDstReg, HandledSize);

  MI.eraseFromParent();
  return Legalized;
}
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
-
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
---
name: test_load_global_s96_align4
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; SI-LABEL: name: test_load_global_s96_align4
    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
    ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
    ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[COPY1]](s64), 0
    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
    ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
    ; VI-LABEL: name: test_load_global_s96_align4
    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s96) = G_LOAD %0 :: (load 12, addrspace 1, align 4)
    $vgpr0_vgpr1_vgpr2 = COPY %1

...
+
---
name: test_load_global_s160_align4
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; SI-LABEL: name: test_load_global_s160_align4
    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
    ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
    ; SI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
    ; SI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[MV]](s128), 0
    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
    ; SI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD2]](s32), 128
    ; SI: S_NOP 0, implicit [[INSERT1]](s160)
    ; VI-LABEL: name: test_load_global_s160_align4
    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
    ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
    ; VI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
    ; VI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[MV]](s128), 0
    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
    ; VI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD2]](s32), 128
    ; VI: S_NOP 0, implicit [[INSERT1]](s160)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s160) = G_LOAD %0 :: (load 20, addrspace 1, align 4)
    S_NOP 0, implicit %1
...
+
---
name: test_load_global_s224_align4
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; SI-LABEL: name: test_load_global_s224_align4
    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
    ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
    ; SI: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
    ; SI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
    ; SI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[MV]](s192), 0
    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
    ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
    ; SI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD3]](s32), 192
    ; SI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
    ; SI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT1]](s224), 0
    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
    ; VI-LABEL: name: test_load_global_s224_align4
    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
    ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
    ; VI: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
    ; VI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
    ; VI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[MV]](s192), 0
    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
    ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
    ; VI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD3]](s32), 192
    ; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
    ; VI: [[INSERT2:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT1]](s224), 0
    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](s256)
    %0:_(p1) = COPY $vgpr0_vgpr1
    %1:_(s224) = G_LOAD %0 :: (load 28, addrspace 1, align 4)

    %2:_(s256) = G_IMPLICIT_DEF
    %3:_(s256) = G_INSERT %2, %1, 0
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3

...