LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
// ...
// AN = BinOp<Ty/N> BN, CN
// A = G_MERGE_VALUES A1, ..., AN
-
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
-
- // List the registers where the destination will be scattered.
- SmallVector<unsigned, 2> DstRegs;
- // List the registers where the first argument will be split.
- SmallVector<unsigned, 2> SrcsReg1;
- // List the registers where the second argument will be split.
- SmallVector<unsigned, 2> SrcsReg2;
- // Create all the temporary registers.
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
-
- DstRegs.push_back(DstReg);
- SrcsReg1.push_back(SrcReg1);
- SrcsReg2.push_back(SrcReg2);
- }
- // Explode the big arguments into smaller chunks.
- MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
- MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
-
- // Do the operation on each small part.
- for (int i = 0; i < NumParts; ++i)
- MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
- {SrcsReg1[i], SrcsReg2[i]});
-
- // Gather the destination registers into the final destination.
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
+ return narrowScalarBasic(MI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ assert(MI.getNumOperands() == 3 && TypeIdx == 0);
+
+ SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<unsigned, 4> Src0Regs, Src0LeftoverRegs;
+ SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src0Regs, Src0LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
+ Src1Regs, Src1LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {Src0Regs[I], Src1Regs[I]});
+ DstRegs.push_back(Inst->getOperand(0).getReg());
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(
+ MI.getOpcode(),
+ {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
+ DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 0)
...
---
+name: test_and_s96
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_and_s96
+ ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
+ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
+ ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
+ ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]]
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[AND]](s64), 0
+ ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[AND1]](s32), 64
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(s96) = G_AND %0, %1
+ $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_and_128
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_and_128
+ ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]]
+ ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]]
+ ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ %2:_(s128) = G_AND %0, %1
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
+
+---
name: test_and_s7
body: |
bb.0:
...
---
+name: test_or_s96
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_or_s96
+ ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
+ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
+ ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
+ ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
+ ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]]
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[OR]](s64), 0
+ ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR1]](s32), 64
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(s96) = G_OR %0, %1
+ $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_or_128
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_or_128
+ ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
+ ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]]
+ ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]]
+ ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64)
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ %2:_(s128) = G_OR %0, %1
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
+
+---
name: test_or_s7
body: |
bb.0:
...
---
+name: test_xor_s96
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_xor_s96
+ ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
+ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
+ ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
+ ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]]
+ ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[XOR]](s64), 0
+ ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[XOR1]](s32), 64
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
+ %2:_(s96) = G_XOR %0, %1
+ $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_xor_128
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+ ; CHECK-LABEL: name: test_xor_128
+ ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]]
+ ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]]
+ ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[XOR]](s64), [[XOR1]](s64)
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+ %2:_(s128) = G_XOR %0, %1
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
+
+---
name: test_xor_s7
body: |
bb.0: