if (TypeIdx != 0)
return UnableToLegalize;
- uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- uint64_t NarrowSize = NarrowTy.getSizeInBits();
-
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
- int NumParts = SizeOp0 / NarrowSize;
-
- SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT LeftoverTy;
+ extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
+ LeftoverRegs);
+ for (Register Reg : LeftoverRegs)
+ SrcRegs.push_back(Reg);
+
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
Register OpReg = MI.getOperand(2).getReg();
uint64_t OpStart = MI.getOperand(3).getImm();
uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstStart = i * NarrowSize;
+ for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
+ unsigned DstStart = I * NarrowSize;
- if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
- // No part of the insert affects this subregister, forward the original.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
// The entire subregister is defined by this insert, forward the new
// value.
DstRegs.push_back(OpReg);
continue;
}
+ Register SrcReg = SrcRegs[I];
+ if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
+ // The leftover reg is narrower than NarrowTy; any-extend it to NarrowTy.
+ SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
+ }
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister; forward the original.
+ DstRegs.push_back(SrcReg);
+ continue;
+ }
+
// OpSegStart is where this destination segment would start in OpReg if it
// extended infinitely in both directions.
int64_t ExtractOffset, InsertOffset;
}
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
DstRegs.push_back(DstReg);
}
- assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ uint64_t WideSize = DstRegs.size() * NarrowSize;
Register DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
+ if (WideSize > RegTy.getSizeInBits()) {
+ Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
+ MIRBuilder.buildMerge(MergeReg, DstRegs);
+ MIRBuilder.buildTrunc(DstReg, MergeReg);
+ } else
MIRBuilder.buildMerge(DstReg, DstRegs);
+
MI.eraseFromParent();
return Legalized;
}
ret void
}
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_INSERT %{{[0-9]+}}:_, %{{[0-9]+}}:_(s32), 64 (in function: nonpow2_or_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_or_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_or_narrowing:
-define void @nonpow2_or_narrowing() {
- %a = add i128 undef, undef
- %b = trunc i128 %a to i96
- %a2 = add i128 undef, undef
- %b2 = trunc i128 %a2 to i96
- %dummy = or i96 %b, %b2
- store i96 %dummy, i96* undef
- ret void
-}
-
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s96) = G_INSERT %10:_, %8:_(s32), 64 (in function: nonpow2_load_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_load_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_load_narrowing:
-define void @nonpow2_load_narrowing() {
- %dummy = load i96, i96* undef
- store i96 %dummy, i96* undef
- ret void
-}
-
; Currently can't handle vector lengths that aren't an exact multiple of
; natively supported vector lengths. Test that the fall-back works for those.
; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
-
---- |
- target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
- target triple = "aarch64--"
- define void @test_inserts_nonpow2() { ret void }
-...
+# RUN: llc -O0 -mtriple=aarch64-- -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_inserts_nonpow2
; CHECK-LABEL: name: test_inserts_nonpow2
- ; CHECK: [[C:%[0-9]+]]:_(s64) = COPY $x3
- ; CHECK: $x0 = COPY [[C]]
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK: $x0 = COPY [[COPY3]](s64)
+ ; CHECK: RET_ReallyLR
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s64) = COPY $x2
$x0 = COPY %6
RET_ReallyLR
...
+---
+name: test_inserts_s96
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: test_inserts_s96
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s32)
+ ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s32), 0
+ ; CHECK: $x0 = COPY [[COPY3]](s64)
+ ; CHECK: $x1 = COPY [[INSERT]](s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s128) = G_MERGE_VALUES %0:_(s64), %1:_(s64)
+ %4:_(s96) = G_TRUNC %3(s128)
+ %5:_(s32) = G_TRUNC %2(s64)
+ %6:_(s96) = G_INSERT %4, %5(s32), 64
+ %7:_(s128) = G_ANYEXT %6(s96)
+ %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %7
+ $x0 = COPY %8
+ $x1 = COPY %9
+...
+---
+name: test_inserts_s65
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: test_inserts_s65
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s64)
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY1]](s64), 0
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s1)
+ ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s1), 0
+ ; CHECK: $x0 = COPY [[COPY3]](s64)
+ ; CHECK: $x1 = COPY [[INSERT]](s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s128) = G_MERGE_VALUES %0:_(s64), %1:_(s64)
+ %4:_(s65) = G_TRUNC %3(s128)
+ %5:_(s1) = G_TRUNC %2(s64)
+ %6:_(s65) = G_INSERT %4, %5(s1), 64
+ %7:_(s128) = G_ANYEXT %6(s65)
+ %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %7
+ $x0 = COPY %8
+ $x1 = COPY %9
+...