From: Matt Arsenault Date: Mon, 4 Feb 2019 23:41:59 +0000 (+0000) Subject: GlobalISel: Combine g_extract with g_merge_values X-Git-Tag: llvmorg-10-init~12820 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=24f14993e8743a4944756a8acc68d8c6fa7de9a6;p=platform%2Fupstream%2Fllvm.git GlobalISel: Combine g_extract with g_merge_values Try to use the underlying source registers. This enables legalization in more cases where some irregular operations are widened and others narrowed. This seems to make the test_combines_2 AArch64 test worse, since the MERGE_VALUES has multiple uses. Since this should be required for legalization, a hasOneUse check is probably inappropriate (or maybe should only be used if the merge is legal?). llvm-svn: 353121 --- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 83a3622..223fbc7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -163,6 +163,16 @@ public: return false; } + static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) { + if (OpTy.isVector() && DestTy.isVector()) + return TargetOpcode::G_CONCAT_VECTORS; + + if (OpTy.isVector() && !DestTy.isVector()) + return TargetOpcode::G_BUILD_VECTOR; + + return TargetOpcode::G_MERGE_VALUES; + } + bool tryCombineMerges(MachineInstr &MI, SmallVectorImpl &DeadInsts) { @@ -171,16 +181,10 @@ public: unsigned NumDefs = MI.getNumOperands() - 1; - unsigned MergingOpcode; LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg()); LLT DestTy = MRI.getType(MI.getOperand(0).getReg()); - if (OpTy.isVector() && DestTy.isVector()) - MergingOpcode = TargetOpcode::G_CONCAT_VECTORS; - else if (OpTy.isVector() && !DestTy.isVector()) - MergingOpcode = TargetOpcode::G_BUILD_VECTOR; - else - MergingOpcode = TargetOpcode::G_MERGE_VALUES; + unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy); MachineInstr *MergeI = getOpcodeDef(MergingOpcode, MI.getOperand(NumDefs).getReg(), MRI); @@ -249,6 +253,65 @@ public: return true; } + static bool isMergeLikeOpcode(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_MERGE_VALUES: + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_CONCAT_VECTORS: + return true; + default: + return false; + } + } + + bool tryCombineExtract(MachineInstr &MI, + SmallVectorImpl &DeadInsts) { + assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); + + // Try to use the source registers from a G_MERGE_VALUES + // + // %2 = G_MERGE_VALUES %0, %1 + // %3 = G_EXTRACT %2, N + // => + // + // for N < %2.getSizeInBits() / 2 + // %3 = G_EXTRACT %0, N + // + // for N >= %2.getSizeInBits() / 2 + // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() + + unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg()); + MachineInstr *MergeI = MRI.getVRegDef(Src); + if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode())) + return false; + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(Src); + + // TODO: Do we need to check if the resulting extract is supported? + unsigned ExtractDstSize = DstTy.getSizeInBits(); + unsigned Offset = MI.getOperand(2).getImm(); + unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; + unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; + unsigned MergeSrcIdx = Offset / MergeSrcSize; + + // Compute the offset of the last bit the extract needs. 
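+    // For example, a G_EXTRACT of s16 at bit offset 82 from a G_MERGE_VALUES
+    // of two s64 sources has MergeSrcSize = 64 and MergeSrcIdx = 82 / 64 = 1;
+    // EndMergeSrcIdx = (82 + 16 - 1) / 64 = 1 as well, so the extract is
+    // rewritten to read the second source at offset 82 - 1 * 64 = 18. An s64
+    // extract at offset 32 instead has MergeSrcIdx = 0 but EndMergeSrcIdx = 1,
+    // and is rejected below because it spans two sources.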
+ unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; + + // Can't handle the case where the extract spans multiple inputs. + if (MergeSrcIdx != EndMergeSrcIdx) + return false; + + // TODO: We could modify MI in place in most cases. + Builder.setInstr(MI); + Builder.buildExtract( + MI.getOperand(0).getReg(), + MergeI->getOperand(MergeSrcIdx + 1).getReg(), + Offset - MergeSrcIdx * MergeSrcSize); + markInstAndDefDead(MI, *MergeI, DeadInsts); + return true; + } + /// Try to combine away MI. /// Returns true if it combined away the MI. /// Adds instructions that are dead as a result of the combine @@ -266,6 +329,8 @@ public: return tryCombineSExt(MI, DeadInsts); case TargetOpcode::G_UNMERGE_VALUES: return tryCombineMerges(MI, DeadInsts); + case TargetOpcode::G_EXTRACT: + return tryCombineExtract(MI, DeadInsts); case TargetOpcode::G_TRUNC: { bool Changed = false; for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg())) @@ -276,6 +341,23 @@ public: } private: + + static unsigned getArtifactSrcReg(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_UNMERGE_VALUES: + return MI.getOperand(MI.getNumOperands() - 1).getReg(); + case TargetOpcode::G_EXTRACT: + return MI.getOperand(1).getReg(); + default: + llvm_unreachable("Not a legalization artifact happen"); + } + } + /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be /// dead due to MI being killed, then mark DefMI as dead too. /// Some of the combines (extends(trunc)), try to walk through redundant @@ -296,8 +378,8 @@ private: // and as a result, %3, %2, %1 are dead. MachineInstr *PrevMI = &MI; while (PrevMI != &DefMI) { - unsigned PrevRegSrc = - PrevMI->getOperand(PrevMI->getNumOperands() - 1).getReg(); + unsigned PrevRegSrc = getArtifactSrcReg(*PrevMI); + MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); if (MRI.hasOneUse(PrevRegSrc)) { if (TmpDef != &DefMI) { diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index b96827b..02fa4d1 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -76,6 +76,7 @@ static bool isArtifact(const MachineInstr &MI) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_EXTRACT: return true; } } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir index 3d0bd02..14ae981 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir @@ -22,8 +22,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ADD]](s32) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[MV]](s64), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY]](s32), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: $x0 = COPY [[COPY1]](s64) %0:_(s32) = COPY $w0 %1:_(s32) = G_ADD %0, %0 @@ -45,6 +48,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD 
[[COPY]], [[COPY]] ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] + ; CHECK: $w0 = COPY [[ADD1]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = G_ADD %0, %0 @@ -64,6 +68,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[COPY1]] + ; CHECK: $x0 = COPY [[ADD]](s64) %0:_(s64) = COPY $x0 %1:_(s128) = G_MERGE_VALUES %0, %0 @@ -82,6 +87,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] + ; CHECK: $w0 = COPY [[ADD1]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = G_ADD %0, %0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir index 3444254..86eea78 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -11,13 +11,15 @@ body: | ; value stored is forwarded directly from first load. ; CHECK-LABEL: name: test_extracts_1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64) ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store 8) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(s32) = COPY $w1 @@ -36,16 +38,17 @@ body: | ; Low extraction wipes takes whole low register. High extraction is real. 
; CHECK-LABEL: name: test_extracts_2 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64) ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s64), 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store 8) - ; CHECK: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 4) + ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8) + ; CHECK: G_STORE [[EXTRACT]](s32), [[COPY2]](p0) :: (store 4) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(s32) = COPY $w1 @@ -90,9 +93,9 @@ body: | ; CHECK-LABEL: name: test_extracts_4 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; CHECK: $w0 = COPY [[COPY1]](s32) + ; CHECK: $w0 = COPY [[EXTRACT]](s32) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(s64) = COPY $x1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir new file mode 100644 index 0000000..4202c4d1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -0,0 +1,470 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s + +--- +name: extract_s32_merge_s64_s32_s32_offset0 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s64) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 0 + $vgpr0 = COPY %3 +... + +--- +name: extract_s32_merge_s64_s32_s32_offset32 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s64) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 32 + $vgpr0 = COPY %3 +... + +--- +name: extract_s64_merge_s128_s64_s64_offset0 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s64) = G_EXTRACT %2, 0 + $vgpr0_vgpr1 = COPY %3 +... 
+ +--- +name: extract_s64_merge_s128_s64_s64_offset64 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C1]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s64) = G_EXTRACT %2, 64 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: extract_s32_merge_s128_s64_s64_offset0 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 0 + $vgpr0 = COPY %3 +... + +--- +name: extract_s32_merge_s128_s64_s64_offset32 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset32 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 32 + $vgpr0 = COPY %3 +... + +--- +name: extract_s32_merge_s128_s64_s64_offset64 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C1]](s64), 0 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 64 + $vgpr0 = COPY %3 +... + +--- +name: extract_s32_merge_s128_s64_s64_offset96 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset96 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C1]](s64), 32 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 96 + $vgpr0 = COPY %3 +... + +# Destination size fits, but is skewed from the start of the register. +--- +name: extract_s16_merge_s128_s64_s64_offset18 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset18 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s16) = G_EXTRACT %2, 18 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +# Destination size fits, but is skewed from the start of the register. 
+--- +name: extract_s16_merge_s128_s64_s64_offset82 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset82 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C1]](s64), 18 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s16) = G_EXTRACT %2, 82 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + + +# Can't handle this since it spans two registers +--- +name: extract_s64_merge_s128_s64_s64_offset32 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset32 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s128), 32 + ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s64) = G_EXTRACT %2, 32 + $vgpr0_vgpr1 = COPY %3 +... + + +# Only the last bit spans to another register +--- +name: extract_s16_merge_s32_s32_offset1 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s16_merge_s32_s32_offset1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[MV]](s64), 1 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s64) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_EXTRACT %2, 1 + $vgpr0 = COPY %3 +... + + +# Test with some merges with 3 operands + +--- +name: extract_s32_merge_s96_s32_s32_s32_offset0 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s96) = G_MERGE_VALUES %0, %1, %2 + %4:_(s32) = G_EXTRACT %3, 0 + $vgpr0 = COPY %4 +... + +--- +name: extract_s32_merge_s96_s32_s32_s32_offset64 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset64 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s96) = G_MERGE_VALUES %0, %1, %2 + %4:_(s32) = G_EXTRACT %3, 64 + $vgpr0 = COPY %4 +... 
+ +--- +name: extract_s64_merge_s96_s32_s32_s32_offset0 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s96) = G_MERGE_VALUES %0, %1, %2 + %4:_(s64) = G_EXTRACT %3, 0 + $vgpr0_vgpr1 = COPY %4 +... + +--- +name: extract_s64_merge_s96_s32_s32_s32_offset32 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 32 + ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s96) = G_MERGE_VALUES %0, %1, %2 + %4:_(s64) = G_EXTRACT %3, 32 + $vgpr0_vgpr1 = COPY %4 +... + +# Test build_vector sources +--- +name: extract_s64_build_vector_v2s64_s64_s64_offset0 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 + %3:_(s64) = G_EXTRACT %2, 0 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: extract_s64_build_vector_v2s64_s64_s64_offset64 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C1]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 + %3:_(s64) = G_EXTRACT %2, 64 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: extract_s64_build_vector_v2s64_s64_s64_offset32 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset32 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32 + ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 + %3:_(s64) = G_EXTRACT %2, 32 + $vgpr0_vgpr1 = COPY %3 +... 
+ +# Test extracting something smaller than the element size +--- +name: extract_s32_build_vector_v2s64_s64_s64_offset64 + +body: | + bb.0: + ; CHECK-LABEL: name: extract_s32_build_vector_v2s64_s64_s64_offset64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C1]](s64), 0 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 + %3:_(s32) = G_EXTRACT %2, 64 + $vgpr0 = COPY %3 + +... + +# Test concat_vector sources +--- +name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset0 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: $vgpr0 = COPY [[COPY2]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + %3:_(<2 x s16>) = G_EXTRACT %2, 0 + $vgpr0 = COPY %3 +... + +--- +name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: $vgpr0 = COPY [[COPY2]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + %3:_(<2 x s16>) = G_EXTRACT %2, 32 + $vgpr0 = COPY %3 +... + +# Test extracting only a single element, not a subvector +--- +name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](<2 x s16>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + %3:_(s16) = G_EXTRACT %2, 32 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +--- +name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](<2 x s16>), 16 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + %3:_(s16) = G_EXTRACT %2, 48 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +# Test extracting less than an element +--- +name: extract_s8_build_vector_v2s64_v2s16_v2s16_offset48 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: extract_s8_build_vector_v2s64_v2s16_v2s16_offset48 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[COPY1]](<2 x s16>), 16 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 + %3:_(s8) = G_EXTRACT %2, 48 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +...
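
The heart of tryCombineExtract is the source-index arithmetic above. As a quick way to sanity-check it outside of LLVM, the same decision can be modeled in a few lines of standalone C++. This is only an illustrative sketch, not code from this patch or from the GlobalISel API; the names ExtractOfMerge and matchExtractOfMerge are made up for the example, and the two checks at the bottom mirror the offset-82 and offset-32 test cases above.

// Standalone model of the G_EXTRACT-of-G_MERGE_VALUES fold added by this patch.
#include <cassert>
#include <optional>

// Which merge source the extract folds to, and the offset within it.
struct ExtractOfMerge {
  unsigned SrcIdx;    // index of the merge source operand (0-based)
  unsigned NewOffset; // bit offset of the extract within that source
};

// The fold is only possible when the extracted bit range
// [Offset, Offset + ExtractSize) lies entirely inside one source of the
// merge-like instruction. All sources share the same width, as
// G_MERGE_VALUES / G_BUILD_VECTOR / G_CONCAT_VECTORS guarantee.
std::optional<ExtractOfMerge>
matchExtractOfMerge(unsigned MergeTotalSize, unsigned NumMergeSrcs,
                    unsigned ExtractSize, unsigned Offset) {
  unsigned MergeSrcSize = MergeTotalSize / NumMergeSrcs;
  unsigned MergeSrcIdx = Offset / MergeSrcSize;
  // Index of the source holding the last bit the extract needs.
  unsigned EndMergeSrcIdx = (Offset + ExtractSize - 1) / MergeSrcSize;
  if (MergeSrcIdx != EndMergeSrcIdx)
    return std::nullopt; // spans two sources; the G_EXTRACT is left alone
  return ExtractOfMerge{MergeSrcIdx, Offset - MergeSrcIdx * MergeSrcSize};
}

int main() {
  // s128 = G_MERGE_VALUES s64, s64; G_EXTRACT s16 at offset 82 folds to an
  // extract of the second source at offset 18
  // (see extract_s16_merge_s128_s64_s64_offset82 above).
  auto R = matchExtractOfMerge(128, 2, 16, 82);
  assert(R && R->SrcIdx == 1 && R->NewOffset == 18);

  // G_EXTRACT s64 at offset 32 straddles both s64 sources and is not
  // combined (see extract_s64_merge_s128_s64_s64_offset32 above).
  assert(!matchExtractOfMerge(128, 2, 64, 32));
  return 0;
}

Keeping the bail-out as a simple index comparison is what makes the combine work for G_MERGE_VALUES, G_BUILD_VECTOR and G_CONCAT_VECTORS alike: only the uniform source width matters, not whether the sources are scalars or vectors.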