From: Matt Arsenault
Date: Mon, 3 Aug 2020 18:13:38 +0000 (-0400)
Subject: GlobalISel: Implement fewerElementsVector for G_CONCAT_VECTORS sources
X-Git-Tag: llvmorg-13-init~14227
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=31adc28d24b1a95bb47df23068b6f61dfb5cd012;p=platform%2Fupstream%2Fllvm.git

GlobalISel: Implement fewerElementsVector for G_CONCAT_VECTORS sources

This fixes <6 x s16> = G_CONCAT_VECTORS from <3 x s16> handling.
---

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index f857d1d..13d966e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -198,11 +198,19 @@ private:
                    LLT PartTy, ArrayRef<Register> PartRegs,
                    LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});

-  /// Unmerge \p SrcReg into \p Parts with the greatest common divisor type with
-  /// \p DstTy and \p NarrowTy. Returns the GCD type.
+  /// Unmerge \p SrcReg into smaller sized values, and append them to \p
+  /// Parts. The elements of \p Parts will be the greatest common divisor type
+  /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and
+  /// return the GCD type.
   LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                      LLT NarrowTy, Register SrcReg);

+  /// Unmerge \p SrcReg into \p GCDTy typed registers. This will append all of
+  /// the unpacked registers to \p Parts. This version is if the common unmerge
+  /// type is already known.
+  void extractGCDType(SmallVectorImpl<Register> &Parts, LLT GCDTy,
+                      Register SrcReg);
+
   /// Produce a merge of values in \p VRegs to define \p DstReg. Perform a merge
   /// from the least common multiple type, and convert as appropriate to \p
   /// DstReg.
@@ -282,10 +290,12 @@ public:
   LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
                                                 unsigned TypeIdx, LLT NarrowTy);

+  LegalizeResult fewerElementsVectorConcatVectors(MachineInstr &MI,
+                                                  unsigned TypeIdx,
+                                                  LLT NarrowTy);
   LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                            unsigned TypeIdx,
                                                            LLT NarrowTy);
-
   LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                       LLT NarrowTy);

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 9999d26..ee9fd3c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -243,22 +243,21 @@ void LegalizerHelper::insertParts(Register DstReg,
   }
 }

-/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
+/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                               const MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

+  const int StartIdx = Regs.size();
   const int NumResults = MI.getNumOperands() - 1;
-  Regs.resize(NumResults);
+  Regs.resize(Regs.size() + NumResults);
   for (int I = 0; I != NumResults; ++I)
-    Regs[I] = MI.getOperand(I).getReg();
+    Regs[StartIdx + I] = MI.getOperand(I).getReg();
 }

-LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
-                                    LLT NarrowTy, Register SrcReg) {
+void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+                                     LLT GCDTy, Register SrcReg) {
   LLT SrcTy = MRI.getType(SrcReg);
-
-  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
   if (SrcTy == GCDTy) {
     // If the source already evenly divides the result type, we don't need to do
     // anything.
@@ -268,7 +267,13 @@ LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
     getUnmergeResults(Parts, *Unmerge);
   }
+}

+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+                                    LLT NarrowTy, Register SrcReg) {
+  LLT SrcTy = MRI.getType(SrcReg);
+  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+  extractGCDType(Parts, GCDTy, SrcReg);
   return GCDTy;
 }

@@ -3610,6 +3615,34 @@ LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
 }

 LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorConcatVectors(MachineInstr &MI,
+                                                  unsigned TypeIdx,
+                                                  LLT NarrowTy) {
+  if (TypeIdx != 1)
+    return UnableToLegalize;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+
+  // Break into a common type
+  SmallVector<Register, 16> Parts;
+  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+    extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
+
+  // Build the requested new merge, padding with undef.
+  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
+                                  TargetOpcode::G_ANYEXT);
+
+  // Pack into the original result register.
+  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                            unsigned TypeIdx,
                                                            LLT NarrowVecTy) {
@@ -4013,6 +4046,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
   case G_BUILD_VECTOR:
     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
+  case G_CONCAT_VECTORS:
+    return fewerElementsVectorConcatVectors(MI, TypeIdx, NarrowTy);
   case G_EXTRACT_VECTOR_ELT:
   case G_INSERT_VECTOR_ELT:
     return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 4c3adb1..62cd458 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1434,7 +1434,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   // FIXME: Clamp maximum size
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
-    .legalIf(isRegisterType(0));
+    .legalIf(all(isRegisterType(0), isRegisterType(1)))
+    .clampMaxNumElements(0, S32, 32)
+    .clampMaxNumElements(1, S16, 2) // TODO: Make 4?
+    .clampMaxNumElements(0, S16, 64);

   // TODO: Don't fully scalarize v2s16 pieces? Or combine out those
   // pre-legalize.
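The decomposition is easiest to see on the case from the commit message: narrowing
<6 x s16> = G_CONCAT_VECTORS of two <3 x s16> sources with a requested NarrowTy of
<2 x s16>. The following is a minimal standalone sketch (not part of the patch) of
the GCD arithmetic the new fewerElementsVectorConcatVectors path performs; it
assumes the 2020-era LLT API (LLT::vector(NumElts, ScalarBits) from
llvm/Support/LowLevelTypeImpl.h) and the getGCDType helper declared in
llvm/CodeGen/GlobalISel/Utils.h.

  #include "llvm/CodeGen/GlobalISel/Utils.h"
  #include "llvm/Support/LowLevelTypeImpl.h"
  #include <cassert>

  using namespace llvm;

  int main() {
    LLT S16 = LLT::scalar(16);
    LLT V2S16 = LLT::vector(2, 16); // NarrowTy requested by the target
    LLT V3S16 = LLT::vector(3, 16); // type of each G_CONCAT_VECTORS source
    LLT V6S16 = LLT::vector(6, 16); // destination type

    // GCDTy = gcd(gcd(SrcTy, NarrowTy), DstTy). The element counts 3, 2 and 6
    // share no common vector factor, so the common piece is the plain scalar.
    LLT GCDTy = getGCDType(getGCDType(V3S16, V2S16), V6S16);
    assert(GCDTy == S16);

    // Each <3 x s16> source therefore unmerges into three s16 registers; the
    // six pieces are re-merged into <2 x s16> chunks (padded with undef when
    // the piece count does not divide evenly) and concatenated back into
    // <6 x s16> -- the shape the updated MIR tests below check for.
    return 0;
  }

Because gcd(3, 2) is 1, each source is fully scalarized to s16 before being
re-merged; in the concat_vectors_v6s16_v3s16 test added below this surfaces as
the bitcast/shift sequences that rebuild the three <2 x s16> values feeding the
final G_CONCAT_VECTORS.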
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index 2d29b13..c5a3577 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -504,12 +504,47 @@ body: | ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[AND1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) - ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>) + ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV16]](<3 x s16>), 0 + ; CHECK: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT6]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV20]](<3 x s16>), 0 + ; CHECK: [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), 
[[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) + ; CHECK: [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>), [[UV30:%[0-9]+]]:_(<5 x s16>), [[UV31:%[0-9]+]]:_(<5 x s16>), [[UV32:%[0-9]+]]:_(<5 x s16>), [[UV33:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV28]](<5 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT8]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir index f5c924b..e2724ff 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir @@ -212,3 +212,56 @@ body: | %2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2 ... + +--- +name: concat_vectors_v6s16_v3s16 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + ; CHECK-LABEL: name: concat_vectors_v6s16_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV1]](<3 x s16>), 0 + ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index ff614a54..50cbdf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -135,10 +135,32 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; UNPACKED: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV3]](<3 x s16>) - ; UNPACKED: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV8]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; UNPACKED: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV3]](<3 x s16>), 0 + ; UNPACKED: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; UNPACKED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; UNPACKED: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; UNPACKED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; UNPACKED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]] + ; UNPACKED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): @@ -164,10 +186,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; PACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>) - ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; PACKED: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 + ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 + ; PACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 
x half> %tex @@ -387,10 +433,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; UNPACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV4]](<3 x s16>) - ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 + ; UNPACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; UNPACKED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; UNPACKED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; UNPACKED: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; UNPACKED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]] + ; UNPACKED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): @@ -420,10 +488,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) ; PACKED: [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV3]](<3 x s16>), [[UV5]](<3 x s16>) - ; PACKED: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; PACKED: $vgpr0 = COPY [[UV9]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV10]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; PACKED: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV5]](<3 x s16>), 0 + ; PACKED: [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; PACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -662,10 +754,32 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; UNPACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV2]](<3 x s16>) - ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV7]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 + ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; UNPACKED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; UNPACKED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; UNPACKED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): @@ -690,10 +804,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; PACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV2]](<3 x s16>) - ; PACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; PACKED: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 + ; PACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 + ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -732,10 +870,32 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; UNPACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV]](<3 x s16>) - ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV4]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV5]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32) + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32) + ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] + ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + 
; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): @@ -760,10 +920,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; PACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV2]](<3 x s16>) - ; PACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; PACKED: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 + ; PACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 + ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -790,10 +974,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) 
; UNPACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV4]](<3 x s16>) - ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; UNPACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -815,10 +1023,34 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; PACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[UV4]](<3 x s16>) - ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; PACKED: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], 
[[UV]](<3 x s16>), 0 + ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; PACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -1246,10 +1478,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; UNPACKED: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV3]](<3 x s16>) - ; UNPACKED: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV8]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; UNPACKED: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 + ; UNPACKED: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 
x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; UNPACKED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; UNPACKED: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; UNPACKED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): @@ -1278,10 +1532,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) ; PACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>) - ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; PACKED: $vgpr0 = COPY [[UV8]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV4]](<3 x s16>), 0 + ; PACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) + ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1326,10 +1604,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; UNPACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV2]](<3 x s16>) - ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV7]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32) + ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32) + ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 + ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32) + ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] + ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) ; UNPACKED: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
 ; PACKED: bb.1 (%ir-block.0):
@@ -1358,10 +1658,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
 ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
 ; PACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>)
- ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; PACKED: $vgpr0 = COPY [[UV8]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[UV9]](<2 x s16>)
+ ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0
+ ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV4]](<3 x s16>), 0
+ ; PACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+ ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+ ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+ ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+ ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
 %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1406,10 +1730,32 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
 ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>)
 ; UNPACKED: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; UNPACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV2]](<3 x s16>)
- ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; UNPACKED: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+ ; UNPACKED: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32)
+ ; UNPACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0
+ ; UNPACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; UNPACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32)
+ ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+ ; UNPACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+ ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; UNPACKED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+ ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; UNPACKED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+ ; UNPACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
 ; PACKED: bb.1 (%ir-block.0):
@@ -1438,10 +1784,34 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
 ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
 ; PACKED: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; PACKED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<3 x s16>), [[UV4]](<3 x s16>)
- ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; PACKED: $vgpr0 = COPY [[UV8]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[UV9]](<2 x s16>)
+ ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0
+ ; PACKED: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; PACKED: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV4]](<3 x s16>), 0
+ ; PACKED: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+ ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+ ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+ ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+ ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
 %tex = extractvalue { <3 x half>, i32 } %res, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 1fed9c0..8bb7b2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -7279,11 +7279,44 @@ body: |
 ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
- ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<10 x s16>)
- ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align8
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1)
@@ -7383,11 +7416,44 @@ body: |
 ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
- ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<10 x s16>)
- ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align4
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1)
@@ -7513,11 +7579,42 @@ body: |
 ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+ ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align2
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1)
@@ -7578,11 +7675,42 @@ body: |
 ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; CI-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; CI-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; CI-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; VI-LABEL: name: test_load_global_v5s16_align2
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -7631,11 +7759,42 @@ body: |
 ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; VI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; VI: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; VI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; VI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; VI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+ ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1)
@@ -7686,11 +7845,31 @@ body: |
 ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; GFX9-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; GFX9-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; GFX9-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; GFX9-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+ ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+ ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+ ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; GFX9-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+ ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 2, addrspace 1)
 %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -7802,11 +7981,43 @@ body: |
 ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; SI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; SI: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; SI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; SI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+ ; SI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; SI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+ ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
+ ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
+ ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s32)
+ ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
+ ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
+ ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32)
+ ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+ ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+ ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+ ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+ ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+ ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align1
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1)
@@ -7913,11 +8124,43 @@ body: |
 ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; CI-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; CI-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; CI-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; CI-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+ ; CI-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; CI-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+ ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
+ ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C8]]
+ ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s32)
+ ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
+ ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
+ ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32)
+ ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+ ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+ ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+ ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+ ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+ ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; VI-LABEL: name: test_load_global_v5s16_align1
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
@@ -8002,11 +8245,43 @@ body: |
 ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; VI: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; VI: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; VI: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; VI: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; VI: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; VI: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+ ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+ ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+ ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+ ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+ ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+ ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32)
+ ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+ ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1)
@@ -8098,11 +8373,31 @@ body: |
 ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
 ; GFX9-MESA: [[UV18:%[0-9]+]]:_(<5 x s16>), [[UV19:%[0-9]+]]:_(<5 x s16>), [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<30 x s16>)
- ; GFX9-MESA: [[CONCAT_VECTORS5:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[UV12]](<5 x s16>), [[UV18]](<5 x s16>)
- ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<10 x s16>)
- ; GFX9-MESA: $vgpr0 = COPY [[UV24]](<2 x s16>)
- ; GFX9-MESA: $vgpr1 = COPY [[UV25]](<2 x s16>)
- ; GFX9-MESA: $vgpr2 = COPY [[UV26]](<2 x s16>)
+ ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV12]](<5 x s16>), 0
+ ; GFX9-MESA: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<6 x s16>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+ ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+ ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
+ ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+ ; GFX9-MESA: [[INSERT5:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV18]](<5 x s16>), 0
+ ; GFX9-MESA: [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<6 x s16>)
+ ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+ ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+ ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 1, addrspace 1)
 %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -9167,12 +9462,52 @@ body: |
 ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; SI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
 ; SI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
- ; SI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
- ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
- ; SI: $vgpr0 = COPY [[UV44]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[UV45]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[UV46]](<2 x s16>)
- ; SI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+ ; SI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+ ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+ ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+ ; SI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+ ; SI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+ ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+ ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+ ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+ ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+ ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+ ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+ ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+ ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+ ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v7s16_align2
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1)
@@ -9257,12 +9592,52 @@ body: |
 ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
 ; CI-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
- ; CI-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
- ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
- ; CI-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
- ; CI-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+ ; CI-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+ ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+ ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+ ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+ ; CI-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+ ; CI-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+ ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+ ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+ ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+ ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+ ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+ ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+ ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+ ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+ ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+ ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; CI-MESA: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
 ; VI-LABEL: name: test_load_global_v7s16_align2
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
@@ -9334,12 +9709,52 @@ body: |
 ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; VI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
 ; VI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
- ; VI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
- ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
- ; VI: $vgpr0 = COPY [[UV44]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[UV45]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[UV46]](<2 x s16>)
- ; VI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+ ; VI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+ ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+ ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32)
+ ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+ ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
+ ; VI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+ ; VI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+ ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+ ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32)
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+ ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+ ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]]
+ ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32)
+ ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+ ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
+ ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C4]](s32)
+ ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]]
+ ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; VI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
 ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1)
@@ -9410,12 +9825,37 @@ body: |
 ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
 ; GFX9-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
- ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
- ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
- ; GFX9-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
- ; GFX9-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
- ; GFX9-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
- ; GFX9-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+ ; GFX9-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+ ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+ ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+ ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
+ ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C4]](s32)
+ ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32)
+ ; GFX9-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+ ; GFX9-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+ ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+ ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32)
+ ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+ ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+ ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+ ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 2, addrspace 1)
 %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -9569,12 +10009,53 @@ body: |
 ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; SI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
 ; SI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
- ; SI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
- ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
- ; SI: $vgpr0 = COPY [[UV44]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[UV45]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[UV46]](<2 x s16>)
- ; SI: $vgpr3 = COPY [[UV47]](<2 x s16>)
+ ; SI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+ ; SI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+ ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+ ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+ ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C5]](s32)
+ ; SI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+ ; SI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+ ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+ ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C5]](s32)
+ ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+ ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+ ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+ ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+ ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
+ ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
+ ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C5]](s32)
+ ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+ ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
+ ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
+ ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s32)
+ ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
+ ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+ ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
+ ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
+ ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C5]](s32)
+ ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]]
+ ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>)
 ; CI-HSA-LABEL: name: test_load_global_v7s16_align1
 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1)
@@ -9723,12 +10204,53 @@ body: |
 ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CI-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>)
 ; CI-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>)
- ; CI-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>)
- ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>)
- ; CI-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>)
- ; CI-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>)
+ ; CI-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0
+ ; CI-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>)
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C5]](s32)
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>)
+ ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C5]](s32)
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>)
+ ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C5]](s32)
+ ; CI-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0
+ ; CI-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>)
+ ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>)
+ ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C5]](s32)
+ ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]]
+ ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]]
+ ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C5]](s32)
+ ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+ ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
+ ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
+ ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C5]](s32)
+ ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+ ; CI-MESA: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; CI-MESA: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]]
+ ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]]
+ ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s32)
+ ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]]
+ ; CI-MESA: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+ ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
+ ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]]
+ ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C5]](s32)
+ ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]],
[[SHL13]] + ; CI-MESA: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; CI-MESA: $vgpr3 = COPY [[BITCAST11]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -9850,12 +10372,53 @@ body: | ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>) ; VI: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>) - ; VI: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>) - ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>) - ; VI: $vgpr0 = COPY [[UV44]](<2 x s16>) - ; VI: $vgpr1 = COPY [[UV45]](<2 x s16>) - ; VI: $vgpr2 = COPY [[UV46]](<2 x s16>) - ; VI: $vgpr3 = COPY [[UV47]](<2 x s16>) + ; VI: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0 + ; VI: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C4]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C4]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C4]](s32) + ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) + ; VI: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0 + ; VI: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>) + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C4]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; VI: [[SHL11:%[0-9]+]]:_(s32) 
= G_SHL [[AND17]], [[C4]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; VI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C4]](s32) + ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL12]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C4]](s32) + ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL13]] + ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; VI: $vgpr3 = COPY [[BITCAST11]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1) @@ -9983,12 +10546,37 @@ body: | ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS7:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF4]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>), [[DEF3]](<8 x s16>) ; GFX9-MESA: [[UV36:%[0-9]+]]:_(<7 x s16>), [[UV37:%[0-9]+]]:_(<7 x s16>), [[UV38:%[0-9]+]]:_(<7 x s16>), [[UV39:%[0-9]+]]:_(<7 x s16>), [[UV40:%[0-9]+]]:_(<7 x s16>), [[UV41:%[0-9]+]]:_(<7 x s16>), [[UV42:%[0-9]+]]:_(<7 x s16>), [[UV43:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<56 x s16>) - ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[UV28]](<7 x s16>), [[UV36]](<7 x s16>) - ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS8]](<14 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[UV44]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[UV45]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[UV46]](<2 x s16>) - ; GFX9-MESA: $vgpr3 = COPY [[UV47]](<2 x s16>) + ; GFX9-MESA: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV28]](<7 x s16>), 0 + ; GFX9-MESA: [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<8 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV44]](<2 x s16>) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV45]](<2 x s16>) + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) + ; GFX9-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV46]](<2 x s16>) + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32) + ; GFX9-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV47]](<2 x s16>) + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32) + ; 
GFX9-MESA: [[INSERT9:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF3]], [[UV36]](<7 x s16>), 0 + ; GFX9-MESA: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<8 x s16>) + ; GFX9-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV48]](<2 x s16>) + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C6]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 1, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index 86b4a8e..1cfc08d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -504,12 +504,47 @@ body: | ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[OR1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) - ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>) + ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV16]](<3 x s16>), 0 + ; CHECK: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT6]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV20]](<3 x s16>), 0 + ; CHECK: [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST3]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) + ; CHECK: [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>), [[UV30:%[0-9]+]]:_(<5 x s16>), [[UV31:%[0-9]+]]:_(<5 x s16>), [[UV32:%[0-9]+]]:_(<5 x s16>), [[UV33:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV28]](<5 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT8]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 50311d6..02ea5a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -180,7 +180,42 @@ body: | ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV4]](<3 x s16>), [[UV8]](<3 x s16>) + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 + ; CHECK: 
[[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<4 x s16>) + ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) + ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C4]](s32) + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV8]](<3 x s16>), 0 + ; CHECK: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>) + ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) + ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C4]](s32) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]] + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C5]] + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS3]](<6 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 224ee57..5e3a15c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -535,7 +535,40 @@ body: | ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x 
s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]] + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]] + ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]] + ; GFX6: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C4]] + ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]] + ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -599,7 +632,41 @@ body: | ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x 
s16>) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -637,7 +704,28 @@ body: | ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9: 
[[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 4193662..555a18b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -524,7 +524,40 @@ body: | ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL8]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX6: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL9]] + ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = 
COPY [[LSHR6]](s32) + ; GFX6: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]] + ; GFX6: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX6: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]] + ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]] + ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index bb3d796..d39d926 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -535,7 +535,40 @@ body: | ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]] + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]] + ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]] + ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND 
[[COPY15]], [[C4]] + ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]] + ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -599,7 +632,41 @@ body: | ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: 
name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -637,7 +704,28 @@ body: | ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index 00d9b72..077c6be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -426,7 +426,40 @@ body: | ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x 
s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]] + ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]] + ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]] + ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -469,7 +502,41 @@ body: | ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x 
s16>) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -507,7 +574,28 @@ body: | ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9: 
[[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index 687c6df..e7369c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -452,7 +452,40 @@ body: | ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0 + ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX6: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX6: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL8]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; GFX6: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL9]] + ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) 
= COPY [[LSHR12]](s32)
+    ; GFX6: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: ushlsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
index 0fef31a..2bea002 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
@@ -410,7 +410,40 @@ body: |
     ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX6: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX6: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX6: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX6: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL8]]
+    ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL9]]
+    ; GFX6: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+    ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+    ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+    ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL10]]
+    ; GFX6: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX8-LABEL: name: usubsat_v3s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -453,7 +486,41 @@ body: |
     ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX8: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX8: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+    ; GFX8: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
+    ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]]
+    ; GFX8: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+    ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+    ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+    ; GFX8: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
     ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     ; GFX9-LABEL: name: usubsat_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -491,7 +558,28 @@ body: |
     ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
     ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
-    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+    ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+    ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+    ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+    ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV6]](<3 x s16>), 0
+    ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+    ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+    ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
+    ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+    ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
     ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index 538ac6f..e4be432 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -504,12 +504,47 @@ body: |
     ; CHECK: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]]
     ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[XOR1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>)
     ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>)
-    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>)
+    ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV16]](<3 x s16>), 0
+    ; CHECK: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT6]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV24]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV25]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV20]](<3 x s16>), 0
+    ; CHECK: [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
+    ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV26]](<2 x s16>)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV27]](<2 x s16>)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+    ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
-    ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
+    ; CHECK: [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>), [[UV30:%[0-9]+]]:_(<5 x s16>), [[UV31:%[0-9]+]]:_(<5 x s16>), [[UV32:%[0-9]+]]:_(<5 x s16>), [[UV33:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>)
     ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+    ; CHECK: [[INSERT8:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV28]](<5 x s16>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT8]](<8 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<5 x s16>) = G_IMPLICIT_DEF
     %2:_(<5 x s16>) = G_XOR %0, %1