From: Matt Arsenault Date: Sun, 24 May 2020 15:12:11 +0000 (-0400) Subject: AMDGPU/GlobalISel: Start rewriting load/store legality rules X-Git-Tag: llvmorg-12-init~3875 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc20bdb9f968122ef7801ac9124d68953fa0dd5e;p=platform%2Fupstream%2Fllvm.git AMDGPU/GlobalISel: Start rewriting load/store legality rules The current set is an incomprehensible mess riddled with ordering hacks for various limitations in the legalizer at the time of writing, many of which have been fixed. This takes a very small step in correcting this. The core first change is to start checking for fully legal cases first, rather than trying to figure out all of the actions that could need to be performed. It's recommended to check the legal cases first for faster legality checks in the common case. This still has a table listing some common cases, but it needs measuring whether this really helps or not. More significantly, stop trying to allow any arbitrary type with a legal bitwidth as a legal memory type, and start using the bitcast legalize action for them. Allowing loads of these weird vector types produced new burdens we don't need for handling all of the legalization artifacts. Unlike the SelectionDAG handling, this is still not casting 64 or 16-bit element vectors to 32-bit vectors. These cases should still be handled by increasing/decreasing the number of 16-bit elements. This is primarily to fix 8-bit element vectors. Another change is to stop trying to handle the load-widening based on a higher alignment. We should still do this, but the way it was handled wasn't really correct. We really need to modify the MMO's size at the same time, and not just increase the result type. The LegalizerHelper does not do this, and I think this would really require a separate WidenMemory action (or to add a memory action payload to the LegalizeMutation). These will now fail to legalize. The structure of the legalizer rules makes writing concise rules here difficult. It would be easier if the same function could answer the query the query, and report the action to perform at the same time. Instead these two are split into distinct predicate and action functions. This is mostly tolerable for other cases, but the load/store rules get pretty complicated so it's difficult to keep two versions of these functions in sync. --- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index b3c7b68..cc549c4 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -578,6 +578,15 @@ public: return actionIf(LegalizeAction::Legal, always); } + /// The specified type index is coerced if predicate is true. + LegalizeRuleSet &bitcastIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that lowering with a + // free-form user provided Predicate properly handles all type indices: + markAllIdxsAsCovered(); + return actionIf(LegalizeAction::Bitcast, Predicate, Mutation); + } + /// The instruction is lowered. LegalizeRuleSet &lower() { using namespace LegalizeMutations; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 678124c..c1e9e22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -114,6 +114,23 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { }; } +static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + unsigned Size = Ty.getSizeInBits(); + + LLT CoercedTy; + if (Size < 32) { + // <2 x s8> -> s16 + assert(Size == 16); + CoercedTy = LLT::scalar(16); + } else + CoercedTy = LLT::scalarOrVector(Size / 32, 32); + + return std::make_pair(TypeIdx, CoercedTy); + }; +} + static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; @@ -135,19 +152,37 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) { }; } +static bool isRegisterSize(unsigned Size) { + return Size % 32 == 0 && Size <= 1024; +} + +static bool isRegisterVectorElementType(LLT EltTy) { + const int EltSize = EltTy.getSizeInBits(); + return EltSize == 16 || EltSize % 32 == 0; +} + +static bool isRegisterVectorType(LLT Ty) { + const int EltSize = Ty.getElementType().getSizeInBits(); + return EltSize == 32 || EltSize == 64 || + (EltSize == 16 && Ty.getNumElements() % 2 == 0) || + EltSize == 128 || EltSize == 256; +} + +static bool isRegisterType(LLT Ty) { + if (!isRegisterSize(Ty.getSizeInBits())) + return false; + + if (Ty.isVector()) + return isRegisterVectorType(Ty); + + return true; +} + // Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of // v2s16. static LegalityPredicate isRegisterType(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[TypeIdx]; - if (Ty.isVector()) { - const int EltSize = Ty.getElementType().getSizeInBits(); - return EltSize == 32 || EltSize == 64 || - (EltSize == 16 && Ty.getNumElements() % 2 == 0) || - EltSize == 128 || EltSize == 256; - } - - return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024; + return isRegisterType(Query.Types[TypeIdx]); }; } @@ -169,6 +204,102 @@ static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) { }; } +// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we +// handle some operations by just promoting the register during +// selection. There are also d16 loads on GFX9+ which preserve the high bits. +static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS, + bool IsLoad) { + switch (AS) { + case AMDGPUAS::PRIVATE_ADDRESS: + // FIXME: Private element size. + return 32; + case AMDGPUAS::LOCAL_ADDRESS: + return ST.useDS128() ? 128 : 64; + case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS_32BIT: + // Treat constant and global as identical. SMRD loads are sometimes usable for + // global loads (ideally constant address space should be eliminated) + // depending on the context. Legality cannot be context dependent, but + // RegBankSelect can split the load as necessary depending on the pointer + // register bank/uniformity and if the memory is invariant or not written in a + // kernel. + return IsLoad ? 512 : 128; + default: + // Flat addresses may contextually need to be split to 32-bit parts if they + // may alias scratch depending on the subtarget. + return 128; + } +} + +static bool isLoadStoreSizeLegal(const GCNSubtarget &ST, + const LegalityQuery &Query, + unsigned Opcode) { + const LLT Ty = Query.Types[0]; + + // Handle G_LOAD, G_ZEXTLOAD, G_SEXTLOAD + const bool IsLoad = Opcode != AMDGPU::G_STORE; + + unsigned RegSize = Ty.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned Align = Query.MMODescrs[0].AlignInBits; + unsigned AS = Query.Types[1].getAddressSpace(); + + // All of these need to be custom lowered to cast the pointer operand. + if (AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) + return false; + + // TODO: We should be able to widen loads if the alignment is high enough, but + // we also need to modify the memory access size. +#if 0 + // Accept widening loads based on alignment. + if (IsLoad && MemSize < Size) + MemSize = std::max(MemSize, Align); +#endif + + // Only 1-byte and 2-byte to 32-bit extloads are valid. + if (MemSize != RegSize && RegSize != 32) + return false; + + if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad)) + return false; + + switch (MemSize) { + case 8: + case 16: + case 32: + case 64: + case 128: + break; + case 96: + if (!ST.hasDwordx3LoadStores()) + return false; + break; + case 256: + case 512: + // These may contextually need to be broken down. + break; + default: + return false; + } + + assert(RegSize >= MemSize); + + if (Align < MemSize) { + const SITargetLowering *TLI = ST.getTargetLowering(); + if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8)) + return false; + } + + return true; +} + +static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query, + unsigned Opcode) { + const LLT Ty = Query.Types[0]; + return isRegisterType(Ty) && isLoadStoreSizeLegal(ST, Query, Opcode); +} + AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const GCNTargetMachine &TM) : ST(ST_) { @@ -711,32 +842,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .custom(); - // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we - // handle some operations by just promoting the register during - // selection. There are also d16 loads on GFX9+ which preserve the high bits. - auto maxSizeForAddrSpace = [this](unsigned AS, bool IsLoad) -> unsigned { - switch (AS) { - // FIXME: Private element size. - case AMDGPUAS::PRIVATE_ADDRESS: - return 32; - // FIXME: Check subtarget - case AMDGPUAS::LOCAL_ADDRESS: - return ST.useDS128() ? 128 : 64; - - // Treat constant and global as identical. SMRD loads are sometimes usable - // for global loads (ideally constant address space should be eliminated) - // depending on the context. Legality cannot be context dependent, but - // RegBankSelect can split the load as necessary depending on the pointer - // register bank/uniformity and if the memory is invariant or not written in - // a kernel. - case AMDGPUAS::CONSTANT_ADDRESS: - case AMDGPUAS::GLOBAL_ADDRESS: - return IsLoad ? 512 : 128; - default: - return 128; - } - }; - const auto needToSplitMemOp = [=](const LegalityQuery &Query, bool IsLoad) -> bool { const LLT DstTy = Query.Types[0]; @@ -753,7 +858,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT PtrTy = Query.Types[1]; unsigned AS = PtrTy.getAddressSpace(); - if (MemSize > maxSizeForAddrSpace(AS, IsLoad)) + if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad)) return true; // Catch weird sized loads that don't evenly divide into the access sizes @@ -776,7 +881,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; }; - const auto shouldWidenLoadResult = [=](const LegalityQuery &Query) -> bool { + const auto shouldWidenLoadResult = [=](const LegalityQuery &Query, + unsigned Opc) -> bool { unsigned Size = Query.Types[0].getSizeInBits(); if (isPowerOf2_32(Size)) return false; @@ -785,7 +891,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; unsigned AddrSpace = Query.Types[1].getAddressSpace(); - if (Size >= maxSizeForAddrSpace(AddrSpace, true)) + if (Size >= maxSizeForAddrSpace(ST, AddrSpace, Opc)) return false; unsigned Align = Query.MMODescrs[0].AlignInBits; @@ -805,12 +911,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const bool IsStore = Op == G_STORE; auto &Actions = getActionDefinitionsBuilder(Op); - // Whitelist the common cases. - // TODO: Loads to s16 on gfx9 + // Whitelist some common cases. + // TODO: Does this help compile time at all? Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32}, {V2S32, GlobalPtr, 64, GlobalAlign32}, {V4S32, GlobalPtr, 128, GlobalAlign32}, - {S128, GlobalPtr, 128, GlobalAlign32}, {S64, GlobalPtr, 64, GlobalAlign32}, {V2S64, GlobalPtr, 128, GlobalAlign32}, {V2S16, GlobalPtr, 32, GlobalAlign32}, @@ -829,29 +934,49 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, {S32, PrivatePtr, 16, 16}, {V2S16, PrivatePtr, 32, 32}, - {S32, FlatPtr, 32, GlobalAlign32}, - {S32, FlatPtr, 16, GlobalAlign16}, - {S32, FlatPtr, 8, GlobalAlign8}, - {V2S16, FlatPtr, 32, GlobalAlign32}, - {S32, ConstantPtr, 32, GlobalAlign32}, {V2S32, ConstantPtr, 64, GlobalAlign32}, {V4S32, ConstantPtr, 128, GlobalAlign32}, {S64, ConstantPtr, 64, GlobalAlign32}, - {S128, ConstantPtr, 128, GlobalAlign32}, {V2S32, ConstantPtr, 32, GlobalAlign32}}); + Actions.legalIf( + [=](const LegalityQuery &Query) -> bool { + return isLoadStoreLegal(ST, Query, Op); + }); + + // Constant 32-bit is handled by addrspacecasting the 32-bit pointer to + // 64-bits. + // + // TODO: Should generalize bitcast action into coerce, which will also cover + // inserting addrspacecasts. + Actions.customIf(typeIs(1, Constant32Ptr)); + + // Turn any illegal element vectors into something easier to deal + // with. These will ultimately produce 32-bit scalar shifts to extract the + // parts anyway. + // + // For odd 16-bit element vectors, prefer to split those into pieces with + // 16-bit vector parts. + Actions.bitcastIf( + [=](const LegalityQuery &Query) -> bool { + LLT Ty = Query.Types[0]; + return Ty.isVector() && + isRegisterSize(Ty.getSizeInBits()) && + !isRegisterVectorElementType(Ty.getElementType()); + }, bitcastToRegisterType(0)); + Actions .customIf(typeIs(1, Constant32Ptr)) // Widen suitably aligned loads by loading extra elements. .moreElementsIf([=](const LegalityQuery &Query) { const LLT Ty = Query.Types[0]; return Op == G_LOAD && Ty.isVector() && - shouldWidenLoadResult(Query); + shouldWidenLoadResult(Query, Op); }, moreElementsToNextPow2(0)) .widenScalarIf([=](const LegalityQuery &Query) { const LLT Ty = Query.Types[0]; return Op == G_LOAD && !Ty.isVector() && - shouldWidenLoadResult(Query); + shouldWidenLoadResult(Query, Op); }, widenScalarOrEltToNextPow2(0)) .narrowScalarIf( [=](const LegalityQuery &Query) -> bool { @@ -883,7 +1008,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return std::make_pair(0, LLT::scalar(32 * (DstSize / 32))); } - unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace(), + unsigned MaxSize = maxSizeForAddrSpace(ST, + PtrTy.getAddressSpace(), Op == G_LOAD); if (MemSize > MaxSize) return std::make_pair(0, LLT::scalar(MaxSize)); @@ -901,7 +1027,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT PtrTy = Query.Types[1]; LLT EltTy = DstTy.getElementType(); - unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace(), + unsigned MaxSize = maxSizeForAddrSpace(ST, + PtrTy.getAddressSpace(), Op == G_LOAD); // FIXME: Handle widened to power of 2 results better. This ends @@ -963,37 +1090,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // TODO: Need a bitcast lower option? Actions - .legalIf([=](const LegalityQuery &Query) { - const LLT Ty0 = Query.Types[0]; - unsigned Size = Ty0.getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - unsigned Align = Query.MMODescrs[0].AlignInBits; - - // FIXME: Widening store from alignment not valid. - if (MemSize < Size) - MemSize = std::max(MemSize, Align); - - // No extending vector loads. - if (Size > MemSize && Ty0.isVector()) - return false; - - switch (MemSize) { - case 8: - case 16: - return Size == 32; - case 32: - case 64: - case 128: - return true; - case 96: - return ST.hasDwordx3LoadStores(); - case 256: - case 512: - return true; - default: - return false; - } - }) .widenScalarToNextPow2(0) .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 82b9531..f0a1e21 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -635,38 +635,33 @@ body: | ; CI-LABEL: name: test_load_constant_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_load_constant_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-LABEL: name: test_load_constant_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-MESA-LABEL: name: test_load_constant_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-MESA-LABEL: name: test_load_constant_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 4) %2:_(s64) = G_ZEXT %1 @@ -4134,63 +4129,127 @@ body: | ; CI-LABEL: name: test_load_constant_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_constant_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4209,43 +4268,129 @@ body: | ; CI-LABEL: name: test_load_constant_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_constant_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 4) @@ -4465,24 +4610,88 @@ body: | ; CI-LABEL: name: test_load_constant_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -4496,99 +4705,114 @@ body: | ; CI-LABEL: name: test_load_constant_v4s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) @@ -4716,24 +4940,29 @@ body: | ; CI-LABEL: name: test_load_constant_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_constant_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_constant_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v8s8_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -4747,24 +4976,29 @@ body: | ; CI-LABEL: name: test_load_constant_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_constant_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_constant_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4778,24 +5012,29 @@ body: | ; CI-LABEL: name: test_load_constant_v32s8_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; VI-LABEL: name: test_load_constant_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-LABEL: name: test_load_constant_v32s8_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 34c5e56..73d2c9f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -635,38 +635,33 @@ body: | ; CI-LABEL: name: test_load_flat_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-LABEL: name: test_load_flat_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-MESA-LABEL: name: test_load_flat_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-MESA-LABEL: name: test_load_flat_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 0) %2:_(s64) = G_ZEXT %1 @@ -4154,63 +4149,127 @@ body: | ; CI-LABEL: name: test_load_flat_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_flat_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4229,43 +4288,129 @@ body: | ; CI-LABEL: name: test_load_flat_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_flat_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 0) @@ -4485,24 +4630,88 @@ body: | ; CI-LABEL: name: test_load_flat_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -4516,99 +4725,114 @@ body: | ; CI-LABEL: name: test_load_flat_v4s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; CI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; CI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) @@ -4736,24 +4960,29 @@ body: | ; CI-LABEL: name: test_load_flat_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_flat_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_flat_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v8s8_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -4767,24 +4996,29 @@ body: | ; CI-LABEL: name: test_load_flat_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_flat_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_flat_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4798,44 +5032,49 @@ body: | ; CI-LABEL: name: test_load_flat_v32s8_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; VI-LABEL: name: test_load_flat_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; VI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-LABEL: name: test_load_flat_v32s8_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index 5c3ee75..ee36dca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -675,45 +675,39 @@ body: | ; SI-LABEL: name: test_load_global_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; SI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-HSA-LABEL: name: test_load_global_s48_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-MESA-LABEL: name: test_load_global_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_load_global_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s48_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 1) %2:_(s64) = G_ZEXT %1 @@ -3835,76 +3829,152 @@ body: | ; SI-LABEL: name: test_load_global_v2s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-HSA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -3923,51 +3993,154 @@ body: | ; SI-LABEL: name: test_load_global_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; SI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 1) @@ -4002,11 +4175,28 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4044,11 +4234,29 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4204,28 +4412,104 @@ body: | ; SI-LABEL: name: test_load_global_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; SI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) $vgpr0 = COPY %1 @@ -4258,86 +4542,124 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; SI-LABEL: name: test_load_global_v4s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) - ; CI-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) - ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) @@ -4374,8 +4696,20 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) - ; CI-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -4416,8 +4750,22 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) - ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -4452,28 +4800,34 @@ body: | ; SI-LABEL: name: test_load_global_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-HSA-LABEL: name: test_load_global_v8s8_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_global_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_global_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -4487,28 +4841,34 @@ body: | ; SI-LABEL: name: test_load_global_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-HSA-LABEL: name: test_load_global_v16s8_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_global_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_global_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4522,28 +4882,34 @@ body: | ; SI-LABEL: name: test_load_global_v32s8_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-HSA-LABEL: name: test_load_global_v32s8_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_global_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; VI-LABEL: name: test_load_global_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -11622,10 +11988,11 @@ body: | ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD1]](s64), 0 ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64 - ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[TRUNC]](s96) - ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[TRUNC]](s96), [[INSERT1]](s96) + ; SI: [[EXTRACT:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 96 + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](s96) + ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[EXTRACT1]](s96) ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) @@ -11683,3 +12050,833 @@ body: | $vgpr0_vgpr1_vgpr2 = COPY %2 $vgpr3_vgpr4_vgpr5 = COPY %3 ... + +--- +name: test_load_global_v32s1_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_v32s1_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; SI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; SI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; SI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; SI: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; SI: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; SI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; SI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; SI: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; SI: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; SI: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; SI: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; SI: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; SI: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; SI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; SI: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; SI: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; SI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; SI: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; SI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; SI: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; SI: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; SI: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; SI: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; SI: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; SI: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; CI-HSA-LABEL: name: test_load_global_v32s1_align4 + ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; CI-HSA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; CI-HSA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; CI-HSA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; CI-HSA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CI-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; CI-HSA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CI-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; CI-HSA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CI-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; CI-HSA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; CI-HSA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CI-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; CI-HSA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CI-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; CI-HSA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CI-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; CI-HSA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; CI-HSA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CI-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; CI-HSA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; CI-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; CI-HSA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CI-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; CI-HSA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; CI-HSA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CI-HSA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; CI-HSA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CI-HSA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; CI-HSA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CI-HSA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; CI-HSA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-HSA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; CI-HSA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CI-HSA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; CI-HSA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CI-HSA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; CI-HSA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CI-HSA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CI-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CI-HSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CI-HSA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; CI-HSA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; CI-HSA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; CI-HSA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; CI-HSA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; CI-HSA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; CI-HSA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; CI-HSA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; CI-HSA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; CI-HSA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; CI-HSA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; CI-HSA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; CI-HSA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; CI-HSA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; CI-HSA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; CI-MESA-LABEL: name: test_load_global_v32s1_align4 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; CI-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; CI-MESA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; CI-MESA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; CI-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; CI-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; CI-MESA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; CI-MESA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; CI-MESA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; CI-MESA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; CI-MESA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; CI-MESA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CI-MESA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; CI-MESA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CI-MESA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; CI-MESA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CI-MESA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; CI-MESA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-MESA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; CI-MESA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CI-MESA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; CI-MESA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CI-MESA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; CI-MESA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CI-MESA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; VI-LABEL: name: test_load_global_v32s1_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; VI: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; VI: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; VI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; VI: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; VI: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; VI: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; VI: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; VI: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; VI: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; VI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; VI: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; VI: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; VI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; VI: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; VI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; VI: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; VI: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; VI: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; VI: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; VI: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; VI: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; GFX9-HSA-LABEL: name: test_load_global_v32s1_align4 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; GFX9-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; GFX9-HSA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; GFX9-HSA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; GFX9-HSA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; GFX9-HSA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; GFX9-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; GFX9-HSA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; GFX9-HSA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; GFX9-HSA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; GFX9-HSA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; GFX9-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; GFX9-HSA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; GFX9-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; GFX9-HSA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; GFX9-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; GFX9-HSA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; GFX9-HSA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; GFX9-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; GFX9-HSA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; GFX9-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; GFX9-HSA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; GFX9-HSA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; GFX9-HSA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX9-HSA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; GFX9-HSA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; GFX9-HSA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; GFX9-HSA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; GFX9-HSA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; GFX9-HSA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-HSA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; GFX9-HSA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; GFX9-HSA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; GFX9-HSA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; GFX9-HSA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; GFX9-HSA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9-HSA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; GFX9-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-HSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; GFX9-HSA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) + ; GFX9-HSA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; GFX9-HSA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) + ; GFX9-HSA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; GFX9-HSA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) + ; GFX9-HSA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; GFX9-HSA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) + ; GFX9-HSA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; GFX9-HSA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32) + ; GFX9-HSA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; GFX9-HSA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) + ; GFX9-HSA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; GFX9-HSA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32) + ; GFX9-HSA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; GFX9-HSA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; GFX9-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; GFX9-MESA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; GFX9-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; GFX9-MESA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; GFX9-MESA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; GFX9-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; GFX9-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; GFX9-MESA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; GFX9-MESA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; GFX9-MESA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; GFX9-MESA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; GFX9-MESA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; GFX9-MESA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX9-MESA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; GFX9-MESA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; GFX9-MESA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; GFX9-MESA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; GFX9-MESA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; GFX9-MESA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-MESA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; GFX9-MESA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; GFX9-MESA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; GFX9-MESA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; GFX9-MESA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; GFX9-MESA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9-MESA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) + ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) + ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) + ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) + ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32) + ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) + ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; GFX9-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32) + ; GFX9-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; GFX9-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<32 x s1>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_load_global_v8s4_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_v8s4_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; CI-HSA-LABEL: name: test_load_global_v8s4_align4 + ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; CI-MESA-LABEL: name: test_load_global_v8s4_align4 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; VI-LABEL: name: test_load_global_v8s4_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<8 x s4>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index a4359d2..0164bcc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -637,28 +637,33 @@ body: | ; SI-LABEL: name: test_load_local_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_load_local_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_load_local_s48_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_load_local_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_load_local_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 3) %2:_(s64) = G_ANYEXT %1 @@ -5241,43 +5246,128 @@ body: | ; SI-LABEL: name: test_load_local_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; SI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_local_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-DS128-LABEL: name: test_load_local_v2s8_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-DS128: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-DS128: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_local_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_local_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 3) @@ -5470,24 +5560,86 @@ body: | ; SI-LABEL: name: test_load_local_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; SI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-LABEL: name: test_load_local_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v4s8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI-DS128: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_local_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) $vgpr0 = COPY %1 @@ -5501,24 +5653,29 @@ body: | ; SI-LABEL: name: test_load_local_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-LABEL: name: test_load_local_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-DS128-LABEL: name: test_load_local_v8s8_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_local_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_local_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -5533,338 +5690,315 @@ body: | ; SI-LABEL: name: test_load_local_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; SI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; SI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; SI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; SI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; SI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) + ; SI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) + ; SI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) + ; SI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) + ; SI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; SI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; SI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; SI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) + ; SI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-LABEL: name: test_load_local_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; CI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; CI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; CI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) + ; CI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) + ; CI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) + ; CI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) + ; CI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; CI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; CI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; CI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-DS128-LABEL: name: test_load_local_v16s8_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) - ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; CI-DS128: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) + ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) - ; CI-DS128: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) + ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) - ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) + ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) - ; CI-DS128: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; CI-DS128: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; CI-DS128: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; CI-DS128: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; CI-DS128: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; CI-DS128: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; CI-DS128: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) - ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) + ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; CI-DS128: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_local_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; VI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; VI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; VI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; VI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) - ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; VI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) + ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) + ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) - ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; VI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) + ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) - ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; VI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; VI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) + ; VI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_local_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) - ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) + ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) + ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) - ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) + ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) - ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX9: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX9: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX9: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX9: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<16 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) + ; GFX9: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 58240b5..678d3bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4089,35 +4089,103 @@ body: | ; SI-LABEL: name: test_load_private_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; SI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 5) @@ -4282,20 +4350,70 @@ body: | ; SI-LABEL: name: test_load_private_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; SI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-LABEL: name: test_load_private_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_private_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) $vgpr0 = COPY %1 @@ -4309,36 +4427,40 @@ body: | ; SI-LABEL: name: test_load_private_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; SI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-LABEL: name: test_load_private_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_private_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_private_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -4353,271 +4475,251 @@ body: | ; SI-LABEL: name: test_load_private_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; SI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; SI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; SI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; SI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; SI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; SI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; SI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; SI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; SI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; SI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; SI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; SI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; SI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; SI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-LABEL: name: test_load_private_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; CI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; CI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; CI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; CI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; CI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; CI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; CI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; CI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; CI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; CI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; CI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; CI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; CI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; CI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_private_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; VI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; VI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; VI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; VI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; VI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; VI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; VI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; VI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; VI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; VI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; VI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; VI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; VI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; VI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_private_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) + ; GFX9: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; GFX9: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; GFX9: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; GFX9: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) + ; GFX9: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 97232c2..81408b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s --- name: test_phi_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir index 40105bd..b6633ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -1,6 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - 2> %t %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=2 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: FileCheck -check-prefixes=ERR %s < %t + +# FIXME: Run with and without unaligned access turned on + +# ERR-NOT: remark +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_sextload_global_v2i16_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_sextload_global_v2i32_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_sextload_global_v2i32_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_sextload_global_v2i64_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_sextload_global_v2i64_from_8) +# ERR-NOT: remark --- name: test_sextload_global_i32_i8 @@ -8,10 +19,14 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sextload_global_i32_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8-LABEL: name: test_sextload_global_i32_i8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) $vgpr0 = COPY %1 @@ -22,10 +37,14 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sextload_global_i32_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX8-LABEL: name: test_sextload_global_i32_i16 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) + ; GFX6-LABEL: name: test_sextload_global_i32_i16 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 1) $vgpr0 = COPY %1 @@ -36,11 +55,16 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sextload_global_i31_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) - ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; GFX8-LABEL: name: test_sextload_global_i31_i8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) + ; GFX8: $vgpr0 = COPY [[COPY1]](s32) + ; GFX6-LABEL: name: test_sextload_global_i31_i8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) + ; GFX6: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s31) = G_SEXTLOAD %0 :: (load 1, addrspace 1) %2:_(s32) = G_ANYEXT %1 @@ -52,11 +76,16 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sextload_global_i64_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-LABEL: name: test_sextload_global_i64_i8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-LABEL: name: test_sextload_global_i64_i8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXTLOAD %0 :: (load 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -67,11 +96,16 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sextload_global_i64_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-LABEL: name: test_sextload_global_i64_i16 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-LABEL: name: test_sextload_global_i64_i16 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXTLOAD %0 :: (load 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -82,12 +116,202 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sextload_global_i64_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX8-LABEL: name: test_sextload_global_i64_i32 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-LABEL: name: test_sextload_global_i64_i32 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXTLOAD %0 :: (load 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... + +--- +name: test_sextload_global_s32_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_s32_from_2_align1 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX8: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s16) + ; GFX8: $vgpr0 = COPY [[SEXT]](s32) + ; GFX6-LABEL: name: test_sextload_global_s32_from_2_align1 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX6: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s16) + ; GFX6: $vgpr0 = COPY [[SEXT]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_sextload_global_s64_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_s64_from_2_align1 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX8: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + ; GFX6-LABEL: name: test_sextload_global_s64_from_2_align1 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX6: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sextload_global_v2i16_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_v2i16_from_2 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) + ; GFX6-LABEL: name: test_sextload_global_v2i16_from_2 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_sextload_global_v2i32_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_v2i32_from_2 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + ; GFX6-LABEL: name: test_sextload_global_v2i32_from_2 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sextload_global_v2i32_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_v2i32_from_4 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + ; GFX6-LABEL: name: test_sextload_global_v2i32_from_4 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sextload_global_v2i64_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_v2i64_from_4 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + ; GFX6-LABEL: name: test_sextload_global_v2i64_from_4 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_sextload_global_v2i64_from_8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_sextload_global_v2i64_from_8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + ; GFX6-LABEL: name: test_sextload_global_v2i64_from_8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 3e48a61..6c2dcfc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -392,18 +392,16 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0 - ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) - ; SI: G_STORE [[TRUNC]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0 + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[INSERT]](<4 x s8>) + ; SI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store 3, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0 - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) - ; VI: G_STORE [[TRUNC]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0 + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[INSERT]](<4 x s8>) + ; VI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store 3, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store 3, addrspace 1, align 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir index 3552d0e..80ea672 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -1,6 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - 2> %t %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=2 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: FileCheck -check-prefixes=ERR %s < %t + +# FIXME: Run with and without unaligned access turned on + +# ERR-NOT: remark +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_zextload_global_v2i16_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_zextload_global_v2i32_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_zextload_global_v2i32_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_zextload_global_v2i64_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_zextload_global_v2i64_from_8) +# ERR-NOT: remark --- name: test_zextload_global_i32_i8 @@ -8,10 +19,14 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_zextload_global_i32_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8-LABEL: name: test_zextload_global_i32_i8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) $vgpr0 = COPY %1 @@ -22,10 +37,14 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_zextload_global_i32_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8-LABEL: name: test_zextload_global_i32_i16 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6-LABEL: name: test_zextload_global_i32_i16 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) $vgpr0 = COPY %1 @@ -36,11 +55,16 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_zextload_global_i31_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) - ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; GFX8-LABEL: name: test_zextload_global_i31_i8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) + ; GFX8: $vgpr0 = COPY [[COPY1]](s32) + ; GFX6-LABEL: name: test_zextload_global_i31_i8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) + ; GFX6: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s31) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) %2:_(s32) = G_ANYEXT %1 @@ -52,11 +76,16 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_zextload_global_i64_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-LABEL: name: test_zextload_global_i64_i8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-LABEL: name: test_zextload_global_i64_i8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -67,11 +96,16 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_zextload_global_i64_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-LABEL: name: test_zextload_global_i64_i16 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-LABEL: name: test_zextload_global_i64_i16 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -82,12 +116,202 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_zextload_global_i64_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX8-LABEL: name: test_zextload_global_i64_i32 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-LABEL: name: test_zextload_global_i64_i32 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... + +--- +name: test_zextload_global_s32_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_s32_from_2_align1 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX8: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX8: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX6-LABEL: name: test_zextload_global_s32_from_2_align1 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX6: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX6: $vgpr0 = COPY [[ZEXT]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_zextload_global_s64_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_s64_from_2_align1 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX8: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX6-LABEL: name: test_zextload_global_s64_from_2_align1 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX6: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zextload_global_v2i16_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_v2i16_from_2 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) + ; GFX6-LABEL: name: test_zextload_global_v2i16_from_2 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_zextload_global_v2i32_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_v2i32_from_2 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + ; GFX6-LABEL: name: test_zextload_global_v2i32_from_2 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zextload_global_v2i32_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_v2i32_from_4 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + ; GFX6-LABEL: name: test_zextload_global_v2i32_from_4 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zextload_global_v2i64_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_v2i64_from_4 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + ; GFX6-LABEL: name: test_zextload_global_v2i64_from_4 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_zextload_global_v2i64_from_8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX8-LABEL: name: test_zextload_global_v2i64_from_8 + ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + ; GFX6-LABEL: name: test_zextload_global_v2i64_from_8 + ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index eb0d73c..5e8b492 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -44,7 +44,7 @@ define void @non_power_of_2() { ret void } - define amdgpu_kernel void @load_constant_v4i16_from_6_align8(<3 x i16> addrspace(4)* %ptr0) { + define amdgpu_kernel void @load_constant_v4i16_from_8_align8(<3 x i16> addrspace(4)* %ptr0) { ret void } @@ -188,15 +188,15 @@ body: | ... --- -name: load_constant_v4i16_from_6_align8 +name: load_constant_v4i16_from_8_align8 legalized: true body: | bb.0: - ; CHECK-LABEL: name: load_constant_v4i16_from_6_align8 + ; CHECK-LABEL: name: load_constant_v4i16_from_8_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6 from %ir.ptr0, align 8, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8 from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 6 from %ir.ptr0, align 8, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load 8 from %ir.ptr0, align 8, addrspace 4) ...