return true;
}
-static bool isZeroOrUndef(int X) {
- return X == 0 || X == -1;
-}
-
-static bool isOneOrUndef(int X) {
- return X == 1 || X == -1;
-}
-
-static bool isZeroOrOneOrUndef(int X) {
- return X == 0 || X == 1 || X == -1;
-}
-
-// Normalize a VOP3P shuffle mask to refer to the low/high half of a single
-// 32-bit register.
-static Register normalizeVOP3PMask(int NewMask[2], Register Src0, Register Src1,
- ArrayRef<int> Mask) {
- NewMask[0] = Mask[0];
- NewMask[1] = Mask[1];
- if (isZeroOrOneOrUndef(Mask[0]) && isZeroOrOneOrUndef(Mask[1]))
- return Src0;
-
- assert(NewMask[0] == 2 || NewMask[0] == 3 || NewMask[0] == -1);
- assert(NewMask[1] == 2 || NewMask[1] == 3 || NewMask[1] == -1);
-
- // Shift the mask inputs to be 0/1;
- NewMask[0] = NewMask[0] == -1 ? -1 : NewMask[0] - 2;
- NewMask[1] = NewMask[1] == -1 ? -1 : NewMask[1] - 2;
- return Src1;
-}
-
-// This is only legal with VOP3P instructions as an aid to op_sel matching.
-bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
- MachineInstr &MI) const {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- Register Src1Reg = MI.getOperand(2).getReg();
- ArrayRef<int> ShufMask = MI.getOperand(3).getShuffleMask();
-
- const LLT V2S16 = LLT::fixed_vector(2, 16);
- if (MRI->getType(DstReg) != V2S16 || MRI->getType(Src0Reg) != V2S16)
- return false;
-
- if (!AMDGPU::isLegalVOP3PShuffleMask(ShufMask))
- return false;
-
- assert(ShufMask.size() == 2);
-
- MachineBasicBlock *MBB = MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
-
- const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
- const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
- const TargetRegisterClass &RC = IsVALU ?
- AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
-
- // Handle the degenerate case which should have folded out.
- if (ShufMask[0] == -1 && ShufMask[1] == -1) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), DstReg);
-
- MI.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, RC, *MRI);
- }
-
- // A legal VOP3P mask only reads one of the sources.
- int Mask[2];
- Register SrcVec = normalizeVOP3PMask(Mask, Src0Reg, Src1Reg, ShufMask);
-
- if (!RBI.constrainGenericRegister(DstReg, RC, *MRI) ||
- !RBI.constrainGenericRegister(SrcVec, RC, *MRI))
- return false;
-
- // TODO: This also should have been folded out
- if (isZeroOrUndef(Mask[0]) && isOneOrUndef(Mask[1])) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), DstReg)
- .addReg(SrcVec);
-
- MI.eraseFromParent();
- return true;
- }
-
- if (Mask[0] == 1 && Mask[1] == -1) {
- if (IsVALU) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
- .addImm(16)
- .addReg(SrcVec);
- } else {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
- .addReg(SrcVec)
- .addImm(16);
- }
- } else if (Mask[0] == -1 && Mask[1] == 0) {
- if (IsVALU) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), DstReg)
- .addImm(16)
- .addReg(SrcVec);
- } else {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHL_B32), DstReg)
- .addReg(SrcVec)
- .addImm(16);
- }
- } else if (Mask[0] == 0 && Mask[1] == 0) {
- if (IsVALU) {
- if (STI.hasSDWA()) {
- // Write low half of the register into the high half.
- MachineInstr *MovSDWA =
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
- .addImm(0) // $src0_modifiers
- .addReg(SrcVec) // $src0
- .addImm(0) // $clamp
- .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel
- .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
- .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel
- .addReg(SrcVec, RegState::Implicit);
- MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
- } else {
- Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
- .addImm(0xFFFF)
- .addReg(SrcVec);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg)
- .addReg(TmpReg)
- .addImm(16)
- .addReg(TmpReg);
- }
- } else {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
- .addReg(SrcVec)
- .addReg(SrcVec);
- }
- } else if (Mask[0] == 1 && Mask[1] == 1) {
- if (IsVALU) {
- if (STI.hasSDWA()) {
- // Write high half of the register into the low half.
- MachineInstr *MovSDWA =
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
- .addImm(0) // $src0_modifiers
- .addReg(SrcVec) // $src0
- .addImm(0) // $clamp
- .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel
- .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
- .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel
- .addReg(SrcVec, RegState::Implicit);
- MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
- } else {
- Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
- .addImm(16)
- .addReg(SrcVec);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg)
- .addReg(TmpReg)
- .addImm(16)
- .addReg(TmpReg);
- }
- } else {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg)
- .addReg(SrcVec)
- .addReg(SrcVec);
- }
- } else if (Mask[0] == 1 && Mask[1] == 0) {
- if (IsVALU) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_ALIGNBIT_B32_e64), DstReg)
- .addReg(SrcVec)
- .addReg(SrcVec)
- .addImm(16);
- } else {
- if (STI.hasSPackHL()) {
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HL_B32_B16), DstReg)
- .addReg(SrcVec)
- .addReg(SrcVec);
- } else {
- Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg)
- .addReg(SrcVec)
- .addImm(16);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
- .addReg(TmpReg)
- .addReg(SrcVec);
- }
- }
- } else
- llvm_unreachable("all shuffle masks should be handled");
-
- MI.eraseFromParent();
- return true;
-}
-
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
unsigned Opc;
unsigned Size = MI.getOperand(3).getImm();
return selectG_EXTRACT_VECTOR_ELT(I);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return selectG_INSERT_VECTOR_ELT(I);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return selectG_SHUFFLE_VECTOR(I);
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
+++ /dev/null
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX11 %s
-
----
-name: v_shufflevector_v2s16_v2s16_u_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_u
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX9-NEXT: $vgpr0 = COPY [[DEF]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_u
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX11-NEXT: $vgpr0 = COPY [[DEF]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_0_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_u
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_u
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: $vgpr0 = COPY [[COPY]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_u_0
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_0
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_0
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_1_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_u
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_u
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_u_1
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_1
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_1
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: $vgpr0 = COPY [[COPY]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1)
- $vgpr0 = COPY %2
-
-...
-
-
----
-name: v_shufflevector_v2s16_v2s16_2_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_u
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_u
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: $vgpr0 = COPY [[COPY]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_u_2
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_2
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_2
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_3_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_u
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_u
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_u_3
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_3
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_3
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: $vgpr0 = COPY [[COPY]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_0_0
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_0
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_0
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_AND_B32_e32_]], 16, [[V_AND_B32_e32_]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_0_1
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_1
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_1
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: $vgpr0 = COPY [[COPY]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_1_0
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_0
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec
- ; GFX9-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_0
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_1_1
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_1
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_1
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_LSHRREV_B32_e64_]], 16, [[V_LSHRREV_B32_e64_]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_2_2
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_2
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_2
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_AND_B32_e32_]], 16, [[V_AND_B32_e32_]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_2_3
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_3
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: $vgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_3
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: $vgpr0 = COPY [[COPY]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_3_2
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_2
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec
- ; GFX9-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_2
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2)
- $vgpr0 = COPY %2
-
-...
-
----
-name: v_shufflevector_v2s16_v2s16_3_3
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_3
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
- ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_3
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_LSHRREV_B32_e64_]], 16, [[V_LSHRREV_B32_e64_]], implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]]
- %0:vgpr(<2 x s16>) = COPY $vgpr0
- %1:vgpr(<2 x s16>) = COPY $vgpr1
- %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3)
- $vgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_u_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_u
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GFX9-NEXT: $sgpr0 = COPY [[DEF]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_u
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GFX11-NEXT: $sgpr0 = COPY [[DEF]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_0_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_u
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: $sgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_u
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_u_0
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_0
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
- ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_0
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
- ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_1_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_u
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc
- ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_u
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc
- ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_u_1
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_1
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: $sgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_1
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1)
- $sgpr0 = COPY %2
-
-...
-
-
----
-name: s_shufflevector_v2s16_v2s16_2_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_u
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: $sgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_u
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_u_2
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_2
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
- ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_2
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
- ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_3_u
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_u
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc
- ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_u
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc
- ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_u_3
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_3
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: $sgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_3
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_0_0
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_0
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
- ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_0
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
- ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_0_1
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_1
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: $sgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_1
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_1_0
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_0
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc
- ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]]
- ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_0
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[COPY]]
- ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HL_B32_B16_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_1_1
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_1
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX9-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]]
- ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_1
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]]
- ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_2_2
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_2
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
- ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_2
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
- ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_2_3
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_3
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: $sgpr0 = COPY [[COPY]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_3
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_3_2
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_2
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc
- ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]]
- ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_2
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[COPY]]
- ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HL_B32_B16_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2)
- $sgpr0 = COPY %2
-
-...
-
----
-name: s_shufflevector_v2s16_v2s16_3_3
-tracksRegLiveness: true
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0, $sgpr1
-
- ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_3
- ; GFX9: liveins: $sgpr0, $sgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX9-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]]
- ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]]
- ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_3
- ; GFX11: liveins: $sgpr0, $sgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GFX11-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]]
- ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]]
- %0:sgpr(<2 x s16>) = COPY $sgpr0
- %1:sgpr(<2 x s16>) = COPY $sgpr1
- %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3)
- $sgpr0 = COPY %2
-
-...
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, undef)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, undef)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(1, 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(1, 1)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(2, 2)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(2, undef)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, 2)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(2, 3)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(3, 2)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, 3)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(3, undef)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(3, 3)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3)
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 0)
- ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0)