AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 20 Mar 2019 20:41:34 +0000 (20:41 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 20 Mar 2019 20:41:34 +0000 (20:41 +0000)
The constantness shouldn't change the register bank choice. We also
don't need to restrict this to only indexing VGPRs, since it's
possible to index SGPRs (but SelectionDAG made using this
difficult). Allow directly indexing SGPRs when appropriate.

llvm-svn: 356611

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir

index 0d90481660fe7b96315b7640e691003ba6530c0f..42742a649b36a8817caa5e23d7fee89d763f94c7 100644 (file)
@@ -52,24 +52,6 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
 
 }
 
-static bool isConstant(const MachineOperand &MO, int64_t &C) {
-  const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-  const MachineRegisterInfo &MRI = MF->getRegInfo();
-  const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
-  if (!Def)
-    return false;
-
-  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
-    C = Def->getOperand(1).getCImm()->getSExtValue();
-    return true;
-  }
-
-  if (Def->getOpcode() == AMDGPU::COPY)
-    return isConstant(Def->getOperand(1), C);
-
-  return false;
-}
-
 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           const RegisterBank &Src,
                                           unsigned Size) const {
@@ -816,42 +798,35 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
 
   case AMDGPU::G_EXTRACT_VECTOR_ELT: {
-    unsigned IdxOp = 2;
-    int64_t Imm;
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
+    unsigned OutputBankID = isSALUMapping(MI) ?
                             AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
+    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
-    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
-    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
 
     // The index can be either if the source vector is VGPR.
-    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
+    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_INSERT_VECTOR_ELT: {
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    int64_t Imm;
-
-    unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
-    unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
-                      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
-
+    unsigned OutputBankID = isSALUMapping(MI) ?
+      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
 
-    // TODO: Can do SGPR indexing, which would obviate the need for the
-    // isConstant check.
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
-      OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
-    }
+    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+    unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+    unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
 
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
 
+    // The index can be either if the source vector is VGPR.
+    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_UNMERGE_VALUES: {
index 558129ee7f8441453cc7315b62ca330a0a929208..b7dadd8367f336ad9fd8d215c31fa76483e0a25c 100644 (file)
@@ -1,39 +1,76 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: extract_vector_elt_0_v2i32_s
+name: extract_vector_elt_v16i32_ss
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1
-    ; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
     ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
-    %0:_(<2 x s32>) = COPY $sgpr0_sgpr1
-    %1:_(s32) = G_CONSTANT i32 0
+    %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:_(s32) = COPY $sgpr16
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0 = COPY %2
 ...
 
+---
+name: extract_vector_elt_v16i32_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>)
+    ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_vector_elt_v16i32_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
 
 ---
-name: extract_vector_elt_0_v4i32_s
+name: extract_vector_elt_v16i32_vv
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32)
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
+    ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
     ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
-    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = G_CONSTANT i32 0
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $vgpr16
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0 = COPY %2
 ...
index ea02e5fab21b4c4de243b62d8890717ed371fbe6..dd77ff1afed81a8b3b2741cba4a80e7b7f7a7f10 100644 (file)
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: insert_vector_elt_v4i32_s_s_k
+name: insert_vector_elt_v4i32_s_s_s
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5
-    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $sgpr5
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_v4i32_v_s_k
+name: insert_vector_elt_v4i32_v_s_s
 legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5
-    ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = COPY $sgpr1
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_v4i32_s_v_k
+name: insert_vector_elt_v4i32_s_v_s
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5
-    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $vgpr2
-    %2:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr4
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_var_v4i32_s_s_s
+name: insert_vector_elt_v4i32_s_s_v
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6
-    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $sgpr6
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $vgpr0
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_var_v4i32_s_s_v
+name: insert_vector_elt_v4i32_s_v_v
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6
-    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $vgpr1
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
@@ -116,17 +116,38 @@ legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr0
+
     ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $vgpr0
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_v_v_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0
+
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr0
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
@@ -137,16 +158,17 @@ legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
+
     ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
     ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $vgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s32) = COPY $vgpr5
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
 ...