AMDGPU: Run SIFoldOperands after PeepholeOptimizer

author Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 14 Apr 2016 21:58:24 +0000 (21:58 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 14 Apr 2016 21:58:24 +0000 (21:58 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 14 Apr 2016 21:58:24 +0000 (21:58 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 14 Apr 2016 21:58:24 +0000 (21:58 +0000)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

index 08254ba..c79db48 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -202,6 +202,7 @@ public:
    GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : AMDGPUPassConfig(TM, PM) { }
    bool addPreISel() override;
+  void addMachineSSAOptimization() override;
    bool addInstSelector() override;
  #ifdef LLVM_BUILD_GLOBAL_ISEL
    bool addIRTranslator() override;
@@ -323,11 +324,24 @@ bool GCNPassConfig::addPreISel() {
    return false;
  }
  
+void GCNPassConfig::addMachineSSAOptimization() {
+  TargetPassConfig::addMachineSSAOptimization();
+
+  // We want to fold operands after PeepholeOptimizer has run (or as part of
+  // it), because it will eliminate extra copies making it easier to fold the
+  // real source operand. We want to eliminate dead instructions after, so that
+  // we see fewer uses of the copies. We then need to clean up the dead
+  // instructions left over after the operands are folded as well.
+  //
+  // XXX - Can we get away without running DeadMachineInstructionElim again?
+  addPass(&SIFoldOperandsID);
+  addPass(&DeadMachineInstructionElimID);
+}
+
  bool GCNPassConfig::addInstSelector() {
    AMDGPUPassConfig::addInstSelector();
    addPass(createSILowerI1CopiesPass());
    addPass(&SIFixSGPRCopiesID);
-  addPass(createSIFoldOperandsPass());
    return false;
  }
  
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index 01a8054..ba5b8a2 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1058,6 +1058,8 @@ static void removeModOperands(MachineInstr &MI) {
    MI.RemoveOperand(Src0ModIdx);
  }
  
+// TODO: Maybe this should be removed, and everything custom folded in
+// SIFoldOperands instead?
  bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                                  unsigned Reg, MachineRegisterInfo *MRI) const {
    if (!MRI->hasOneNonDBGUse(Reg))
@@ -1073,6 +1075,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
        return false;
      }
  
+    const MachineOperand &ImmOp = DefMI->getOperand(1);
+
+    // If this is a free constant, there's no reason to do this.
+    // TODO: We could fold this here instead of letting SIFoldOperands do it
+    // later.
+    if (isInlineConstant(ImmOp, 4))
+      return false;
+
      MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
      MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
      MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
diff --git a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll

index c5d479a..bce3fe9 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -159,7 +159,7 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float
  ; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32
  ; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
  ; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, |[[R2]]|
  ; SI: buffer_store_dword [[RESULT]]
  define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fma.ll b/llvm/test/CodeGen/AMDGPU/fma.ll

index d6024aa..d04a594 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.ll
@@ -61,7 +61,7 @@ define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)*
  }
  
  ; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
-; SI: v_fma_f32 {{v[0-9]+}}, 2.0, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
  define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
    %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
    %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll

index 4094311..412282d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -96,8 +96,8 @@ define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float ad
  }
  
  ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
-; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, 2.0, {{v\[[0-9]+:[0-9]+\]}}
-; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, 4.0, {{v\[[0-9]+:[0-9]+\]}}
+; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
+; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
  define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
    %tid = call i32 @llvm.r600.read.tidig.x()
    %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f64.ll b/llvm/test/CodeGen/AMDGPU/fneg.f64.ll

index aa6df20..7627a4d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
@@ -39,7 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
  ; unless the target returns true for isNegFree()
  
  ; FUNC-LABEL: {{^}}fneg_free_f64:
-; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}}
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
  define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
    %bc = bitcast i64 %in to double
    %fsub = fsub double 0.0, %bc
diff --git a/llvm/test/CodeGen/AMDGPU/fsub64.ll b/llvm/test/CodeGen/AMDGPU/fsub64.ll

index f34a48e..f1b970a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fsub64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub64.ll
@@ -47,7 +47,7 @@ define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
  }
  
  ; SI-LABEL: {{^}}s_fsub_imm_f64:
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}, 4.0
  define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
    %sub = fsub double 4.0, %a
    store double %sub, double addrspace(1)* %out
@@ -55,7 +55,7 @@ define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
  }
  
  ; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}}, -4.0
  define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
    %sub = fsub double %a, 4.0
    store double %sub, double addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll

index d443469..298cb41 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll
@@ -322,7 +322,7 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0.0
@@ -333,7 +333,7 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0.5
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0.5
@@ -344,7 +344,7 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -0.5
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, -0.5
@@ -355,7 +355,7 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1.0
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 1.0
@@ -366,7 +366,7 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1.0
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, -1.0
@@ -377,7 +377,7 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2.0
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 2.0
@@ -388,7 +388,7 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2.0
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, -2.0
@@ -399,7 +399,7 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 4.0
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 4.0
@@ -410,7 +410,7 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -4.0
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, -4.0
@@ -422,7 +422,7 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_1_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1{{$}}
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0x0000000000000001
@@ -433,7 +433,7 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_2_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2{{$}}
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0x0000000000000002
@@ -444,7 +444,7 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_16_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 16
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0x0000000000000010
@@ -455,7 +455,7 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0xffffffffffffffff
@@ -466,7 +466,7 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0xfffffffffffffffe
@@ -477,7 +477,7 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -16
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0xfffffffffffffff0
@@ -488,7 +488,7 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_63_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 63
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0x000000000000003F
@@ -499,7 +499,7 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
  ; CHECK-LABEL: {{^}}add_inline_imm_64_f64:
  ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 64
  ; CHECK: buffer_store_dwordx2 [[REG]]
  define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
    %y = fadd double %x, 0x0000000000000040
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll

index e06dbdd..efea3eb 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
  ; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s
  
  ; FIXME: Enable for VI.
@@ -45,7 +45,7 @@ define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a,
  ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
  ; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
  ; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
-; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VA]], [[VC]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], 1.0, [[VC]]
  ; SI: buffer_store_dword [[RESULT]],
  ; SI: s_endpgm
  define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
@@ -146,7 +146,7 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
  
  ; SI: BB9_2:
  ; SI: s_or_b64 exec, exec, [[SAVE]]
-; SI: v_cmp_ne_i32_e32 vcc, 0, v0
+; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
  ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
  ; SI: buffer_store_dword
  ; SI: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll

index 219684c..1adf824 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -1,13 +1,17 @@
  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
  ; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
+ ; FIXME: None of these trigger madmk emission anymore. It is still
+ ; possible, but requires the correct registers to be used which is
+ ; hard to trigger.
+
  declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  declare float @llvm.fabs.f32(float) nounwind readnone
  
  ; GCN-LABEL: {{^}}madmk_f32:
  ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
  ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
+; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
  define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
    %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -182,7 +186,7 @@ define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float ad
  
  ; SI-LABEL: {{^}}kill_madmk_verifier_error:
  ; SI: s_xor_b64
-; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
+; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
  ; SI: s_or_b64
  define void @kill_madmk_verifier_error() nounwind {
  bb:
diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll

index 94e0f96..0c1f701 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.ll
@@ -96,8 +96,8 @@ define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
  }
  
  ; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
-; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
-; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9
+; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9
  ; SI: s_endpgm
  define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
    %val = load i32, i32 addrspace(1)* %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll b/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll

index d2c9e80..45051d9 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
@@ -3,7 +3,7 @@
  ; register operands in the correct order when modifying the opcode of an
  ; instruction to V_ADD_I32_e32.
  
-; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 killed %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
  
  define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  entry:
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll

index b9c34c4..7c58f2d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -10,14 +10,14 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
    ret void
  }
  
-; SI-LABEL: {{^}}sint_to_fp_i1_f64:
-; SI: v_cmp_eq_i32_e64 vcc,
  ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
  ; uses an SGPR (implicit vcc).
+
+; SI-LABEL: {{^}}sint_to_fp_i1_f64:
+; SI-DAG: v_cmp_eq_i32_e64 vcc,
  ; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
  ; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
  ; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
-
  ; SI: s_endpgm
  define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
    %cmp = icmp eq i32 %in, 0
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll

index 7a844d5..6740657 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -230,9 +230,8 @@ define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  ; GCN-LABEL: {{^}}s_ashr_63_i64:
  ; GCN-DAG: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
  ; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31
-; GCN-DAG: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]]
-; GCN-DAG: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
-; GCN: s_addc_u32 {{s[0-9]+}}, s[[COPYSHIFT]], {{s[0-9]+}}
+; GCN: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
+; GCN: s_addc_u32 {{s[0-9]+}}, s[[SHIFT]], {{s[0-9]+}}
  define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
    %result = ashr i64 %a, 63
    %add = add i64 %result, %b
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll

index 2723c0d..b36ce6b 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -70,10 +70,11 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
    ret void
  }
  
-; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
-; SI: v_cmp_eq_i32_e64 vcc
  ; We can't fold the SGPRs into v_cndmask_b32_e32, because it already
  ; uses an SGPR (implicit vcc).
+
+; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
+; SI-DAG: v_cmp_eq_i32_e64 vcc
  ; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
  ; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
  ; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll

index be0670d..d76a839 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
  
  declare float @llvm.fma.f32(float, float, float) #1
@@ -107,7 +107,7 @@ define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, fl
  
  ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
  ; GCN: s_load_dword [[SGPR:s[0-9]+]]
-; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
  ; GCN: buffer_store_dword [[RESULT]]
  define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
    %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
@@ -227,7 +227,7 @@ define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out
  ; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
  ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
  
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS1]], [[SGPR0]], [[VK0]]
  ; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
  ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
  
@@ -254,7 +254,7 @@ define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  
  ; Same zero component is re-used for half of each immediate.
  ; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
-; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
+; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
  
  ; GCN: buffer_store_dwordx2 [[RESULT0]]
  ; GCN: buffer_store_dwordx2 [[RESULT1]]
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll

index cec267f..ff05027 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -24,7 +24,7 @@ entry:
  
  ; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
  ; GCN-NOT: v_mac_f32
-; GCN: v_mad_f32 v{{[0-9]}}, 0.5, s{{[0-9]+}}, 0.5
+; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5
  define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) {
  entry:
    %tmp0 = fmul float 0.5, %in
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 14 Apr 2016 21:58:24 +0000 (21:58 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 14 Apr 2016 21:58:24 +0000 (21:58 +0000)
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/commute_modifiers.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fma.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fmed3.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fneg.f64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fsub64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/imm.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/madmk.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/mul.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/sra.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/v_mac.ll		patch \| blob \| history