R600/SI: Use V_ADD_F32 instead of V_MOV_B32 for clamp/neg/abs modifiers.
authorMichel Danzer <michel.daenzer@amd.com>
Mon, 11 Feb 2013 15:58:21 +0000 (15:58 +0000)
committerMichel Danzer <michel.daenzer@amd.com>
Mon, 11 Feb 2013 15:58:21 +0000 (15:58 +0000)
The modifiers don't seem to have any effect with V_MOV_B32, supposedly it's
meant to just move bits untouched.

Fixes 46 piglit tests with radeonsi, though unfortunately 11 of those had
just regressed because they started using the clamp modifier.

NOTE: This is a candidate for the Mesa stable branch.

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
llvm-svn: 174890

llvm/lib/Target/R600/SIISelLowering.cpp

index afafa8c..ceab692 100644 (file)
@@ -74,13 +74,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   case AMDGPU::BRANCH: return BB;
   case AMDGPU::CLAMP_SI:
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
-           // VSRC1-2 are unused, but we still need to fill all the
-           // operand slots, so we just reuse the VSRC0 operand
-           .addOperand(MI->getOperand(1))
-           .addOperand(MI->getOperand(1))
+           .addReg(AMDGPU::SREG_LIT_0)
+           .addReg(AMDGPU::SREG_LIT_0)
            .addImm(0) // ABS
            .addImm(1) // CLAMP
            .addImm(0) // OMOD
@@ -89,13 +87,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     break;
 
   case AMDGPU::FABS_SI:
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
                  .addOperand(MI->getOperand(0))
                  .addOperand(MI->getOperand(1))
-                 // VSRC1-2 are unused, but we still need to fill all the
-                 // operand slots, so we just reuse the VSRC0 operand
-                 .addOperand(MI->getOperand(1))
-                 .addOperand(MI->getOperand(1))
+                 .addReg(AMDGPU::SREG_LIT_0)
+                 .addReg(AMDGPU::SREG_LIT_0)
                  .addImm(1) // ABS
                  .addImm(0) // CLAMP
                  .addImm(0) // OMOD
@@ -104,13 +100,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     break;
 
   case AMDGPU::FNEG_SI:
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
                  .addOperand(MI->getOperand(0))
                  .addOperand(MI->getOperand(1))
-                 // VSRC1-2 are unused, but we still need to fill all the
-                 // operand slots, so we just reuse the VSRC0 operand
-                 .addOperand(MI->getOperand(1))
-                 .addOperand(MI->getOperand(1))
+                 .addReg(AMDGPU::SREG_LIT_0)
+                 .addReg(AMDGPU::SREG_LIT_0)
                  .addImm(0) // ABS
                  .addImm(0) // CLAMP
                  .addImm(0) // OMOD