r300/compiler: optimize CMP for vertex shaders a bit
author Marek Olšák <maraeo@gmail.com>
Sun, 18 Apr 2010 18:49:50 +0000 (20:49 +0200)
committer Marek Olšák <maraeo@gmail.com>
Sun, 18 Apr 2010 22:35:26 +0000 (00:35 +0200)
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c

index f5b7d57..fced31d 100644 (file)
@@ -511,37 +511,26 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
 {
        /* There is no decent CMP available, so let's rig one up.
         * CMP is defined as dst = src0 < 0.0 ? src1 : src2
-        * The following sequence consumes two temps and three extra slots,
+        * The following sequence consumes two temps and two extra slots
+        * (the second temp and the second slot are consumed by transform_LRP),
         * but should be equivalent:
         *
         * SLT tmp0, src0, 0.0
-        * SGE tmp1, src0, 0.0
-        * MUL tmp0, tmp0, src1
-        * MAD dst, src2, tmp1, tmp0
+        * LRP dst, tmp0, src1, src2
         *
-        * Yes, I know, I'm a mad scientist. ~ C. */
+        * Yes, I know, I'm a mad scientist. ~ C. & M. */
        int tempreg0 = rc_find_free_temporary(c);
-       int tempreg1 = rc_find_free_temporary(c);
 
        /* SLT tmp0, src0, 0.0 */
        emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
                dstreg(RC_FILE_TEMPORARY, tempreg0),
                inst->U.I.SrcReg[0], builtin_zero);
 
-       /* SGE tmp1, src0, 0.0 */
-       emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
-               dstreg(RC_FILE_TEMPORARY, tempreg1),
-               inst->U.I.SrcReg[0], builtin_zero);
-
-       /* MUL tmp0, tmp0, src1 */
-       emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
-               dstreg(RC_FILE_TEMPORARY, tempreg0),
-               srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]);
-
-       /* MAD dst, src2, tmp1, tmp0 */
-       emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
-               inst->U.I.DstReg,
-               inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0));
+       /* LRP dst, tmp0, src1, src2 */
+       transform_LRP(c,
+               emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+                     inst->U.I.DstReg,
+                     srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1],  inst->U.I.SrcReg[2]));
 
        rc_remove_instruction(inst);
 }