r300/compiler: implement SIN+COS+SCS for vertex shaders
authorMarek Olšák <maraeo@gmail.com>
Sat, 5 Jun 2010 03:07:41 +0000 (05:07 +0200)
committerMarek Olšák <maraeo@gmail.com>
Sat, 5 Jun 2010 05:03:15 +0000 (07:03 +0200)
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h

index dd30785..507b2e5 100644 (file)
@@ -360,6 +360,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                switch (vpi->Opcode) {
                case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
                case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+               case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
                case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
                case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
                case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
@@ -378,6 +379,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
                case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
                case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+               case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
                case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
                case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
                default:
@@ -605,8 +607,9 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
        {
                struct radeon_program_transformation transformations[] = {
                        { &r300_transform_vertex_alu, 0 },
+                       { &r300_transform_trig_scale_vertex, 0 }
                };
-               radeonLocalTransform(&compiler->Base, 1, transformations);
+               radeonLocalTransform(&compiler->Base, 2, transformations);
        }
 
        debug_program_log(compiler, "after native rewrite");
index d14de79..c922d3d 100644 (file)
@@ -848,6 +848,34 @@ int radeonTransformTrigSimple(struct radeon_compiler* c,
        return 1;
 }
 
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       unsigned srctmp)
+{
+       if (inst->U.I.Opcode == RC_OPCODE_COS) {
+               emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+                       srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+       } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+               emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+                       inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+       } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+               struct rc_dst_register moddst = inst->U.I.DstReg;
+
+               if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+                       moddst.WriteMask = RC_MASK_X;
+                       emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+                               srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+               }
+               if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+                       moddst.WriteMask = RC_MASK_Y;
+                       emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+                               srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+               }
+       }
+
+       rc_remove_instruction(inst);
+}
+
 
 /**
  * Transform the trigonometric functions COS, SIN, and SCS
@@ -880,29 +908,48 @@ int radeonTransformTrigScale(struct radeon_compiler* c,
        emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
                srcreg(RC_FILE_TEMPORARY, temp));
 
-       if (inst->U.I.Opcode == RC_OPCODE_COS) {
-               emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
-                       srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-       } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
-               emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
-                       inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-       } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
-               struct rc_dst_register moddst = inst->U.I.DstReg;
+       r300_transform_SIN_COS_SCS(c, inst, temp);
+       return 1;
+}
 
-               if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
-                       moddst.WriteMask = RC_MASK_X;
-                       emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
-                               srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-               }
-               if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
-                       moddst.WriteMask = RC_MASK_Y;
-                       emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
-                               srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-               }
-       }
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       void *unused)
+{
+       if (inst->U.I.Opcode != RC_OPCODE_COS &&
+           inst->U.I.Opcode != RC_OPCODE_SIN &&
+           inst->U.I.Opcode != RC_OPCODE_SCS)
+               return 0;
 
-       rc_remove_instruction(inst);
+       /* Repeat x in the range [-PI, PI]:
+        *
+        *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+        */
+
+       static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+       unsigned int temp;
+       unsigned int constant;
+
+       temp = rc_find_free_temporary(c);
+       constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+               swizzle_xxxx(inst->U.I.SrcReg[0]),
+               srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+               srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+       emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+               srcreg(RC_FILE_TEMPORARY, temp));
+       emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+               srcreg(RC_FILE_TEMPORARY, temp),
+               srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+               srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
 
+       r300_transform_SIN_COS_SCS(c, inst, temp);
        return 1;
 }
 
index 7cb5f84..77d4444 100644 (file)
@@ -50,6 +50,11 @@ int radeonTransformTrigScale(
        struct rc_instruction * inst,
        void*);
 
+int r300_transform_trig_scale_vertex(
+       struct radeon_compiler *c,
+       struct rc_instruction *inst,
+       void*);
+
 int radeonTransformDeriv(
        struct radeon_compiler * c,
        struct rc_instruction * inst,