r300_fragprog: Emulate trigonometric functions in radeon_program_alu
author Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 5 Jul 2008 21:54:31 +0000 (23:54 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 6 Jul 2008 08:00:35 +0000 (10:00 +0200)
src/mesa/drivers/dri/r300/r300_fragprog.c
src/mesa/drivers/dri/r300/r300_fragprog_emit.c
src/mesa/drivers/dri/r300/radeon_program_alu.c
src/mesa/drivers/dri/r300/radeon_program_alu.h

index 57987f5..8a1d690 100644 (file)
@@ -408,12 +408,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
 
                struct radeon_program_transformation transformations[] = {
                        { &transform_TEX, &compiler },
-                       { &radeonTransformALU, 0 }
+                       { &radeonTransformALU, 0 },
+                       { &radeonTransformTrigSimple, 0 }
                };
                radeonLocalTransform(
                        r300->radeon.glCtx,
                        compiler.program,
-                       2, transformations);
+                       3, transformations);
 
                if (RADEON_DEBUG & DEBUG_PIXEL) {
                        _mesa_printf("Fragment Program: After transformations:\n");
index 30f513b..4786b45 100644 (file)
@@ -1423,40 +1423,11 @@ static void emit_arith(struct r300_pfs_compile_state *cs,
        return;
 }
 
-static GLfloat SinCosConsts[2][4] = {
-       {
-        1.273239545,           // 4/PI
-        -0.405284735,          // -4/(PI*PI)
-        3.141592654,           // PI
-        0.2225                 // weight
-        },
-       {
-        0.75,
-        0.0,
-        0.159154943,           // 1/(2*PI)
-        6.283185307            // 2*PI
-        }
-};
-
-static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i)
-{
-       struct prog_src_register srcreg;
-       GLuint constant_swizzle;
-
-       srcreg.File = PROGRAM_CONSTANT;
-       srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
-               SinCosConsts[i], 4, &constant_swizzle);
-       srcreg.Swizzle = constant_swizzle;
-
-       return emit_const4fv(cs, srcreg);
-}
-
 static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi)
 {
        COMPILE_STATE;
-       GLuint src[3], dest, temp[2];
+       GLuint src[3], dest;
        int flags, mask = 0;
-       int const_sin[2];
 
        if (fpi->SaturateMode == SATURATE_ZERO_ONE)
                flags = PFS_FLAG_SAT;
@@ -1485,60 +1456,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
                emit_arith(cs, PFS_OP_CMP, dest, mask,
                                src[2], src[1], src[0], flags);
                break;
-       case OPCODE_COS:
-               /*
-                       * cos using a parabola (see SIN):
-                       * cos(x):
-                       *   x = (x/(2*PI))+0.75
-                       *   x = frac(x)
-                       *   x = (x*2*PI)-PI
-                       *   result = sin(x)
-                       */
-               temp[0] = get_temp_reg(cs);
-               const_sin[0] = emit_sincosconsts(cs, 0);
-               const_sin[1] = emit_sincosconsts(cs, 1);
-               src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
-
-               /* add 0.5*PI and do range reduction */
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
-                               swizzle(src[0], X, X, X, X),
-                               swizzle(const_sin[1], Z, Z, Z, Z),
-                               swizzle(const_sin[1], X, X, X, X), 0);
-
-               emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X,
-                               swizzle(temp[0], X, X, X, X),
-                               undef, undef, 0);
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W),       //2*PI
-                               negate(swizzle(const_sin[0], Z, Z, Z, Z)),      //-PI
-                               0);
-
-               /* SIN */
-
-               emit_arith(cs, PFS_OP_MAD, temp[0],
-                               WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
-                                                               Z, Z, Z,
-                                                               Z),
-                               const_sin[0], pfs_zero, 0);
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
-                               swizzle(temp[0], Y, Y, Y, Y),
-                               absolute(swizzle(temp[0], Z, Z, Z, Z)),
-                               swizzle(temp[0], X, X, X, X), 0);
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y,
-                               swizzle(temp[0], X, X, X, X),
-                               absolute(swizzle(temp[0], X, X, X, X)),
-                               negate(swizzle(temp[0], X, X, X, X)), 0);
-
-               emit_arith(cs, PFS_OP_MAD, dest, mask,
-                               swizzle(temp[0], Y, Y, Y, Y),
-                               swizzle(const_sin[0], W, W, W, W),
-                               swizzle(temp[0], X, X, X, X), flags);
-
-               free_temp(cs, temp[0]);
-               break;
        case OPCODE_DP3:
                src[0] = t_src(cs, fpi->SrcReg[0]);
                src[1] = t_src(cs, fpi->SrcReg[1]);
@@ -1609,127 +1526,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
                emit_arith(cs, PFS_OP_RSQ, dest, mask,
                                absolute(src[0]), pfs_zero, pfs_zero, flags);
                break;
-       case OPCODE_SCS:
-               /*
-                       * scs using a parabola :
-                       * scs(x):
-                       *   result.x = sin(-abs(x)+0.5*PI)  (cos)
-                       *   result.y = sin(x)               (sin)
-                       *
-                       */
-               temp[0] = get_temp_reg(cs);
-               temp[1] = get_temp_reg(cs);
-               const_sin[0] = emit_sincosconsts(cs, 0);
-               const_sin[1] = emit_sincosconsts(cs, 1);
-               src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
-
-               /* x = -abs(x)+0.5*PI */
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z),     //PI
-                               pfs_half,
-                               negate(abs
-                                       (swizzle(keep(src[0]), X, X, X, X))),
-                               0);
-
-               /* C*x (sin) */
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W,
-                               swizzle(const_sin[0], Y, Y, Y, Y),
-                               swizzle(keep(src[0]), X, X, X, X),
-                               pfs_zero, 0);
-
-               /* B*x, C*x (cos) */
-               emit_arith(cs, PFS_OP_MAD, temp[0],
-                               WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
-                                                               Z, Z, Z,
-                                                               Z),
-                               const_sin[0], pfs_zero, 0);
-
-               /* B*x (sin) */
-               emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W,
-                               swizzle(const_sin[0], X, X, X, X),
-                               keep(src[0]), pfs_zero, 0);
-
-               /* y = B*x + C*x*abs(x) (sin) */
-               emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z,
-                               absolute(src[0]),
-                               swizzle(temp[0], W, W, W, W),
-                               swizzle(temp[1], W, W, W, W), 0);
-
-               /* y = B*x + C*x*abs(x) (cos) */
-               emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W,
-                               swizzle(temp[0], Y, Y, Y, Y),
-                               absolute(swizzle(temp[0], Z, Z, Z, Z)),
-                               swizzle(temp[0], X, X, X, X), 0);
-
-               /* y*abs(y) - y (cos), y*abs(y) - y (sin) */
-               emit_arith(cs, PFS_OP_MAD, temp[0],
-                               WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1],
-                                                               W, Z, Y,
-                                                               X),
-                               absolute(swizzle(temp[1], W, Z, Y, X)),
-                               negate(swizzle(temp[1], W, Z, Y, X)), 0);
-
-               /* dest.xy = mad(temp.xy, P, temp2.wz) */
-               emit_arith(cs, PFS_OP_MAD, dest,
-                               mask & (WRITEMASK_X | WRITEMASK_Y), temp[0],
-                               swizzle(const_sin[0], W, W, W, W),
-                               swizzle(temp[1], W, Z, Y, X), flags);
-
-               free_temp(cs, temp[0]);
-               free_temp(cs, temp[1]);
-               break;
-       case OPCODE_SIN:
-               /*
-                       *  using a parabola:
-                       * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
-                       * extra precision is obtained by weighting against
-                       * itself squared.
-                       */
-
-               temp[0] = get_temp_reg(cs);
-               const_sin[0] = emit_sincosconsts(cs, 0);
-               const_sin[1] = emit_sincosconsts(cs, 1);
-               src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
-
-               /* do range reduction */
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
-                               swizzle(keep(src[0]), X, X, X, X),
-                               swizzle(const_sin[1], Z, Z, Z, Z),
-                               pfs_half, 0);
-
-               emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X,
-                               swizzle(temp[0], X, X, X, X),
-                               undef, undef, 0);
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W),       //2*PI
-                               negate(swizzle(const_sin[0], Z, Z, Z, Z)),      //PI
-                               0);
-
-               /* SIN */
-
-               emit_arith(cs, PFS_OP_MAD, temp[0],
-                               WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
-                                                               Z, Z, Z,
-                                                               Z),
-                               const_sin[0], pfs_zero, 0);
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
-                               swizzle(temp[0], Y, Y, Y, Y),
-                               absolute(swizzle(temp[0], Z, Z, Z, Z)),
-                               swizzle(temp[0], X, X, X, X), 0);
-
-               emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y,
-                               swizzle(temp[0], X, X, X, X),
-                               absolute(swizzle(temp[0], X, X, X, X)),
-                               negate(swizzle(temp[0], X, X, X, X)), 0);
-
-               emit_arith(cs, PFS_OP_MAD, dest, mask,
-                               swizzle(temp[0], Y, Y, Y, Y),
-                               swizzle(const_sin[0], W, W, W, W),
-                               swizzle(temp[0], X, X, X, X), flags);
-
-               free_temp(cs, temp[0]);
-               break;
        case OPCODE_TEX:
                emit_tex(cs, fpi, R300_TEX_OP_LD);
                break;
index 4a40d3e..fa6a67f 100644 (file)
@@ -149,6 +149,14 @@ static struct prog_src_register srcregswz(int file, int index, int swz)
        return src;
 }
 
+static struct prog_src_register absolute(struct prog_src_register reg)
+{
+       struct prog_src_register newreg = reg;
+       newreg.Abs = 1;
+       newreg.NegateAbs = 0;
+       return newreg;
+}
+
 static struct prog_src_register negate(struct prog_src_register reg)
 {
        struct prog_src_register newreg = reg;
@@ -412,3 +420,139 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
                return GL_FALSE;
        }
 }
+
+
+static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+{
+       static const GLfloat SinCosConsts[2][4] = {
+               {
+                       1.273239545,            // 4/PI
+                       -0.405284735,           // -4/(PI*PI)
+                       3.141592654,            // PI
+                       0.2225                  // weight
+               },
+               {
+                       0.75,
+                       0.5,
+                       0.159154943,            // 1/(2*PI)
+                       6.283185307             // 2*PI
+               }
+       };
+       int i;
+
+       for(i = 0; i < 2; ++i) {
+               GLuint swz;
+               constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
+               ASSERT(swz == SWIZZLE_NOOP);
+       }
+}
+
+/**
+ * Approximate sin(x), where x has already been range-reduced to [-PI, PI) by the caller.
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(struct radeon_transform_context* t,
+       struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
+{
+       GLuint tempreg = radeonFindFreeTemporary(t);
+
+       emit2(t->Program, OPCODE_MUL, dstregtmpmask(tempreg, WRITEMASK_XY),
+               swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+               srcreg(PROGRAM_CONSTANT, constants[0]));
+       emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_X),
+               swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+               absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+               swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+       emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_Y),
+               swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+               absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
+               negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
+       emit3(t->Program, OPCODE_MAD, dst,
+               swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+               swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+               swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ *  MOV, ADD, MUL, MAD, FRC
+ */
+GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
+       struct prog_instruction* inst,
+       void* unused)
+{
+       if (inst->Opcode != OPCODE_COS &&
+           inst->Opcode != OPCODE_SIN &&
+           inst->Opcode != OPCODE_SCS)
+               return GL_FALSE;
+
+       GLuint constants[2];
+       GLuint tempreg = radeonFindFreeTemporary(t);
+
+       sincos_constants(t, constants);
+
+       if (inst->Opcode == OPCODE_COS) {
+               // MAD tmp.w, src, 1/(2*PI), 0.75
+               // FRC tmp.w, tmp.w
+               // MAD tmp.w, tmp.w, 2*PI, -PI
+               emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+                       swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+               emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W),
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+               emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+               sin_approx(t, inst->DstReg,
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       constants);
+       } else if (inst->Opcode == OPCODE_SIN) {
+               emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+                       swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
+               emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W),
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+               emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+               sin_approx(t, inst->DstReg,
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       constants);
+       } else {
+               emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY),
+                       swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
+               emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_XY),
+                       srcreg(PROGRAM_TEMPORARY, tempreg));
+               emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY),
+                       srcreg(PROGRAM_TEMPORARY, tempreg),
+                       swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+                       negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
+
+               struct prog_dst_register dst = inst->DstReg;
+
+               dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
+               sin_approx(t, dst,
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+                       constants);
+
+               dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
+               sin_approx(t, dst,
+                       swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+                       constants);
+       }
+
+       return GL_TRUE;
+}
index 858c5ed..3fe6153 100644 (file)
@@ -35,4 +35,9 @@ GLboolean radeonTransformALU(
        struct prog_instruction*,
        void*);
 
+GLboolean radeonTransformTrigSimple(
+       struct radeon_transform_context *t,
+       struct prog_instruction*,
+       void*);
+
 #endif /* __RADEON_PROGRAM_ALU_H_ */