r5xx: Add DDX and DDY instructions.
authorCorbin Simpson <MostAwesomeDude@gmail.com>
Sun, 17 Aug 2008 21:06:47 +0000 (14:06 -0700)
committerCorbin Simpson <MostAwesomeDude@gmail.com>
Mon, 18 Aug 2008 05:36:17 +0000 (22:36 -0700)
Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
src/mesa/drivers/dri/r300/r500_fragprog.c
src/mesa/drivers/dri/r300/r500_fragprog_emit.c
src/mesa/drivers/dri/r300/radeon_nqssadce.c
src/mesa/drivers/dri/r300/radeon_program_alu.c
src/mesa/drivers/dri/r300/radeon_program_alu.h
src/mesa/drivers/dri/r300/radeon_program_pair.c

index a84ba13..75dae86 100644 (file)
@@ -317,6 +317,14 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
                        return GL_FALSE;
 
                return GL_TRUE;
+       } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+               /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles,
+                * so only an unmodified .xyzw source (no swizzle, absolute
+                * value or negation) is accepted as native. */
+               if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs
+                               && !reg.NegateBase && !reg.NegateAbs)
+                       return GL_TRUE;
+
+               return GL_FALSE;
        } else {
                /* ALU instructions support almost everything */
                if (reg.Abs)
@@ -439,13 +447,14 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
 
                insert_WPOS_trailer(&compiler);
 
-               struct radeon_program_transformation transformations[3] = {
+               struct radeon_program_transformation transformations[] = {
                        { &transform_TEX, &compiler },
                        { &radeonTransformALU, 0 },
+                       { &radeonTransformDeriv, 0 },
                        { &radeonTransformTrigScale, 0 }
                };
                radeonLocalTransform(r300->radeon.glCtx, compiler.program,
-                       3, transformations);
+                       4, transformations);
 
                if (RADEON_DEBUG & DEBUG_PIXEL) {
                        _mesa_printf("Compiler: after native rewrite:\n");
index b6f5247..4631235 100644 (file)
@@ -89,6 +89,8 @@ static GLuint translate_rgb_op(GLuint opcode)
 {
        switch(opcode) {
        case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+       case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+       case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
        case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
        case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
        case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
@@ -109,6 +111,8 @@ static GLuint translate_alpha_op(GLuint opcode)
        switch(opcode) {
        case OPCODE_CMP: return R500_ALPHA_OP_CMP;
        case OPCODE_COS: return R500_ALPHA_OP_COS;
+       case OPCODE_DDX: return R500_ALPHA_OP_MDH;
+       case OPCODE_DDY: return R500_ALPHA_OP_MDV;
        case OPCODE_DP3: return R500_ALPHA_OP_DP;
        case OPCODE_DP4: return R500_ALPHA_OP_DP;
        case OPCODE_EX2: return R500_ALPHA_OP_EX2;
index f10ba40..97ce016 100644 (file)
@@ -218,6 +218,8 @@ static void process_instruction(struct nqssadce_state* s)
         * might change the instruction stream under us, so we have
         * to be careful with the inst pointer. */
        switch (inst->Opcode) {
+       case OPCODE_DDX:
+       case OPCODE_DDY:
        case OPCODE_FRC:
        case OPCODE_MOV:
                inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
index e0a2bd0..1ef71e7 100644 (file)
@@ -629,3 +629,30 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
 
        return GL_TRUE;
 }
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * The replacement handed to emit2 keeps the original opcode, saturate mode,
+ * destination and first source, and adds a second source whose swizzle is
+ * SWIZZLE_ONE in every channel with all four channels negated (the -1
+ * mentioned above).
+ *
+ * @param t transform context; t->Program is passed to emit2
+ * @param inst instruction to examine
+ * @param unused not referenced by this function
+ * @return GL_FALSE if inst is neither DDX nor DDY (emit2 is not called),
+ *         GL_TRUE after the two-source form has been passed to emit2
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
+       struct prog_instruction* inst,
+       void* unused)
+{
+       if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
+               return GL_FALSE; /* leave every other opcode alone */
+
+       struct prog_src_register B = inst->SrcReg[1]; /* NOTE(review): only SrcReg[0] is otherwise read here; presumably SrcReg[1] is just a template whose swizzle/negate are overwritten below -- confirm its remaining fields are harmless */
+
+       B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
+                                               SWIZZLE_ONE, SWIZZLE_ONE);
+       B.NegateBase = NEGATE_XYZW; /* negate all four channels: B becomes -1 */
+
+       emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
+               inst->SrcReg[0], B); /* same opcode and destination, now with B as a second source */
+
+       return GL_TRUE;
+}
index ea9d5bb..b459581 100644 (file)
@@ -45,4 +45,9 @@ GLboolean radeonTransformTrigScale(
        struct prog_instruction*,
        void*);
 
+GLboolean radeonTransformDeriv(
+       struct radeon_transform_context *t,
+       struct prog_instruction*,
+       void*);
+
 #endif /* __RADEON_PROGRAM_ALU_H_ */
index 4307994..5ad50d2 100644 (file)
@@ -305,6 +305,8 @@ static void classify_instruction(struct pair_state *s,
        switch(inst->Opcode) {
        case OPCODE_ADD:
        case OPCODE_CMP:
+       case OPCODE_DDX:
+       case OPCODE_DDY:
        case OPCODE_FRC:
        case OPCODE_MAD:
        case OPCODE_MAX:
@@ -673,8 +675,6 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
        return candidate;
 }
 
-
-
 /**
  * Fill the given ALU instruction's opcodes and source operands into the given pair,
  * if possible.
@@ -704,6 +704,14 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_
        int nargs = _mesa_num_inst_src_regs(inst->Opcode);
        int i;
 
+       /* Special case for DDX/DDY (MDH/MDV). */
+       if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
+               if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
+                       return GL_FALSE;
+               else
+                       nargs++;
+       }
+
        for(i = 0; i < nargs; ++i) {
                int source;
                if (pairinst->NeedRGB && !pairinst->IsTranscendent) {