r600: implement LOG op in compiler
author Pierre Ossman <pierre@ossman.eu>
Sun, 1 Nov 2009 21:28:02 +0000 (22:28 +0100)
committer Alex Deucher <alexdeucher@gmail.com>
Mon, 2 Nov 2009 16:30:00 +0000 (11:30 -0500)
src/mesa/drivers/dri/r600/r700_assembler.c
src/mesa/drivers/dri/r600/r700_assembler.h

index fbf1f29..e0d7d4f 100644 (file)
@@ -3037,6 +3037,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
     return GL_TRUE;
 }
 
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
+{
+    BITS tmp1, tmp2, tmp3;
+
+    checkop1(pAsm);
+
+    tmp1 = gethelpr(pAsm);
+    tmp2 = gethelpr(pAsm);
+    tmp3 = gethelpr(pAsm);
+
+    // LOG: dst = (floor(l), 2^(l - floor(l)), l, 1.0) with l = lg2(fabs(a.x)). Returns
+    //      GL_FALSE as soon as assemble_src/assemble_dst/next_ins fails, else GL_TRUE.
+    // FIXME: HW can do fabs() directly on input elements, but the compiler can't use that.
+
+    // MAX     tmp1.x,   a.x,    -a.x   (fabs(a.x): S[1] is a copy of S[0] with neg flipped)
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;  
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp1;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+    pAsm->S[1].bits = pAsm->S[0].bits;
+    flipneg_PVSSRC(&(pAsm->S[1].src));
+
+    if ( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    // Entire algo (tmp1.x holds fabs(a.x) from the MAX above):
+    //
+    // LG2     tmp2.x,   tmp1.x
+    // FLOOR   tmp3.x,   tmp2.x
+    // MOV     dst.x,    tmp3.x
+    // ADD     tmp3.x,   tmp2.x,    -tmp3.x
+    // EX2     dst.y,    tmp3.x
+    // MOV     dst.z,    tmp2.x
+    // MOV     dst.w,    1.0
+
+    // LG2     tmp2.x,   tmp1.x   (emitted as SQ_OP2_INST_LOG_IEEE with dst.math set)
+    // FLOOR   tmp3.x,   tmp2.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp2;
+    pAsm->D.dst.writex = 1;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp1;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp3;
+    pAsm->D.dst.writex = 1;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp2;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst.x,    tmp3.x   (y/z/w write bits set up by assemble_dst are cleared)
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp3;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // ADD     tmp3.x,   tmp2.x,    -tmp3.x   (tmp3.x becomes tmp2.x - floor(tmp2.x))
+    // EX2     dst.y,    tmp3.x               (emitted as SQ_OP2_INST_EXP_IEEE, y bit only)
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp3;
+    pAsm->D.dst.writex = 1;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp2;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = tmp3;
+
+    setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+    neg_PVSSRC(&(pAsm->S[1].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+    pAsm->D.dst.math = 1;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp3;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst.z,    tmp2.x   (the LG2 result itself; x/y/w write bits are cleared)
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp2;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst.w,    1.0   (SQ_SEL_1 swizzle; NOTE(review): rtype here is SRC_REG_TEMPORARY, other sources use DST_REG_TEMPORARY - confirm equivalent)
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp1;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) 
 {
     int tmp, ii;
@@ -4166,10 +4377,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
                 return GL_FALSE;
             break;  
         case OPCODE_LOG: 
-            radeon_error("Not yet implemented instruction OPCODE_LOG \n");
-            //if ( GL_FALSE == assemble_BAD("LOG") ) 
+            if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) 
                 return GL_FALSE;
-            break; // approx of LG2
+            break;
 
         case OPCODE_MAD: 
             if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) 
index 317feb1..c66db50 100644 (file)
@@ -484,6 +484,7 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
 GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
 GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
 GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
 GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
 GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
 GLboolean assemble_MAX(r700_AssemblerBase *pAsm);