vc4: Add support for nir_op_uge, using the carry bit on QPU_A_SUB.
authorEric Anholt <eric@anholt.net>
Tue, 10 Nov 2015 23:37:47 +0000 (15:37 -0800)
committerEric Anholt <eric@anholt.net>
Wed, 18 Nov 2015 01:45:23 +0000 (17:45 -0800)
It looks like nir_lower_idiv is going to use it soon, so add support.
With Ilia's change, this fixes one case in fs-op-div-large-uint-uint (with
GL 3.0 forced on).

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
src/gallium/drivers/vc4/vc4_opt_algebraic.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index f1bab81..07a9226 100644 (file)
@@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c)
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
                 case QOP_SEL_X_Y_NC:
+                case QOP_SEL_X_Y_CS:
+                case QOP_SEL_X_Y_CC:
                         if (is_zero(c, inst->src[1])) {
                                 /* Replace references to a 0 uniform value
                                  * with the SEL_X_0 equivalent.
index a48dad8..52317bd 100644 (file)
@@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
                 qir_SF(c, qir_SUB(c, src[0], src[1]));
                 *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
                 break;
+        case nir_op_uge:
+                qir_SF(c, qir_SUB(c, src[0], src[1]));
+                *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0));
+                break;
         case nir_op_ilt:
                 qir_SF(c, qir_SUB(c, src[0], src[1]));
                 *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
index 7894b08..f2855e1 100644 (file)
@@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
         [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
         [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+        [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true },
+        [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true },
         [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
         [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
         [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
         [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+        [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true },
+        [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true },
 
         [QOP_RCP] = { "rcp", 1, 1, false, true },
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
@@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst)
         case QOP_SEL_X_0_NC:
         case QOP_SEL_X_0_ZS:
         case QOP_SEL_X_0_ZC:
+        case QOP_SEL_X_0_CS:
+        case QOP_SEL_X_0_CC:
         case QOP_SEL_X_Y_NS:
         case QOP_SEL_X_Y_NC:
         case QOP_SEL_X_Y_ZS:
         case QOP_SEL_X_Y_ZC:
+        case QOP_SEL_X_Y_CS:
+        case QOP_SEL_X_Y_CC:
                 return true;
         default:
                 return false;
index a92ad93..ddb35e4 100644 (file)
@@ -99,11 +99,15 @@ enum qop {
         QOP_SEL_X_0_ZC,
         QOP_SEL_X_0_NS,
         QOP_SEL_X_0_NC,
+        QOP_SEL_X_0_CS,
+        QOP_SEL_X_0_CC,
         /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
         QOP_SEL_X_Y_ZS,
         QOP_SEL_X_Y_ZC,
         QOP_SEL_X_Y_NS,
         QOP_SEL_X_Y_NC,
+        QOP_SEL_X_Y_CS,
+        QOP_SEL_X_Y_CC,
 
         QOP_FTOI,
         QOP_ITOF,
@@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS)
 QIR_ALU1(SEL_X_0_ZC)
 QIR_ALU1(SEL_X_0_NS)
 QIR_ALU1(SEL_X_0_NC)
+QIR_ALU1(SEL_X_0_CS)
+QIR_ALU1(SEL_X_0_CC)
 QIR_ALU2(SEL_X_Y_ZS)
 QIR_ALU2(SEL_X_Y_ZC)
 QIR_ALU2(SEL_X_Y_NS)
 QIR_ALU2(SEL_X_Y_NC)
+QIR_ALU2(SEL_X_Y_CS)
+QIR_ALU2(SEL_X_Y_CC)
 QIR_ALU2(FMIN)
 QIR_ALU2(FMAX)
 QIR_ALU2(FMINABS)
index 133e138..e0d3633 100644 (file)
@@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_SEL_X_0_ZC:
                 case QOP_SEL_X_0_NS:
                 case QOP_SEL_X_0_NC:
+                case QOP_SEL_X_0_CS:
+                case QOP_SEL_X_0_CC:
                         queue(c, qpu_a_MOV(dst, src[0]) | unpack);
                         set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                           QPU_COND_ZS);
@@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
                 case QOP_SEL_X_Y_NC:
+                case QOP_SEL_X_Y_CS:
+                case QOP_SEL_X_Y_CC:
                         queue(c, qpu_a_MOV(dst, src[0]));
                         if (qinst->src[0].pack)
                                 *(last_inst(c)) |= unpack;