freedreno/ir3: fixes for half reg in/out
author: Rob Clark <robdclark@chromium.org>
Mon, 29 Apr 2019 20:12:31 +0000 (13:12 -0700)
committer: Rob Clark <robdclark@chromium.org>
Tue, 30 Apr 2019 17:39:24 +0000 (10:39 -0700)
Half-reg inputs/outputs need to either update max_half_reg, or be remapped
to a full reg and update max_reg accordingly, depending on the GPU generation.

Signed-off-by: Rob Clark <robdclark@chromium.org>
src/freedreno/ir3/ir3.c
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h

index 55e03d8..97f4ae9 100644 (file)
@@ -104,28 +104,28 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
                if (reg->flags & IR3_REG_RELATIV) {
                        components = reg->size;
                        val.idummy10 = reg->array.offset;
-                       max = (reg->array.offset + repeat + components - 1) >> 2;
+                       max = (reg->array.offset + repeat + components - 1);
                } else {
                        components = util_last_bit(reg->wrmask);
                        val.comp = reg->num & 0x3;
                        val.num  = reg->num >> 2;
-                       max = (reg->num + repeat + components - 1) >> 2;
+                       max = (reg->num + repeat + components - 1);
                }
 
                if (reg->flags & IR3_REG_CONST) {
-                       info->max_const = MAX2(info->max_const, max);
+                       info->max_const = MAX2(info->max_const, max >> 2);
                } else if (val.num == 63) {
                        /* ignore writes to dummy register r63.x */
-               } else if (max < 48) {
+               } else if (max < regid(48, 0)) {
                        if (reg->flags & IR3_REG_HALF) {
                                if (info->gpu_id >= 600) {
                                        /* starting w/ a6xx, half regs conflict with full regs: */
-                                       info->max_reg = MAX2(info->max_reg, (max+1)/2);
+                                       info->max_reg = MAX2(info->max_reg, max >> 3);
                                } else {
-                                       info->max_half_reg = MAX2(info->max_half_reg, max);
+                                       info->max_half_reg = MAX2(info->max_half_reg, max >> 2);
                                }
                        } else {
-                               info->max_reg = MAX2(info->max_reg, max);
+                               info->max_reg = MAX2(info->max_reg, max >> 2);
                        }
                }
        }
index 4e139dc..3c813c7 100644 (file)
@@ -2954,6 +2954,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                        struct ir3_instruction *instr = ir->outputs[(i*4) + j];
                        if (instr) {
                                so->outputs[i].regid = instr->regs[0]->num;
+                               so->outputs[i].half  = !!(instr->regs[0]->flags & IR3_REG_HALF);
                                break;
                        }
                }
@@ -2962,14 +2963,21 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        /* Note that some or all channels of an input may be unused: */
        for (i = 0; i < so->inputs_count; i++) {
                unsigned j, reg = regid(63,0);
+               bool half = false;
                for (j = 0; j < 4; j++) {
                        struct ir3_instruction *in = inputs[(i*4) + j];
 
                        if (in && !(in->flags & IR3_INSTR_UNUSED)) {
                                reg = in->regs[0]->num - j;
+                               if (half) {
+                                       compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+                               } else {
+                                       half = !!(in->regs[0]->flags & IR3_REG_HALF);
+                               }
                        }
                }
                so->inputs[i].regid = reg;
+               so->inputs[i].half  = half;
        }
 
        if (ctx->astc_srgb)
index 3f8e8ab..46eba2a 100644 (file)
@@ -63,7 +63,7 @@ delete_variant(struct ir3_shader_variant *v)
  * the reg off.
  */
 static void
-fixup_regfootprint(struct ir3_shader_variant *v)
+fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
 {
        unsigned i;
 
@@ -83,14 +83,30 @@ fixup_regfootprint(struct ir3_shader_variant *v)
 
                if (v->inputs[i].compmask) {
                        unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
-                       int32_t regid = (v->inputs[i].regid + n) >> 2;
-                       v->info.max_reg = MAX2(v->info.max_reg, regid);
+                       int32_t regid = v->inputs[i].regid + n;
+                       if (v->inputs[i].half) {
+                               if (gpu_id < 500) {
+                                       v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+                               } else {
+                                       v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+                               }
+                       } else {
+                               v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+                       }
                }
        }
 
        for (i = 0; i < v->outputs_count; i++) {
-               int32_t regid = (v->outputs[i].regid + 3) >> 2;
-               v->info.max_reg = MAX2(v->info.max_reg, regid);
+               int32_t regid = v->outputs[i].regid + 3;
+               if (v->outputs[i].half) {
+                       if (gpu_id < 500) {
+                               v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+                       } else {
+                               v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+                       }
+               } else {
+                       v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+               }
        }
 }
 
@@ -117,7 +133,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
         */
        v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
 
-       fixup_regfootprint(v);
+       fixup_regfootprint(v, gpu_id);
 
        return bin;
 }
index 7f09ee5..4e8ab08 100644 (file)
@@ -390,6 +390,7 @@ struct ir3_shader_variant {
        struct {
                uint8_t slot;
                uint8_t regid;
+               bool    half : 1;
        } outputs[16 + 2];  /* +POSITION +PSIZE */
        bool writes_pos, writes_smask, writes_psize;
 
@@ -413,6 +414,7 @@ struct ir3_shader_variant {
                /* fragment shader specific: */
                bool    bary       : 1;   /* fetched varying (vs one loaded into reg) */
                bool    rasterflat : 1;   /* special handling for emit->rasterflat */
+               bool    half       : 1;
                enum glsl_interp_mode interpolate;
        } inputs[16 + 2];  /* +POSITION +FACE */