/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "nvc0_program.h"
#include "nvc0_pc.h"
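
/* Machine-independent optimization passes for the nvc0 shader IR:
 * CSE, algebraic simplification and constant folding, source modifier
 * lowering, load folding/vectorization, reload and dead code elimination,
 * texture result masking, and flattening of IF/ELSE/ENDIF constructs into
 * predicated instructions.
 */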
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
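
/* DESCEND_ARBITRARY recurses into both CFG successors of the current block
 * with pass function @f; pass_seq serves as the visited mark, so each block
 * is processed at most once per pass invocation.
 */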
static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
      return FALSE;

   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);

   if (a->join->reg.id < b->join->reg.id) {
      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
   } else
   if (a->join->reg.id > b->join->reg.id) {
      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
   }
   /* same register: they interfere */
   return TRUE;
}
static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file))
      return a->reg.address == b->reg.address;
   else
      return a->join->reg.id == b->join->reg.id;
}
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di)
      for (si = 0; si < 5 && b->src[si]; ++si)
         if (registers_interfere(a->def[di], b->src[si]->value))
            return FALSE;

   return TRUE;
}
/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static INLINE boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_ST)
      return FALSE;
   return (!(nvi->terminator ||
             nvi->join ||
             nvi->target ||
             nvi->fixed ||
             nvc0_insn_refcount(nvi)));
}
/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
      return TRUE;
   if (nvi->terminator || nvi->join)
      return FALSE;
   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;
   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;
   return values_equal(nvi->def[0], nvi->src[0]->value);
}
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
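
/* Final IR pass before encoding: lay out the basic blocks in emission
 * order, delete no-op instructions and branches that merely fall through
 * to the next block, and accumulate emit_pos/emit_size (each nvc0
 * instruction is 8 bytes long).
 */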
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;

   /* find first non-empty block emitted before b */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);

   for (; j >= 0; --j) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->emit_size -= 8;
         pc->emit_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->emit_pos -= 8;

         nvc0_insn_delete(in->exit);
      }
      b->emit_pos = in->emit_pos + in->emit_size;

      if (in->emit_size) /* no more no-op branches to b */
         break;
   }
   pc->bb_list[pc->num_blocks++] = b;

   /* visit node */

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_is_noop(nvi) ||
          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
         nvc0_insn_delete(nvi);
      } else
         b->emit_size += 8;
   }
   pc->emit_size += b->emit_size;

#if NV50_DEBUG & NV50_DEBUG_PROG_IR
   if (!b->entry)
      debug_printf("BB:%i is now empty\n", b->id);
   else
      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}
static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, root);

   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}
int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}
static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   assert(nvi->indirect != 0);
   return (nvi->opcode == NV_OP_LD &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}
static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM &&
           nvi->src[0]->value->reg.size == 4);
}
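
/* For commutative operations (and for SET/SLCT, where the condition code
 * can be adjusted), move constant buffer loads and 32-bit immediates out
 * of src0 into src1, where the encoding is more likely to accept them
 * directly; this gives the load folding pass below more opportunities.
 */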
static void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   struct nv_ref *src0 = nvi->src[0];
   struct nv_ref *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode) &&
       NV_BASEOP(nvi->opcode) != NV_OP_SET &&
       NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
      return;
   assert(src0 && src1 && src0->value && src1->value);

   if (src1->value->reg.file != NV_FILE_GPR)
      return;

   if (is_cspace_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   } else
   if (is_immd32_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn) &&
          !is_immd32_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   }

   if (nvi->src[0] != src0) {
      if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
         nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond);
      else
      if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
         nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
   }
}
static void
nvi_set_indirect_load(struct nv_pc *pc,
                      struct nv_instruction *nvi, struct nv_value *val)
{
   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
        ++nvi->indirect);
   assert(nvi->indirect < 6);
   nv_reference(pc, nvi, nvi->indirect, val);
}
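
/* Fold LD/MOV of constants or immediates directly into the instructions
 * that consume them, where nvc0_insn_can_load allows it, carrying along
 * the address register of indirect loads; the load itself is deleted once
 * its result is no longer referenced.
 */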
static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int s;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (s = 0; s < 3 && nvi->src[s]; ++s) {
         ld = nvi->src[s]->value->insn;
         if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
            continue;
         if (!nvc0_insn_can_load(nvi, s, ld))
            continue;

         /* fold it ! */
         nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
         if (ld->indirect >= 0)
            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);

         if (!nvc0_insn_refcount(ld))
            nvc0_insn_delete(ld);
      }
   }
   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);

   return 0;
}
/* NOTE: Assumes loads have not yet been folded. */
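/* Turn SUB into ADD with a negated second source, fold NEG/ABS instructions
 * into the source modifiers of their users where supported, and fuse SAT
 * into the ADD/MUL/MAD that computes its operand.
 */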
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nvc0_insn_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
static void
apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_SAT) {
      union {
         float f;
         uint32_t u;
         int32_t i;
      } u;
      u.u = *val;
      if (type == NV_TYPE_F32) {
         u.f = CLAMP(u.f, -1.0f, 1.0f);
      } else
      if (type == NV_TYPE_U16) {
         u.u = MIN2(u.u, 0xffff);
      } else
      if (type == NV_TYPE_S16) {
         u.i = CLAMP(u.i, -32768, 32767);
      }
      *val = u.u;
   }
   if (mod & NV_MOD_NOT)
      *val = ~(*val);
}
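
/* Constant folding for the case where both sources of an instruction are
 * immediates: evaluate MUL/ADD/SUB (and the multiply part of MAD) at
 * compile time and rewrite the instruction into a MOV of the result, or
 * into an ADD of the remaining source for MAD.
 */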
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
   apply_modifiers(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD_F32:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL_F32:
      u.f32 = u0.f32 * u1.f32;
      break;
   case NV_OP_MUL_B32:
      u.u32 = u0.u32 * u1.u32;
      break;
   case NV_OP_ADD_F32:
      u.f32 = u0.f32 + u1.f32;
      break;
   case NV_OP_ADD_B32:
      u.u32 = u0.u32 + u1.u32;
      break;
   case NV_OP_SUB_F32:
      u.f32 = u0.f32 - u1.f32;
      break;
   case NV_OP_SUB_B32:
      u.u32 = u0.u32 - u1.u32;
      break;
   default:
      return;
   }

   val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
   val->reg.imm.u32 = u.u32;

   nv_reference(pc, nvi, 1, NULL);
   nv_reference(pc, nvi, 0, val);

   if (nvi->opcode == NV_OP_MAD_F32) {
      /* the folded multiply becomes src0, the third source becomes src1 */
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD_F32;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   } else {
      nvi->opcode = NV_OP_MOV;
   }
}
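
/* Simplification for the case where exactly one source is an immediate;
 * s is the index of the immediate source, t of the other one. Handles
 * x * 0, x * +-1, x * +-2, x * 2^n, x + 0, and RCP/RSQ of an immediate.
 */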
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   int shift;
   int t = s ? 0 : 1;
   uint op;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = val->reg.imm.u32;
   apply_modifiers(&u.u32, type, nvi->src[s]->mod);

   if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, nvi, t, NULL);
      if (s) {
         nvi->src[0] = nvi->src[1];
         nvi->src[1] = NULL;
      }
      nvi->src[0]->mod = 0;
      return;
   }

   switch (nvi->opcode) {
   case NV_OP_MUL_F32:
      if (u.f32 == 1.0f || u.f32 == -1.0f) {
         if (u.f32 == -1.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         nvi->src[0]->mod = 0;
      } else
      if (u.f32 == 2.0f || u.f32 == -2.0f) {
         if (u.f32 == -2.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD_F32;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      }
      break;
   case NV_OP_ADD_F32:
      if (u.u32 == 0) {
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         case NV_MOD_NEG | NV_MOD_ABS:
            op = NV_OP_CVT;
            nvi->ext.cvt.s = nvi->ext.cvt.d = type;
            break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         if (nvi->opcode != NV_OP_CVT)
            nvi->src[0]->mod = 0;
      }
      break;
   case NV_OP_ADD_B32:
      if (u.u32 == 0) {
         assert(nvi->src[t]->mod == 0);
         nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
         nvi->ext.cvt.s = nvi->ext.cvt.d = type;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      }
      break;
   case NV_OP_MUL_B32:
      /* multiplication by 0 already handled above */
      assert(nvi->src[s]->mod == 0);
      shift = ffs(u.s32) - 1;
      if (u.s32 == 1) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      } else
      if (u.s32 > 0 && u.s32 == (1 << shift)) {
         nvi->opcode = NV_OP_SHL;
         (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, val);
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;
      assert(s == 0);
      nv_reference(pc, nvi, 0, val);
      break;
   default:
      break;
   }
}
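
/* MIN or MAX of a value with itself (without source modifiers) is an
 * identity operation: forward the operand and delete the instruction.
 */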
static void
handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;

   if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
      return;
   if (src0->reg.file != NV_FILE_GPR)
      return;
   nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
   nvc0_insn_delete(nvi);
}
/* check if we can combine MUL + ADD into MAD/FMA */
static void
handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;
   struct nv_value *src;
   int s;
   uint8_t mod[4];

   if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
   else
   if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
   else
      return;

   if ((src0->insn && src0->insn->bb != nvi->bb) ||
       (src1->insn && src1->insn->bb != nvi->bb))
      return;

   /* check for immediates from prior constant folding */
   if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
      return;
   src = nvi->src[s]->value;

   mod[0] = nvi->src[0]->mod;
   mod[1] = nvi->src[1]->mod;
   mod[2] = src->insn->src[0]->mod;
   mod[3] = src->insn->src[1]->mod;

   if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
      return;

   nvi->opcode = NV_OP_MAD_F32;

   nv_reference(ctx->pc, nvi, s, NULL);
   nvi->src[2] = nvi->src[!s];
   nvi->src[!s] = NULL;

   nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
   nvi->src[0]->mod = mod[2] ^ mod[s];
   nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
   nvi->src[1]->mod = mod[3];
}
static int
nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1;
      uint baseop = NV_BASEOP(nvi->opcode);

      next = nvi->next;

      src0 = nvc0_pc_find_immediate(nvi->src[0]);
      src1 = nvc0_pc_find_immediate(nvi->src[1]);

      if (src0 && src1) {
         constant_expression(ctx->pc, nvi, src0, src1);
      } else {
         if (src0)
            constant_operand(ctx->pc, nvi, src0, 0);
         else
         if (src1)
            constant_operand(ctx->pc, nvi, src1, 1);
      }

      if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
         handle_min_max(ctx, nvi);
      else
      if (nvi->opcode == NV_OP_ADD_F32)
         handle_add_mul(ctx, nvi);
   }
   DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);

   return 0;
}
/* TODO: redundant store elimination */

/* This record type is shared by the load/store vectorizer and by reload
 * elimination: ofst/base/size describe a memory access, data/value cache
 * the key and result of a previously seen load.
 */
struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint64_t data[2];
   struct nv_value *value;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

#define MEM_RECORD_POOL_SIZE 1024

struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_s;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   uint32_t alloc;
   struct mem_record pool[MEM_RECORD_POOL_SIZE];
};
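
/* Records of previous memory accesses are kept in one list per memory
 * file: one for each constant buffer (mem_c[16]), attributes (mem_a),
 * varyings (mem_v), local memory (mem_l) and immediates (imm), all
 * allocated from a fixed-size pool that is reset per basic block.
 */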
/* Extend the load operation in @rec to also cover the data loaded by @ld.
 * The two loads may not overlap but reference adjacent memory locations.
 */
static void
combine_load(struct nv_pc *pc, struct mem_record *rec,
             struct nv_instruction *ld)
{
   struct nv_instruction *fv = rec->insn;
   struct nv_value *mem = ld->src[0]->value;
   uint32_t size = rec->size + mem->reg.size;
   int j;
   int d = rec->size / 4;

   assert(rec->size < 16);
   if (rec->ofst > mem->reg.address) {
      if ((size == 8 && mem->reg.address & 3) ||
          (size > 8 && mem->reg.address & 7))
         return;
      rec->ofst = mem->reg.address;
      for (j = 0; j < d; ++j)
         fv->def[mem->reg.size / 4 + j] = fv->def[j];
      d = 0;
   } else
   if ((size == 8 && rec->ofst & 3) ||
       (size > 8 && rec->ofst & 7)) {
      return;
   }

   for (j = 0; j < mem->reg.size / 4; ++j) {
      fv->def[d] = ld->def[j];
      fv->def[d++]->insn = fv;
   }

   if (fv->src[0]->value->refc > 1)
      nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
   fv->src[0]->value->reg.address = rec->ofst;
   fv->src[0]->value->reg.size = rec->size = size;

   nvc0_insn_delete(ld);
}
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{

}
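
/* TODO: combining adjacent exports is not implemented yet. */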
static INLINE void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
   struct mem_record *it = &ctx->pool[ctx->alloc++];

   it->next = *rec;
   *rec = it;
   it->base = base;
   it->ofst = ofst;
   it->insn = nvi;
   it->size = nvi->src[0]->value->reg.size;
}
/* vectorize and reuse loads from memory or of immediates */
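/* An existing record matches if it has the same base address register,
 * lies in the same aligned 16 byte slot, and is immediately adjacent
 * (directly below or above) to the new access; combined loads also must
 * not start misaligned within that slot.
 */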
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   struct nv_value *mem;
   uint32_t ofst, base;
   int s;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (is_cspace_load(ld)) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
      } else
      if (ld->opcode == NV_OP_VFETCH) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_a;
      } else
      if (ld->opcode == NV_OP_EXPORT) {
         mem = ld->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }
      if (ld->def[0] && ld->def[0]->refc == 0)
         continue;
      ofst = mem->reg.address;
      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             ((it->ofst >> 4) == (ofst >> 4)) &&
             ((it->ofst + it->size == ofst) ||
              (it->ofst - mem->reg.size == ofst))) {
            /* only NV_OP_VFETCH can load exactly 12 bytes */
            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
               continue;
            if (it->ofst < ofst) {
               if ((it->ofst & 0xf) == 4)
                  continue;
            } else
            if ((ofst & 0xf) == 4)
               continue;
            break;
         }
      }
      if (it) {
         switch (ld->opcode) {
         case NV_OP_EXPORT: combine_export(it, ld); break;
         default:
            combine_load(ctx->pc, it, ld);
            break;
         }
      } else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
         add_mem_record(ctx, rec, base, ofst, ld);
      }
   }

   ctx->alloc = 0;
   ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
   for (s = 0; s < 16; ++s)
      ctx->mem_c[s] = NULL;

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
}

/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         continue;
      }

      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }
      if (it)
         eliminate_store(it, st);
      else
         add_mem_record(ctx, rec, base, ofst, st);
   }

   DESCEND_ARBITRARY(s, pass_store_elim);
   return 0;
}
/* TODO: properly handle loads from l[] memory in the presence of stores */
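/* XXX: the lookup below still uses nv50-style NV_OP_LDA loads and the
 * NV_FILE_MEM_S file; a load is replaced by a reference to the previously
 * loaded value when the address and the address register (recorded as
 * data[0]/data[1]) match.
 */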
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == MEM_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_v = NULL;
   ctx->mem_a = NULL;
   ctx->mem_s = NULL;
   ctx->mem_l = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
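
/* Determine which components of a texture result are actually used and
 * store them in tex_mask; the defs are compacted so that live components
 * come first.
 */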
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_texture_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }
      /* reorder defs so that used components come first */
      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
      assert(j == 4);
   }
   return 0;
}
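
/* Dead code elimination: delete instructions whose results are
 * unreferenced, unless they have side effects (see inst_removable).
 * The caller re-runs this pass until no more instructions are removed.
 */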
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_removable(nvi)) {
         nvc0_insn_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   }
   return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
           !bb->out[0]->out[1] &&
           !bb->out[1]->out[1]);
}
/* Predicate instructions and delete any branch at the end if it is
 * not a break from a loop.
 */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *pred, uint8_t cc)
{
   struct nv_instruction *nvi, *prev;
   int s;

   if (!b->entry)
      return;
   for (nvi = b->entry; nvi; nvi = nvi->next) {
      prev = nvi;
      if (inst_is_noop(nvi))
         continue;
      /* append the predicate as the last source */
      for (s = 0; nvi->src[s]; ++s);
      assert(s < 6);
      nvi->predicate = s;
      nvi->cc = cc;
      nv_reference(pc, nvi, nvi->predicate, pred);
   }
   if (prev->opcode == NV_OP_BRA &&
       b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
       b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
      nvc0_insn_delete(prev);
}
static INLINE boolean
may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
{
   if (nvi->def[0] && values_equal(nvi->def[0], pred))
      return FALSE;
   return nvc0_insn_is_predicateable(nvi);
}
/* Transform IF/ELSE/ENDIF constructs into predicated instructions
 * where feasible.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int k;
   int n0, n1; /* instruction counts of outgoing blocks */

   if (bb_is_if_else_endif(b)) {
      assert(b->exit && b->exit->opcode == NV_OP_BRA);

      assert(b->exit->predicate >= 0);
      pred = b->exit->src[b->exit->predicate]->value;

      n0 = n1 = 0;
      for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!may_predicate_insn(nvi, pred))
            break;
      if (!nvi) {
         /* we're after register allocation, so there always is an ELSE block */
         for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!may_predicate_insn(nvi, pred))
               break;
      }

      /* 12 is an arbitrary limit */
      if (!nvi && n0 < 12 && n1 < 12) {
         predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
         predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);

         nvc0_insn_delete(b->exit); /* delete the branch */

         /* and a potential joinat before it */
         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nvc0_insn_delete(b->exit);

         /* remove join operations at the end of the conditional */
         k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
         if ((nvi = b->out[0]->out[k]->entry)) {
            nvi->join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nvc0_insn_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(k, nv_pass_flatten);

   return 0;
}
/* Test instructions for equality, independently of their sources. */
static boolean
is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (a->opcode != b->opcode)
      return FALSE;
   if (nv_is_texture_op(a->opcode)) {
      if (a->ext.tex.t != b->ext.tex.t ||
          a->ext.tex.s != b->ext.tex.s)
         return FALSE;
      if (a->tex_dim != b->tex_dim ||
          a->tex_array != b->tex_array ||
          a->tex_cube != b->tex_cube ||
          a->tex_shadow != b->tex_shadow ||
          a->tex_live != b->tex_live)
         return FALSE;
   } else
   if (a->opcode == NV_OP_CVT) {
      if (a->ext.cvt.s != b->ext.cvt.s ||
          a->ext.cvt.d != b->ext.cvt.d)
         return FALSE;
   } else
   if (NV_BASEOP(a->opcode) == NV_OP_SET ||
       NV_BASEOP(a->opcode) == NV_OP_SLCT) {
      if (a->set_cond != b->set_cond)
         return FALSE;
   } else
   if (a->opcode == NV_OP_LINTERP ||
       a->opcode == NV_OP_PINTERP) {
      if (a->centroid != b->centroid ||
          a->flat != b->flat)
         return FALSE;
   }
   if (a->lanes != b->lanes ||
       a->patch != b->patch ||
       a->saturate != b->saturate)
      return FALSE;
   if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
      return FALSE;
   return TRUE;
}
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s, d;

   for (ir = entry; ir; ir = next) {
      next = ir->next;

      for (ik = entry; ik != ir; ik = ik->next) {
         if (!is_operation_equal(ir, ik))
            continue;
         if (!ir->def[0] || !ik->def[0])
            continue;

         if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
            continue;

         for (d = 0; d < 4; ++d) {
            if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
               break;
            if (ir->def[d])
               if (!values_equal(ik->def[0], ir->def[0]))
                  break;
         }
         if (d < 4)
            continue;

         for (s = 0; s < 5; ++s) {
            struct nv_value *a, *b;

            if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
               break;
            if (!ir->src[s]) {
               s = 5;
               break;
            }

            if (ik->src[s]->mod != ir->src[s]->mod)
               break;
            a = ik->src[s]->value;
            b = ir->src[s]->value;
            if (a == b)
               continue;
            if (a->reg.file != b->reg.file ||
                a->reg.id < 0 || /* this excludes memory loads/stores */
                a->reg.id != b->reg.id)
               break;
         }
         if (s == 5) {
            nvc0_insn_delete(ir);
            for (d = 0; d < 4 && ir->def[d]; ++d)
               nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
            break;
         }
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
 * neighbouring registers. CSE might have messed this up.
 * Just generate a MOV for each source to avoid conflicts if they're used in
 * multiple NV_OP_BIND at different positions.
 *
 * Add a dummy use of the pointer source of >= 8 byte loads after the load
 * to prevent it from being assigned a register which overlaps the load's
 * destination, which would produce random corruptions.
 */
static int
nv_pass_fixups(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_value *val;
   struct nv_instruction *fix, *nvi, *next;
   int s;

   for (fix = b->entry; fix; fix = next) {
      next = fix->next;

      if (fix->opcode == NV_OP_LD) {
         if (fix->indirect >= 0 && fix->src[0]->value->reg.size >= 8) {
            nvi = nv_alloc_instruction(ctx->pc, NV_OP_UNDEF);
            nv_reference(ctx->pc, nvi, 0, fix->src[fix->indirect]->value);

            nvc0_insn_insert_after(fix, nvi);
         }
      } else
      if (fix->opcode == NV_OP_BIND) {
         for (s = 0; s < 4 && fix->src[s]; ++s) {
            val = fix->src[s]->value;

            nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
            nvi->def[0] = new_value_like(ctx->pc, val);
            nvi->def[0]->insn = nvi;
            nv_reference(ctx->pc, nvi, 0, val);
            nv_reference(ctx->pc, fix, s, nvi->def[0]);

            nvc0_insn_insert_before(fix, nvi);
         }
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_fixups);

   return 0;
}
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct pass_reld_elim *reldelim = NULL;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.n = 0;
   pass.pc = pc;

   /* Do CSE so we can just compare values by pointer in subsequent passes. */
   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_algebraic_opt(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nvc0_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(pass_reld_elim);
      reldelim->pc = pc;

      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      if (ret) {
         FREE(reldelim);
         return ret;
      }
      memset(reldelim, 0, sizeof(struct pass_reld_elim));
      reldelim->pc = pc;
   }

   /* May run DCE before load-combining since that pass will clean up
    * after itself.
    */
   dce.pc = pc;
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   if (pc->opt_reload_elim) {
      pc->pass_seq++;
      ret = nv_pass_mem_opt(reldelim, root);
      if (!ret) {
         memset(reldelim, 0, sizeof(struct pass_reld_elim));
         reldelim->pc = pc;

         pc->pass_seq++;
         ret = nv_pass_mem_opt(reldelim, root);
      }
      FREE(reldelim);
      if (ret)
         return ret;
   }

   pc->pass_seq++;
   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fixups(&pass, root);

   return ret;
}
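
/* Entry point of this optimization stage: run pass0 on the main program
 * and on every subroutine.
 */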
int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}