v2: Remove type change for SHR with negation. This was a leftover from
a previous attempt to deal with SHR and negation. Now all right-shifts
with unsigned parameters are marked as not being able to have source
modifiers.
v3: Disallow negations on right shifts of unsigned sources by setting
the no_negations flag in add_candidate_immediate. This eliminates the
need to exclude SHR in can_do_source_mods.
Tiger Lake
total instructions in shared programs:
21102817 ->
21099443 (-0.02%)
instructions in affected programs: 296796 -> 293422 (-1.14%)
helped: 92 / HURT: 356
total cycles in shared programs:
790564691 ->
790393358 (-0.02%)
cycles in affected programs:
36456886 ->
36285553 (-0.47%)
helped: 171 / HURT: 286
total spills in shared programs: 3951 -> 3959 (0.20%)
spills in affected programs: 176 -> 184 (4.55%)
helped: 0 / HURT: 2
total fills in shared programs: 2631 -> 2639 (0.30%)
fills in affected programs: 176 -> 184 (4.55%)
helped: 0 / HURT: 2
LOST: 0
GAINED: 4
Ice Lake
total instructions in shared programs:
19954204 ->
19949122 (-0.03%)
instructions in affected programs: 40301 -> 35219 (-12.61%)
helped: 23 / HURT: 2
total cycles in shared programs:
858377735 ->
858462082 (<.01%)
cycles in affected programs:
75537286 ->
75621633 (0.11%)
helped: 124 / HURT: 319
total spills in shared programs: 6255 -> 6190 (-1.04%)
spills in affected programs: 392 -> 327 (-16.58%)
helped: 1 / HURT: 2
total fills in shared programs: 7813 -> 7382 (-5.52%)
fills in affected programs: 942 -> 511 (-45.75%)
helped: 1 / HURT: 2
LOST: 0
GAINED: 3
Skylake
total instructions in shared programs:
18049362 ->
18044440 (-0.03%)
instructions in affected programs: 48317 -> 43395 (-10.19%)
helped: 26 / HURT: 2
total cycles in shared programs:
844884806 ->
844915655 (<.01%)
cycles in affected programs:
76137133 ->
76167982 (0.04%)
helped: 171 / HURT: 293
total spills in shared programs: 6148 -> 6149 (0.02%)
spills in affected programs: 595 -> 596 (0.17%)
helped: 4 / HURT: 2
total fills in shared programs: 7484 -> 7067 (-5.57%)
fills in affected programs: 1226 -> 809 (-34.01%)
helped: 4 / HURT: 2
LOST: 0
GAINED: 8
Broadwell
total instructions in shared programs:
17826844 ->
17821805 (-0.03%)
instructions in affected programs: 60687 -> 55648 (-8.30%)
helped: 28 / HURT: 8
total cycles in shared programs:
905332682 ->
904369499 (-0.11%)
cycles in affected programs:
76743509 ->
75780326 (-1.26%)
helped: 179 / HURT: 225
total spills in shared programs: 17922 -> 17908 (-0.08%)
spills in affected programs: 2495 -> 2481 (-0.56%)
helped: 6 / HURT: 8
total fills in shared programs: 26290 -> 25397 (-3.40%)
fills in affected programs: 2606 -> 1713 (-34.27%)
helped: 8 / HURT: 6
LOST: 1
GAINED: 1
Haswell
total instructions in shared programs:
16678878 ->
16674444 (-0.03%)
instructions in affected programs: 78458 -> 74024 (-5.65%)
helped: 87 / HURT: 6
total cycles in shared programs:
880189381 ->
880301043 (0.01%)
cycles in affected programs:
29956463 ->
30068125 (0.37%)
helped: 169 / HURT: 163
total spills in shared programs: 14428 -> 14378 (-0.35%)
spills in affected programs: 2384 -> 2334 (-2.10%)
helped: 8 / HURT: 6
total fills in shared programs: 16975 -> 16881 (-0.55%)
fills in affected programs: 1334 -> 1240 (-7.05%)
helped: 10 / HURT: 4
Ivy Bridge
total instructions in shared programs:
15706048 ->
15706035 (<.01%)
instructions in affected programs: 9941 -> 9928 (-0.13%)
helped: 13 / HURT: 0
total cycles in shared programs:
433618834 ->
433624637 (<.01%)
cycles in affected programs:
12926714 ->
12932517 (0.04%)
helped: 52 / HURT: 41
Sandy Bridge
total cycles in shared programs:
741223552 ->
741223443 (<.01%)
cycles in affected programs: 19814 -> 19705 (-0.55%)
helped: 14 / HURT: 0
No changes on Iron Lake or GM45
fossil-db changes:
Tiger Lake
Instructions in all programs:
156858030 ->
156905532 (+0.0%)
Instructions helped: 3915
Instructions hurt: 15411
Cycles in all programs:
7529667771 ->
7532117340 (+0.0%)
Cycles helped: 10260
Cycles hurt: 9990
Spills in all programs: 5610 -> 5457 (-2.7%)
Spills helped: 18
Fills in all programs: 6274 -> 6091 (-2.9%)
Fills helped: 18
Gained: 2
Lost: 16
Ice Lake
Instructions in all programs:
141308082 ->
141303083 (-0.0%)
Instructions helped: 574
Instructions hurt: 172
Cycles in all programs:
9091361325 ->
9094622766 (+0.0%)
Cycles helped: 8764
Cycles hurt: 11702
Spills in all programs: 7531 -> 7385 (-1.9%)
Spills helped: 19
Fills in all programs: 8462 -> 8294 (-2.0%)
Fills helped: 19
Gained: 22
Lost: 15
Skylake
Instructions in all programs:
131872162 ->
131867263 (-0.0%)
Instructions helped: 566
Instructions hurt: 172
Cycles in all programs:
8795095440 ->
8799676943 (+0.1%)
Cycles helped: 8333
Cycles hurt: 12182
Spills in all programs: 7006 -> 6884 (-1.7%)
Spills helped: 13
Fills in all programs: 7696 -> 7552 (-1.9%)
Fills helped: 13
Gained: 24
Lost: 1
Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7698>
bool must_promote,
bool allow_one_constant,
bblock_t *block,
- ASSERTED const struct intel_device_info *devinfo,
+ const struct intel_device_info *devinfo,
void *const_ctx)
{
struct value *v = new_value(table, const_ctx);
unsigned box_idx = box_instruction(table, const_ctx, inst, ip, block,
must_promote);
- /* Just for now... */
- assert(inst->can_do_source_mods(devinfo));
-
v->value.u64 = inst->src[i].d64;
v->bit_size = 8 * type_sz(inst->src[i].type);
v->instr_index = box_idx;
v->src = i;
v->allow_one_constant = allow_one_constant;
- v->no_negations = false;
+
+ /* Right-shift instructions are special. They can have source modifiers,
+ * but changing the type can change the semantic of the instruction. Only
+ * allow negations on a right shift if the source type is already signed.
+ */
+ v->no_negations = !inst->can_do_source_mods(devinfo) ||
+ ((inst->opcode == BRW_OPCODE_SHR ||
+ inst->opcode == BRW_OPCODE_ASR) &&
+ brw_reg_type_is_unsigned_integer(inst->src[i].type));
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_DF:
ip++;
switch (inst->opcode) {
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_POW:
- assert(inst->src[0].file != IMM);
+ if (inst->src[0].file == IMM) {
+ assert(inst->opcode != SHADER_OPCODE_POW);
+
+ add_candidate_immediate(&table, inst, ip, 0, true, false, block,
+ devinfo, const_ctx);
+ }
if (inst->src[1].file == IMM && devinfo->ver < 8) {
add_candidate_immediate(&table, inst, ip, 1, true, false, block,
break;
}
+ case BRW_OPCODE_BFE:
+ case BRW_OPCODE_BFI2:
case BRW_OPCODE_LRP:
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file != IMM)
}
break;
+ case BRW_OPCODE_ASR:
+ case BRW_OPCODE_BFI1:
+ case BRW_OPCODE_ROL:
+ case BRW_OPCODE_ROR:
+ case BRW_OPCODE_SHL:
+ case BRW_OPCODE_SHR:
+ if (inst->src[0].file == IMM) {
+ add_candidate_immediate(&table, inst, ip, 0, true, false, block,
+ devinfo, const_ctx);
+ }
+ break;
+
case BRW_OPCODE_MOV:
if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) {
add_candidate_immediate(&table, inst, ip, 0, false, false, block,
unreachable("Bad type size");
}
}
+ } else if ((link->inst->opcode == BRW_OPCODE_SHL ||
+ link->inst->opcode == BRW_OPCODE_ASR) &&
+ link->negate) {
+ reg->type = brw_int_type(type_sz(reg->type), true);
}
#ifdef DEBUG
}
#endif
+ assert(link->inst->can_do_source_mods(devinfo) || !link->negate);
+
reg->file = VGRF;
reg->offset = table.imm[i].subreg_offset;
reg->stride = 0;
progress = true;
break;
- case SHADER_OPCODE_INT_QUOTIENT:
- case SHADER_OPCODE_INT_REMAINDER:
- /* FINISHME: Promote non-float constants and remove this. */
- if (devinfo->ver < 8)
- break;
- FALLTHROUGH;
case SHADER_OPCODE_POW:
/* Allow constant propagation into src1 (except on Gen 6 which
* doesn't support scalar source math), and let constant combining
*/
if (devinfo->ver == 6)
break;
- FALLTHROUGH;
- case BRW_OPCODE_BFI1:
- case BRW_OPCODE_ASR:
- case BRW_OPCODE_SHR:
- case BRW_OPCODE_SUBB:
+
if (i == 1) {
inst->src[i] = val;
progress = true;
}
break;
- case BRW_OPCODE_SHL:
- /* Only constant propagate into src0 if src1 is also constant. In that
- * specific case, constant folding will eliminate the instruction.
- */
- if ((i == 0 && inst->src[1].file == IMM) ||
- i == 1) {
+ case BRW_OPCODE_SUBB:
+ if (i == 1) {
inst->src[i] = val;
progress = true;
}
}
break;
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ /* Allow constant propagation into either source (except on Gen 6
+ * which doesn't support scalar source math). Constant combining
+ * promote the src1 constant on Gen < 8, and it will promote the src0
+ * constant on all platforms.
+ */
+ if (devinfo->ver == 6)
+ break;
+
+ FALLTHROUGH;
case BRW_OPCODE_AND:
+ case BRW_OPCODE_ASR:
+ case BRW_OPCODE_BFE:
+ case BRW_OPCODE_BFI1:
+ case BRW_OPCODE_BFI2:
+ case BRW_OPCODE_ROL:
+ case BRW_OPCODE_ROR:
+ case BRW_OPCODE_SHL:
+ case BRW_OPCODE_SHR:
case BRW_OPCODE_OR:
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL: