bool disable_gcm;
bool disable_loop_unrolling;
bool disable_ubo_load_sorting;
+ bool move_buffer_loads;
bool disable_tmu_pipelining;
uint32_t max_tmu_spills;
};
c->disable_general_tmu_sched = strategy->disable_general_tmu_sched;
c->disable_tmu_pipelining = strategy->disable_tmu_pipelining;
c->disable_constant_ubo_load_sorting = strategy->disable_ubo_load_sorting;
+ c->move_buffer_loads = strategy->move_buffer_loads;
c->disable_gcm = strategy->disable_gcm;
c->disable_loop_unrolling = V3D_DBG(NO_LOOP_UNROLL)
? true : strategy->disable_loop_unrolling;
if (!c->disable_constant_ubo_load_sorting)
NIR_PASS(_, c->s, v3d_nir_sort_constant_ubo_loads, c);
+ const nir_move_options buffer_opts = c->move_buffer_loads ?
+ (nir_move_load_ubo | nir_move_load_ssbo) : 0;
NIR_PASS(_, c->s, nir_opt_move, nir_move_load_uniform |
- nir_move_const_undef);
+ nir_move_const_undef |
+ buffer_opts);
v3d_nir_to_vir(c);
}
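Context for the new pass options: nir_opt_move sinks the selected instruction classes down to just before their first use within a block, so tagging UBO/SSBO loads shortens the loaded values' live ranges (easing register allocation) at the price of less latency hiding, which is why the case-10 comment further down only enables this for 2-thread compiles. A rough toy model of that sinking, in plain C (not Mesa code; every name here is invented for illustration):

/* Toy model: sink "load" instructions to just before their first use,
 * the way nir_opt_move does within a basic block. Closing the gap
 * between a load and its use shortens the value's live range, which
 * is what eases register allocation for hard-to-compile shaders.
 */
#include <stdio.h>
#include <string.h>

struct inst {
   const char *text;
   int is_load; /* candidate for sinking */
   int def;     /* value defined */
   int use;     /* load-defined value consumed, or -1 (other operands
                 * are ignored in this toy) */
};

int main(void)
{
   struct inst block[] = {
      { "r0 = load_ubo 0",  1, 0, -1 },
      { "r1 = fmul a, b",   0, 1, -1 },
      { "r2 = fadd r1, c",  0, 2, -1 },
      { "r3 = fadd r0, r2", 0, 3,  0 }, /* first use of r0 */
   };
   int n = sizeof(block) / sizeof(block[0]);

   /* For each sinkable load, find its first use and move the load to
    * the slot immediately before it. */
   for (int i = 0; i < n; i++) {
      if (!block[i].is_load)
         continue;
      for (int j = i + 1; j < n; j++) {
         if (block[j].use == block[i].def) {
            struct inst tmp = block[i];
            memmove(&block[i], &block[i + 1],
                    (j - 1 - i) * sizeof(block[0]));
            block[j - 1] = tmp;
            break;
         }
      }
   }

   for (int i = 0; i < n; i++)
      printf("%s\n", block[i].text);
   return 0;
}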
* because v3d_nir_to_vir will cap this to the actual minimum.
*/
static const struct v3d_compiler_strategy strategies[] = {
- /*0*/ { "default", 4, 4, false, false, false, false, false, 0 },
- /*1*/ { "disable general TMU sched", 4, 4, true, false, false, false, false, 0 },
- /*2*/ { "disable gcm", 4, 4, true, true, false, false, false, 0 },
- /*3*/ { "disable loop unrolling", 4, 4, true, true, true, false, false, 0 },
- /*4*/ { "disable UBO load sorting", 4, 4, true, true, true, true, false, 0 },
- /*5*/ { "disable TMU pipelining", 4, 4, true, true, true, true, true, 0 },
- /*6*/ { "lower thread count", 2, 1, false, false, false, false, false, -1 },
- /*7*/ { "disable general TMU sched (2t)", 2, 1, true, false, false, false, false, -1 },
- /*8*/ { "disable gcm (2t)", 2, 1, true, true, false, false, false, -1 },
- /*9*/ { "disable loop unrolling (2t)", 2, 1, true, true, true, false, false, -1 },
- /*10*/ { "disable UBO load sorting (2t)", 2, 1, true, true, true, true, false, -1 },
- /*11*/ { "disable TMU pipelining (2t)", 2, 1, true, true, true, true, true, -1 },
- /*12*/ { "fallback scheduler", 2, 1, true, true, true, true, true, -1 }
+ /*0*/ { "default", 4, 4, false, false, false, false, false, false, 0 },
+ /*1*/ { "disable general TMU sched", 4, 4, true, false, false, false, false, false, 0 },
+ /*2*/ { "disable gcm", 4, 4, true, true, false, false, false, false, 0 },
+ /*3*/ { "disable loop unrolling", 4, 4, true, true, true, false, false, false, 0 },
+ /*4*/ { "disable UBO load sorting", 4, 4, true, true, true, true, false, false, 0 },
+ /*5*/ { "disable TMU pipelining", 4, 4, true, true, true, true, false, true, 0 },
+ /*6*/ { "lower thread count", 2, 1, false, false, false, false, false, false, -1 },
+ /*7*/ { "disable general TMU sched (2t)", 2, 1, true, false, false, false, false, false, -1 },
+ /*8*/ { "disable gcm (2t)", 2, 1, true, true, false, false, false, false, -1 },
+ /*9*/ { "disable loop unrolling (2t)", 2, 1, true, true, true, false, false, false, -1 },
+ /*10*/ { "Move buffer loads (2t)", 2, 1, true, true, true, true, true, false, -1 },
+ /*11*/ { "disable TMU pipelining (2t)", 2, 1, true, true, true, true, true, false, -1 },
+ /*12*/ { "fallback scheduler", 2, 1, true, true, true, true, true, false, -1 }
};
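For orientation, the table is tried top to bottom: each later entry gives up a bit more optimization (and, from entry 6 on, drops to 2 threads) in exchange for a better chance that register allocation succeeds, with the fallback scheduler as the last resort; skip_compile_strategy below prunes attempts that cannot change the outcome. A minimal, self-contained sketch of that retry pattern (all names hypothetical, not the driver's actual loop):

#include <stdio.h>
#include <stdbool.h>

struct toy_strategy {
   const char *name;
   int opt_level; /* stand-in for the per-strategy bool flags above */
};

/* Stub standing in for a real compile attempt: pretend only low
 * optimization pressure fits in the register file. */
static bool toy_try_compile(const struct toy_strategy *s)
{
   return s->opt_level <= 1;
}

int main(void)
{
   const struct toy_strategy strategies[] = {
      { "default",             3 },
      { "fewer optimizations", 2 },
      { "lower thread count",  1 },
      { "fallback scheduler",  0 },
   };
   const int n = sizeof(strategies) / sizeof(strategies[0]);

   /* Walk the table in order until one strategy compiles; the real
    * driver additionally skips entries whose disabled optimization
    * made no progress in the first place. */
   for (int i = 0; i < n; i++) {
      if (toy_try_compile(&strategies[i])) {
         printf("compiled with strategy: %s\n", strategies[i].name);
         return 0;
      }
   }
   printf("all strategies failed\n");
   return 1;
}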
/**
return !c->unrolled_any_loops;
/* UBO load sorting: skip if we didn't sort any loads */
case 4:
- case 10:
return !c->sorted_any_ubo_loads;
+ /* Move buffer loads: we assume any shader with difficult RA
+ * most likely has UBO / SSBO loads so we never try to skip.
+ * For now, we only try this for 2-thread compiles since it
+ * is expected to impact instruction counts and latency.
+ */
+ case 10:
+ assert(c->threads < 4);
+ return false;
/* TMU pipelining: skip if we didn't pipeline any TMU ops */
case 5:
case 11: