/* TODO: implement clip+cull distances on earlier generations */
compiler->has_clip_cull = true;
+ /* TODO: implement private memory on earlier generations */
+ compiler->has_pvtmem = true;
+
if (compiler->gpu_id == 650)
compiler->tess_use_shared = true;
} else {
progress |= OPT(s, ir3_nir_lower_ubo_loads, so);
+ /* Lower large temporaries to scratch, which in Qualcomm terms is private
+ * memory, to avoid excess register pressure. This should happen after
+ * nir_opt_large_constants, because loading from a UBO is much, much less
+ * expensive.
+ */
+ if (so->shader->compiler->has_pvtmem) {
+ NIR_PASS_V(s, nir_lower_vars_to_scratch, nir_var_function_temp,
+ 16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
+ }
+
+
OPT_V(s, nir_lower_amul, ir3_glsl_type_size);
/* UBO offset lowering has to come after we've decided what will