   return mask;
}
+static nir_mem_access_size_align
+mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, uint32_t align,
+                         uint32_t align_offset, bool offset_is_const,
+                         const void *cb_data)
+{
+   align = nir_combined_align(align, align_offset);
+
+   assert(util_is_power_of_two_nonzero(align));
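+
+   /* Pick the widest element size that divides the access evenly and does
+    * not exceed the combined alignment: an odd byte count forces 8-bit, a
+    * non-multiple of 4 forces 16-bit, everything else can use 32-bit.
+    */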
+   unsigned bit_size = (bytes & 1) ? 8 : (bytes & 2) ? 16 : 32;
+   if (align == 2)
+      bit_size = MIN2(bit_size, 16);
+   else if (align == 1)
+      bit_size = 8;
+
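+   /* Cover the whole access with a single vector of that element size,
+    * aligned to the element size.
+    */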
+   return (nir_mem_access_size_align){
+      .num_components = bytes / (bit_size / 8),
+      .bit_size = bit_size,
+      .align = bit_size / 8,
+   };
+}
+
static bool
agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
{
   out->depth_layout = layout;
}
+   /* Late sysval lowering creates large loads. Load lowering creates unpacks */
+   NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes,
+              nir_var_mem_ssbo | nir_var_mem_constant |
+                 nir_var_mem_task_payload | nir_var_shader_temp |
+                 nir_var_function_temp | nir_var_mem_global |
+                 nir_var_mem_shared,
+              mem_access_size_align_cb, NULL);
+   NIR_PASS_V(nir, nir_lower_pack);
+
   /* Late blend lowering creates vectors */
   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
   .lower_fdph = true,
   .lower_ffract = true,
   .lower_pack_half_2x16 = true,
+   .lower_pack_64_2x32 = true,
   .lower_unpack_half_2x16 = true,
   .lower_extract_byte = true,
   .lower_insert_byte = true,