bool large_ds_read = bld.program->chip_class >= GFX7;
bool usable_read2 = bld.program->chip_class >= GFX7;
- bool aligned2 = align % 2 == 0;
- bool aligned4 = align % 4 == 0;
- bool aligned8 = bld.program->dev.has_unaligned_lds_access ? aligned4 : (align % 8 == 0);
- bool aligned16 = bld.program->dev.has_unaligned_lds_access ? aligned4 : (align % 16 == 0);
-
bool read2 = false;
unsigned size = 0;
aco_opcode op;
- if (bytes_needed >= 16 && aligned16 && large_ds_read) {
+ //TODO: use ds_read_u8_d16_hi/ds_read_u16_d16_hi if beneficial
+ if (bytes_needed >= 16 && align % 16 == 0 && large_ds_read) {
size = 16;
op = aco_opcode::ds_read_b128;
- } else if (bytes_needed >= 16 && aligned8 && const_offset % 8 == 0 && usable_read2) {
+ } else if (bytes_needed >= 16 && align % 8 == 0 && const_offset % 8 == 0 && usable_read2) {
size = 16;
read2 = true;
op = aco_opcode::ds_read2_b64;
- } else if (bytes_needed >= 12 && aligned16 && large_ds_read) {
+ } else if (bytes_needed >= 12 && align % 16 == 0 && large_ds_read) {
size = 12;
op = aco_opcode::ds_read_b96;
- } else if (bytes_needed >= 8 && aligned8) {
+ } else if (bytes_needed >= 8 && align % 8 == 0) {
size = 8;
op = aco_opcode::ds_read_b64;
- } else if (bytes_needed >= 8 && aligned4 && const_offset % 4 == 0) {
+ } else if (bytes_needed >= 8 && align % 4 == 0 && const_offset % 4 == 0) {
size = 8;
read2 = true;
op = aco_opcode::ds_read2_b32;
- } else if (bytes_needed >= 4 && aligned4) {
+ } else if (bytes_needed >= 4 && align % 4 == 0) {
size = 4;
op = aco_opcode::ds_read_b32;
- } else if (bytes_needed >= 2 && aligned2) {
+ } else if (bytes_needed >= 2 && align % 2 == 0) {
size = 2;
op = aco_opcode::ds_read_u16;
} else {
bool aligned2 = offset % 2 == 0 && align % 2 == 0;
bool aligned4 = offset % 4 == 0 && align % 4 == 0;
- bool aligned8 = bld.program->dev.has_unaligned_lds_access ? aligned4 : (offset % 8 == 0 && align % 8 == 0);
- bool aligned16 = bld.program->dev.has_unaligned_lds_access ? aligned4 : (offset % 16 == 0 && align % 16 == 0);
+ bool aligned8 = offset % 8 == 0 && align % 8 == 0;
+ bool aligned16 = offset % 16 == 0 && align % 16 == 0;
//TODO: use ds_write_b8_d16_hi/ds_write_b16_d16_hi if beneficial
aco_opcode op = aco_opcode::num_opcodes;
program->dev.lds_limit = chip_class >= GFX7 ? 65536 : 32768;
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
- /* GFX10 WGP has a bug making naturally aligned access required. The LLVM
- * subtarget feature is called "FeatureLdsMisalignedBug".
- */
- program->dev.has_unaligned_lds_access = chip_class >= GFX9 && !(chip_class == GFX10 && wgp_mode);
program->dev.vgpr_limit = 256;
program->dev.physical_vgprs = 256;
nir_intrinsic_instr *low, nir_intrinsic_instr *high,
void *data)
{
- struct radv_device *device = data;
- enum chip_class chip = device->physical_device->rad_info.chip_class;
-
if (num_components > 4)
return false;
FALLTHROUGH;
case nir_intrinsic_load_shared:
case nir_intrinsic_store_shared:
- if (chip < GFX9 && bit_size * num_components == 96) /* 96 bit loads require 128 bit alignment on GFX6-8 and are split otherwise */
+ if (bit_size * num_components == 96) /* 96 bit loads require 128 bit alignment and are split otherwise */
return align % 16 == 0;
- else if (chip < GFX9 && bit_size * num_components == 128) /* 128 bit loads require 64 bit alignment on GFX6-8 and are split otherwise */
+ else if (bit_size * num_components == 128) /* 128 bit loads require 64 bit alignment and are split otherwise */
return align % 8 == 0;
else
return align % (bit_size == 8 ? 2 : 4) == 0;
nir_var_mem_push_const | nir_var_mem_shared |
nir_var_mem_global,
.callback = mem_vectorize_callback,
- .cb_data = device,
.robust_modes = 0,
};