"Unimplemented non-zero nir_intrinsic_load_input offset");
unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
- unsigned component = nir_intrinsic_component(instr);
unsigned bitsize = instr->dest.ssa.bit_size;
+ unsigned component = nir_intrinsic_component(instr) >> (bitsize == 64 ? 1 : 0);
unsigned num_components = instr->dest.ssa.num_components;
- Temp input = get_arg(ctx, ctx->args->vs_inputs[location]);
-
aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
for (unsigned i = 0; i < num_components; i++) {
- elems[i] = emit_extract_vector(ctx, input, component + i, bitsize == 64 ? v2 : v1);
+ if (bitsize == 64) {
+ Temp input = get_arg(ctx, ctx->args->vs_inputs[location + (component + i) / 2]);
+ elems[i] = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2),
+ emit_extract_vector(ctx, input, (component + i) * 2 % 4, v1),
+ emit_extract_vector(ctx, input, (component + i) * 2 % 4 + 1, v1));
+ } else {
+ Temp input = get_arg(ctx, ctx->args->vs_inputs[location]);
+ elems[i] = emit_extract_vector(ctx, input, component + i, v1);
+ }
if (bitsize == 16) {
if (nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float)
elems[i] = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), elems[i]);
convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.vertex_buffers));
unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
- unsigned component = nir_intrinsic_component(instr);
unsigned bitsize = instr->dest.ssa.bit_size;
+ unsigned component = nir_intrinsic_component(instr) >> (bitsize == 64 ? 1 : 0);
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location];
uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location];
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
channels[channel_start] = fetch_dst;
} else {
for (unsigned i = 0; i < MIN2(fetch_component, num_channels - channel_start); i++)
- channels[channel_start + i] =
- emit_extract_vector(ctx, fetch_dst, i, bitsize == 16 ? v2b : v1);
+ channels[channel_start + i] = emit_extract_vector(
+ ctx, fetch_dst, i, RegClass::get(RegType::vgpr, bitsize / 8u));
}
channel_start += fetch_component;
num_temp++;
elems[i] = channel;
+ } else if (bitsize == 64) {
+ /* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
+ * For 64-bit data types, no default attribute values are provided. Input variables
+ * must not use more components than provided by the attribute.
+ */
+ vec->operands[i] = Operand(v2);
} else if (is_float && idx == 3) {
vec->operands[i] = bitsize == 16 ? Operand::c16(0x3c00u) : Operand::c32(0x3f800000u);
} else if (!is_float && idx == 3) {
}
if (ctx->stage.has(SWStage::VS) && ctx->program->info.vs.dynamic_inputs) {
- unsigned num_attributes = util_last_bit(ctx->program->info.vs.vb_desc_usage_mask);
+ unsigned num_attributes = util_last_bit(ctx->program->info.vs.input_slot_usage_mask);
for (unsigned i = 0; i < num_attributes; i++) {
Definition def(get_arg(ctx, ctx->args->vs_inputs[i]));
bld.sopp(aco_opcode::s_waitcnt, -1, lgkm_imm.pack(program->gfx_level));
- for (unsigned i = 0; i < num_descs; i++, loc++) {
+ for (unsigned i = 0; i < num_descs;) {
PhysReg dest(attributes_start.reg() + loc * 4u);
/* calculate index */
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dest.advance(j * 4u), v1),
Operand(cur_desc, s4), fetch_index, Operand::c32(0u), offset, false,
false, true);
+ else if (vtx_info->chan_byte_size == 8)
+ bld.mtbuf(aco_opcode::tbuffer_load_format_xy,
+ Definition(dest.advance(j * 8u), v2), Operand(cur_desc, s4),
+ fetch_index, Operand::c32(0u), dfmt, nfmt, offset, false, true);
else
bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(dest.advance(j * 4u), v1),
Operand(cur_desc, s4), fetch_index, Operand::c32(0u), dfmt, nfmt,
nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || nfmt == V_008F0C_BUF_NUM_FORMAT_SINT
? 1u
: 0x3f800000u;
- for (unsigned j = vtx_info->num_channels; j < 4; j++) {
+ /* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
+ * For 64-bit data types, no default attribute values are provided. Input variables must
+ * not use more components than provided by the attribute.
+ */
+ for (unsigned j = vtx_info->num_channels; vtx_info->chan_byte_size != 8 && j < 4; j++) {
bld.vop1(aco_opcode::v_mov_b32, Definition(dest.advance(j * 4u), v1),
Operand::c32(j == 3 ? one : 0u));
}
+
+ unsigned slots = vtx_info->chan_byte_size == 8 && vtx_info->num_channels > 2 ? 2 : 1;
+ loc += slots;
+ i += slots;
} else {
bld.mubuf(aco_opcode::buffer_load_format_xyzw, Definition(dest, v4),
Operand(cur_desc, s4), fetch_index, Operand::c32(0u), 0u, false, false, true);
+ loc++;
+ i++;
}
}
}
uint64_t tcs_temp_only_input_mask;
bool use_per_attribute_vb_descs;
uint32_t vb_desc_usage_mask;
+ uint32_t input_slot_usage_mask;
bool has_prolog;
bool dynamic_inputs;
} vs;
case 32:
break;
case 64:
- unreachable("64-bit IO should have been lowered");
- return NULL;
+ if (ctx->stage != MESA_SHADER_VERTEX || is_output) {
+ unreachable("64-bit IO should have been lowered");
+ return NULL;
+ }
+ break;
default:
unreachable("unhandled load type");
return NULL;
ASSIGN_FIELD(vs.tcs_temp_only_input_mask);
ASSIGN_FIELD(vs.use_per_attribute_vb_descs);
ASSIGN_FIELD(vs.vb_desc_usage_mask);
+ ASSIGN_FIELD(vs.input_slot_usage_mask);
ASSIGN_FIELD(vs.has_prolog);
ASSIGN_FIELD(vs.dynamic_inputs);
ASSIGN_FIELD_CP(gs.output_usage_mask);
static LLVMValueRef
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, LLVMValueRef value,
- unsigned num_channels, bool is_float)
+ unsigned num_channels, bool is_float, bool is_64bit)
{
- LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
- LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
+ LLVMValueRef zero = is_64bit ? ctx->ac.i64_0 : (is_float ? ctx->ac.f32_0 : ctx->ac.i32_0);
+ LLVMValueRef one = is_64bit ? ctx->ac.i64_1 : (is_float ? ctx->ac.f32_1 : ctx->ac.i32_1);
LLVMValueRef chan[4];
if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
* dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and
* VBO offset is 2 for R16G16B16A16_SNORM).
*/
+ unsigned chan_dwords = vtx_info->chan_byte_size == 8 ? 2 : 1;
if (((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) ||
- !(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
+ !(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1)) ||
+ vtx_info->element_size > 16) {
unsigned chan_format = vtx_info->hw_format[0] & 0xf;
LLVMValueRef values[4];
values[chan] = ac_build_struct_tbuffer_load(
&ctx->ac, t_list, chan_index, LLVMConstInt(ctx->ac.i32, chan_offset, false),
- ctx->ac.i32_0, 1, chan_format, num_format, 0, true);
+ ctx->ac.i32_0, chan_dwords, chan_format, num_format, 0, true);
}
input = ac_build_gather_values(&ctx->ac, values, num_channels);
input = ac_build_struct_tbuffer_load(
&ctx->ac, t_list, buffer_index, LLVMConstInt(ctx->ac.i32, attrib_offset, false),
- ctx->ac.i32_0, num_channels, data_format, num_format, 0, true);
+ ctx->ac.i32_0, num_channels * chan_dwords, data_format, num_format, 0, true);
}
- input = radv_fixup_vertex_input_fetches(ctx, input, num_channels, is_float);
+ if (vtx_info->chan_byte_size == 8)
+ input =
+ LLVMBuildBitCast(ctx->ac.builder, input, LLVMVectorType(ctx->ac.i64, num_channels), "");
+
+ input = radv_fixup_vertex_input_fetches(ctx, input, num_channels, is_float,
+ vtx_info->chan_byte_size == 8);
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
}
- NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
- nir_lower_io_lower_64bit_to_32);
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4,
+ nir_lower_io_lower_64bit_to_32);
+ } else {
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
+ nir_lower_io_lower_64bit_to_32);
+ }
/* This pass needs actual constants */
NIR_PASS(_, nir, nir_opt_constant_folding);
bool needs_base_instance;
bool use_per_attribute_vb_descs;
uint32_t vb_desc_usage_mask;
+ uint32_t input_slot_usage_mask;
bool has_prolog;
bool dynamic_inputs;
} vs;
if (info->vs.dynamic_inputs) {
assert(info->vs.use_per_attribute_vb_descs);
- unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask);
+ unsigned num_attributes = util_last_bit(info->vs.input_slot_usage_mask);
for (unsigned i = 0; i < num_attributes; i++)
ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
/* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
unsigned idx = nir_intrinsic_io_semantics(instr).location;
unsigned component = nir_intrinsic_component(instr);
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ mask = (instr->dest.ssa.bit_size == 64 ? util_widen_mask(mask, 2) : mask) << component;
- info->vs.input_usage_mask[idx] |= mask << component;
+ info->vs.input_usage_mask[idx] |= mask & 0xf;
+ if (mask >> 4)
+ info->vs.input_usage_mask[idx + 1] |= mask >> 4;
break;
}
default:
}
static void
+gather_info_input_decl_vs(const nir_shader *nir, unsigned location, const struct glsl_type *type,
+ const struct radv_pipeline_key *key, struct radv_shader_info *info)
+{
+ if (glsl_type_is_scalar(type) || glsl_type_is_vector(type)) {
+ if (key->vs.instance_rate_inputs & BITFIELD_BIT(location)) {
+ info->vs.needs_instance_id = true;
+ info->vs.needs_base_instance = true;
+ }
+
+ if (info->vs.use_per_attribute_vb_descs)
+ info->vs.vb_desc_usage_mask |= BITFIELD_BIT(location);
+ else
+ info->vs.vb_desc_usage_mask |= BITFIELD_BIT(key->vs.vertex_attribute_bindings[location]);
+
+ info->vs.input_slot_usage_mask |=
+ BITFIELD_RANGE(location, glsl_count_attribute_slots(type, false));
+ } else if (glsl_type_is_matrix(type) || glsl_type_is_array(type)) {
+ const struct glsl_type *elem = glsl_get_array_element(type);
+ unsigned stride = glsl_count_attribute_slots(elem, false);
+
+ for (unsigned i = 0; i < glsl_get_length(type); ++i)
+ gather_info_input_decl_vs(nir, location + i * stride, elem, key, info);
+ } else {
+ assert(glsl_type_is_struct_or_ifc(type));
+
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ const struct glsl_type *field = glsl_get_struct_field(type, i);
+ gather_info_input_decl_vs(nir, location, field, key, info);
+ location += glsl_count_attribute_slots(field, false);
+ }
+ }
+}
+
+static void
gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
const struct radv_pipeline_key *pipeline_key, struct radv_shader_info *info)
{
info->vs.needs_base_instance |= info->vs.has_prolog;
info->vs.needs_draw_id |= info->vs.has_prolog;
- nir_foreach_shader_in_variable(var, nir) {
- unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
-
- for (unsigned i = 0; i < attrib_count; ++i) {
- unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
-
- if (pipeline_key->vs.instance_rate_inputs & (1u << attrib_index)) {
- info->vs.needs_instance_id = true;
- info->vs.needs_base_instance = true;
- }
-
- if (info->vs.use_per_attribute_vb_descs) {
- info->vs.vb_desc_usage_mask |= 1u << attrib_index;
- } else {
- info->vs.vb_desc_usage_mask |=
- 1u << pipeline_key->vs.vertex_attribute_bindings[attrib_index];
- }
- }
- }
+ nir_foreach_shader_in_variable(var, nir)
+ gather_info_input_decl_vs(nir, var->data.location - VERT_ATTRIB_GENERIC0, var->type,
+ pipeline_key, info);
}
static void