to match si_shader_info.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
for (unsigned comp = 0; comp < 4; comp++) {
tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * reg + comp, false));
out.values[comp] = LLVMBuildLoad(builder, tmp, "");
- out.vertex_stream[comp] = (info->output_streams[reg] >> (2 * comp)) & 3;
+ out.vertex_streams = info->output_streams[reg];
}
si_llvm_streamout_store_output(ctx, so_buffer, offset, &so->output[i], &out);
outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++) {
- outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
+ outputs[i].vertex_streams = info->output_streams[i];
/* TODO: we may store more outputs than streamout needs,
* but streamout performance isn't that important.
if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
+ outputs[i].vertex_streams = 0;
if (ctx->stage == MESA_SHADER_VERTEX) {
/* Wait for GS stores to finish. */
outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]);
for (unsigned j = 1; j < 4; j++)
outputs[i].values[j] = LLVMGetUndef(ctx->ac.f32);
-
- memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream));
i++;
}
tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
tmp = LLVMBuildLoad(builder, tmp, "");
outputs[i].values[j] = ac_to_float(&ctx->ac, tmp);
- outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
+ outputs[i].vertex_streams = info->output_streams[i];
}
}
struct si_shader_output_values {
LLVMValueRef values[4];
- ubyte vertex_stream[4];
+ ubyte vertex_streams; /* packed per-channel stream indices, 2 bits each: channel c is (vertex_streams >> (c * 2)) & 0x3 */
ubyte semantic;
};
/* Fill in output information. */
for (i = 0; i < gsinfo->num_outputs; ++i) {
outputs[i].semantic = gsinfo->output_semantic[i];
-
- for (int chan = 0; chan < 4; chan++) {
- outputs[i].vertex_stream[chan] = (gsinfo->output_streams[i] >> (2 * chan)) & 3;
- }
+ outputs[i].vertex_streams = gsinfo->output_streams[i];
}
LLVMBasicBlockRef end_bb;
for (i = 0; i < gsinfo->num_outputs; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
- outputs[i].vertex_stream[chan] != stream) {
+ ((outputs[i].vertex_streams >> (chan * 2)) & 0x3) != stream) {
outputs[i].values[chan] = LLVMGetUndef(ctx.ac.f32);
continue;
}
/* Load the output as int. */
for (int j = 0; j < num_comps; j++) {
- assert(stream_out->stream == shader_out->vertex_stream[start + j]);
+ assert(stream_out->stream == ((shader_out->vertex_streams >> ((start + j) * 2)) & 0x3));
out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
}
for (unsigned i = 0; i < noutput; i++) {
unsigned semantic = outputs[i].semantic;
- if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 &&
- outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0)
+ /* Skip if no channel writes to stream 0. */
+ if (outputs[i].vertex_streams & 0x03 &&
+ outputs[i].vertex_streams & 0x0c &&
+ outputs[i].vertex_streams & 0x30 &&
+ outputs[i].vertex_streams & 0xc0)
continue;
switch (semantic) {
for (j = 0; j < 4; j++) {
outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
- outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
+ outputs[i].vertex_streams = info->output_streams[i];
}
}
/* Export PrimitiveID. */
if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
+ outputs[i].vertex_streams = 0;
outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
for (j = 1; j < 4; j++)
outputs[i].values[j] = LLVMConstReal(ctx->ac.f32, 0);
-
- memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream));
i++;
}