#include "sfn_instr_lds.h"
#include "sfn_instr_tex.h"
#include "sfn_peephole.h"
+#include "sfn_valuefactory.h"
+#include "sfn_virtualvalues.h"
#include <sstream>
class CopyPropFwdVisitor : public InstrVisitor {
public:
- CopyPropFwdVisitor();
+ CopyPropFwdVisitor(ValueFactory& vf); // vf is kept by reference in value_factory
void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(LDSReadInstr *instr) override { (void)instr; };
void propagate_to(RegisterVec4& src, Instr *instr);
+ bool assigned_in_block_and_direct(PRegister reg, int block_id); // lets propagate_to accept some non-SSA sources
+ ValueFactory& value_factory; // propagate_to requests new register indices from it
bool progress;
};
copy_propagation_fwd(Shader& shader)
{
auto& root = shader.func();
- CopyPropFwdVisitor copy_prop;
+ CopyPropFwdVisitor copy_prop(shader.value_factory());
do {
copy_prop.progress = false;
return copy_prop.progress;
}
-CopyPropFwdVisitor::CopyPropFwdVisitor():
- progress(false)
+CopyPropFwdVisitor::CopyPropFwdVisitor(ValueFactory& vf):
+ value_factory(vf), // reference member: vf has to outlive this visitor
+ progress(false) // copy_propagation_fwd clears it again before each pass
{
}
propagate_to(instr->value(), instr);
}
+/* Returns true only for pin_free and pin_none. propagate_to uses this to
+ * decide whether copy sources may be moved to a newly allocated register
+ * index. */
+static bool register_sel_can_change(Pin pin)
+{
+   const bool is_free = (pin == pin_free);
+   const bool is_unpinned = (pin == pin_none);
+   return is_free || is_unpinned;
+}
+
+/* Returns true for pin_chan, pin_fully and pin_chgr, false for every other
+ * pin. For such registers propagate_to narrows the allowed channel mask to
+ * the register's current channel. */
+static bool register_chan_is_pinned(Pin pin)
+{
+   if (pin == pin_chan)
+      return true;
+   if (pin == pin_fully)
+      return true;
+   return pin == pin_chgr;
+}
+
+
void
CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr)
{
}
PRegister new_src[4] = {0};
- int sel = -1;
+ uint8_t mask = 0;
+ int new_sel = -1;
+ bool all_sel_can_change = true;
+
+ bool is_ssa = true;
+ int new_chan[4] = {0,0,0,0};
+
for (int i = 0; i < 4; ++i) {
+ unsigned allowed_mask = 0xf & ~mask;
if (!parents[i])
continue;
if ((parents[i]->opcode() != op1_mov) || parents[i]->has_alu_flag(alu_src0_neg) ||
parents[i]->has_alu_flag(alu_src0_abs) ||
parents[i]->has_alu_flag(alu_dst_clamp) ||
- parents[i]->has_alu_flag(alu_src0_rel)) {
+ parents[i]->has_alu_flag(alu_src0_rel) ||
+ std::get<0>(parents[i]->indirect_addr())) {
return;
} else {
auto src = parents[i]->src(0).as_register();
if (!src)
return;
- else if (!src->has_flag(Register::ssa))
+ if (src->pin() == pin_array)
return;
- else if (sel < 0)
- sel = src->sel();
- else if (sel != src->sel())
+ if (!src->has_flag(Register::ssa) &&
+ !assigned_in_block_and_direct(src, instr->block_id())) {
return;
+ }
+ if (register_chan_is_pinned(src->pin())) {
+ allowed_mask = 1 << src->chan();
+ }
+ new_chan[i] = src->chan();
+
for (auto p : src->parents()) {
auto alu = p->as_alu();
- if (alu && !(alu->allowed_dest_chan_mask() & (1 << i)))
+ if (alu)
+ allowed_mask &= alu->allowed_dest_chan_mask();
+ }
+ if (!allowed_mask) {
+ return;
+ }
+
+ if (new_sel < 0) {
+ new_sel = src->sel();
+ is_ssa = src->has_flag(Register::ssa);
+ new_chan[i] = src->chan();
+ } else if (new_sel != src->sel()) {
+ if (all_sel_can_change &&
+ register_sel_can_change(src->pin()) &&
+ (is_ssa == src->has_flag(Register::ssa))) {
+ new_chan[i] = u_bit_scan(&allowed_mask);
+ new_sel = value_factory.new_register_index();
+ } else
return;
}
+
new_src[i] = src;
+ mask |= 1 << new_chan[i];
+ if (!register_sel_can_change(src->pin()))
+ all_sel_can_change = false;
}
}
for (int i = 0; i < 4; ++i) {
if (parents[i]) {
src.del_use(instr);
+ new_src[i]->set_sel(new_sel);
+ if (is_ssa)
+ new_src[i]->set_flag(Register::ssa);
+ new_src[i]->set_chan(new_chan[i]);
src.set_value(i, new_src[i]);
if (new_src[i]->pin() != pin_fully) {
if (new_src[i]->pin() == pin_chan)
src.validate();
}
+/* Walks the parents of reg in order:
+ *  - a parent that is an ALU instruction with an indirect address makes the
+ *    result false immediately;
+ *  - otherwise the first parent whose block id equals block_id makes the
+ *    result true; parents after it are not inspected.
+ * Returns false when no parent lives in block_id. */
+bool CopyPropFwdVisitor::assigned_in_block_and_direct(PRegister reg, int block_id)
+{
+   for (auto parent : reg->parents()) {
+      auto alu = parent->as_alu();
+      if (alu && std::get<0>(alu->indirect_addr()))
+         return false;
+
+      if (parent->block_id() == block_id)
+         return true;
+   }
+   return false;
+}
+
void
CopyPropFwdVisitor::visit(FetchInstr *instr)
{
check(sh, vs_nexted_loop_from_nir_expect_opt);
}
+
+TEST_F(TestShaderFromNir, CombineRegisterToTexSrc)
+{
+const char *shader_input = // R2.x and R3.x are copied by MOVs into S1.xy, which the TEX reads
+ R"(FS
+CHIPCLASS EVERGREEN
+REGISTERS R0.x R1.x R2.x R3.x
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+ALU ADD R2.x : R0.x R2.x {W}
+ALU MUL R3.x : R0.x R3.x {WL}
+ALU MOV S1.x@group : R2.x {W}
+ALU MOV S1.y@group : R3.x {WL}
+TEX SAMPLE S2.xyzw : S1.xy__ RID:18 SID:0 NNNN
+EXPORT_DONE PIXEL 0 S2.xyzw
+)";
+
+const char *shader_expect = // no MOVs left: ADD/MUL write R1024.x/.y@group and the TEX reads R1024.xy
+ R"(FS
+CHIPCLASS EVERGREEN
+REGISTERS R1024.x@group R1024.y@group R0.x
+PROP MAX_COLOR_EXPORTS:1
+PROP COLOR_EXPORTS:1
+PROP COLOR_EXPORT_MASK:15
+OUTPUT LOC:0 NAME:1 MASK:15
+SHADER
+
+ALU ADD R1024.x@group : R0.x R1024.x@group {W}
+ALU MUL R1024.y@group : R0.x R1024.y@group {WL}
+TEX SAMPLE S2.xyzw : R1024.xy__ RID:18 SID:0 NNNN
+EXPORT_DONE PIXEL 0 S2.xyzw
+)";
+
+ auto sh = from_string(shader_input);
+
+ optimize(*sh); // run the optimizer on the parsed input shader
+
+ check(sh, shader_expect); // check the optimized shader against the expected text
+}
+
TEST_F(TestShaderFromNir, OptimizeWithDestArrayValue)
{
auto sh = from_string(shader_with_dest_array);
ALU MULADD_IEEE S33.x : S25.x@free KC0[4].x S30.x {W}
ALU MULADD_IEEE S33.y : S25.x@free KC0[4].y S30.y {W}
ALU MULADD_IEEE S33.z : S25.x@free KC0[4].z S30.z {WL}
-ALU MULADD_IEEE CLAMP S40.x@group : S27.x@free KC0[5].x S33.x {W}
-ALU MULADD_IEEE CLAMP S40.y@group : S27.x@free KC0[5].y S33.y {W}
-ALU MULADD_IEEE CLAMP S40.z@group : S27.x@free KC0[5].z S33.z {W}
+ALU MULADD_IEEE CLAMP S1024.x@group : S27.x@free KC0[5].x S33.x {W}
+ALU MULADD_IEEE CLAMP S1024.y@group : S27.x@free KC0[5].y S33.y {W}
+ALU MULADD_IEEE CLAMP S1024.z@group : S27.x@free KC0[5].z S33.z {WL}
EXPORT_DONE POS 0 S15.xyzw
-ALU MOV CLAMP S40.w@group : KC0[2].w {WL}
-EXPORT_DONE PARAM 0 S40.xyzw)";
+ALU MOV CLAMP S1024.w@group : KC0[2].w {WL}
+EXPORT_DONE PARAM 0 S1024.xyzw)";
const char *vs_nexted_loop_nir =
R"(shader: MESA_SHADER_VERTEX
PROP WRITE_ALL_COLORS:1
OUTPUT LOC:0 NAME:1 MASK:15
SHADER
-ALU DOT4_IEEE S5.x@group : KC0[0].y KC0[0].y + KC0[0].y KC0[0].y + I[0] I[0] + I[0] I[0] {W}
-ALU DOT4_IEEE S3.x@free : KC0[0].x KC0[0].z + KC0[0].x KC0[0].w + I[0] I[0] + I[0] I[0] {WL}
-ALU DOT4_IEEE S5.w@group : KC0[0].y KC0[0].w + KC0[0].w KC0[0].y + I[0] I[0] + I[0] I[0] {WL}
-ALU MOV S5.y@group : S3.x@free {W}
-ALU MOV S5.z@group : S3.x@free {W}
-EXPORT_DONE PIXEL 0 S5.xyzw
+ALU DOT4_IEEE S1026.x@group : KC0[0].y KC0[0].y + KC0[0].y KC0[0].y + I[0] I[0] + I[0] I[0] {WL}
+ALU DOT4_IEEE S1026.z@group : KC0[0].x KC0[0].z + KC0[0].x KC0[0].w + I[0] I[0] + I[0] I[0] {WL}
+ALU DOT4_IEEE S1026.w@group : KC0[0].y KC0[0].w + KC0[0].w KC0[0].y + I[0] I[0] + I[0] I[0] {WL}
+EXPORT_DONE PIXEL 0 S1026.xzzw
)";
const char *fs_with_grand_and_abs =