void visit(AluInstr *instr) override;
void visit(AluGroup *instr) override;
void visit(TexInstr *instr) override;
- void visit(ExportInstr *instr) override {(void)instr;}
+ void visit(ExportInstr *instr) override;
void visit(FetchInstr *instr) override;
void visit(Block *instr) override;
void visit(ControlFlowInstr *instr) override {(void)instr;}
void visit(LDSAtomicInstr *instr) override {(void)instr;};
void visit(LDSReadInstr *instr) override {(void)instr;};
+ void propagate_to(RegisterVec4& src, Instr *instr); /* vec4 copy-propagation helper called by the TexInstr and ExportInstr visitors */
+ /* Set by propagate_to() whenever it replaces a source component. */
bool progress;
};
void CopyPropFwdVisitor::visit(TexInstr *instr)
{
- (void)instr;
+ propagate_to(instr->src(), instr); /* try to replace MOV-copied components of the TexInstr source vector */
+}
+ /* Export instructions: run the vec4 copy propagation on the exported
+  * value vector, with the export itself registered as the user. */
+void CopyPropFwdVisitor::visit(ExportInstr *instr)
+{
+ propagate_to(instr->value(), instr);
+}
+ /* Forward copy propagation into the four-component vector 'src' that is
+  * used by 'instr'.
+  *
+  * Every component that is an SSA value with chan() < 4 is a candidate. The
+  * vector is only rewritten if each candidate that has an ALU parent is
+  * written by an op1_mov without the src0_neg, src0_abs, dst_clamp or
+  * src0_rel flags, the MOV source is an SSA register, and all these MOV
+  * sources share one sel(). In that case each such component is replaced by
+  * the source of its MOV and 'progress' is set. If any check fails, the
+  * function returns before 'src' has been modified.
+  */
+void CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr)
+{
+ AluInstr *parents[4] = {nullptr}; /* parent ALU instruction per component; nullptr entries are skipped below */
+ for (int i = 0; i < 4; ++i) {
+ if (src[i]->chan() < 4 && src[i]->is_ssa()) { /* only SSA components with chan() < 4 are considered */
+ /* No parent instruction: this is a pre-defined value, so we can't propagate a copy */
+ if (src[i]->parents().empty())
+ return;
+ /* NOTE(review): as_alu() presumably yields nullptr for a non-ALU parent;
+  * such a component is then skipped by both loops below - confirm. */
+ assert(src[i]->parents().size() == 1);
+ parents[i] = (*src[i]->parents().begin())->as_alu();
+ }
+ }
+ PRegister new_src[4] = {0}; /* replacement register for each propagated component */
+ /* sel() shared by all replacement registers; -1 until the first one is seen */
+ int sel = -1;
+ for (int i = 0; i < 4; ++i) {
+ if (!parents[i])
+ continue;
+ if ((parents[i]->opcode() != op1_mov) ||
+ parents[i]->has_alu_flag(alu_src0_neg) ||
+ parents[i]->has_alu_flag(alu_src0_abs) ||
+ parents[i]->has_alu_flag(alu_dst_clamp) ||
+ parents[i]->has_alu_flag(alu_src0_rel)) {
+ return; /* parent is not a MOV, or carries neg/abs/clamp/rel: leave the whole vector untouched */
+ } else {
+ auto src = parents[i]->src(0).as_register(); /* NOTE: this local shadows the 'src' parameter inside the else branch */
+ if (!src)
+ return; /* presumably: the MOV source is not a register */
+ else if (!src->is_ssa())
+ return; /* the MOV source is not an SSA value */
+ else if (sel < 0)
+ sel = src->sel(); /* the first replacement fixes the common sel() */
+ else if (sel != src->sel())
+ return; /* all replacement registers must share one sel() */
+ new_src[i] = src;
+ }
+ }
+ /* All checks passed: swap in the MOV sources. Each replacement is
+  * bracketed by del_use(instr) / add_use(instr) on the vector. */
+ for (int i = 0; i < 4; ++i) {
+ if (parents[i]) {
+ src.del_use(instr);
+ src.set_value(i, new_src[i]);
+ if (new_src[i]->pin() != pin_fully) { /* pin_fully is kept; pin_chan becomes pin_chgr, anything else pin_group */
+ if (new_src[i]->pin() == pin_chan)
+ new_src[i]->set_pin(pin_chgr);
+ else
+ new_src[i]->set_pin(pin_group);
+ }
+ src.add_use(instr);
+ progress |= true;
+ }
+ }
+ if (progress) /* NOTE(review): 'progress' is not local to this call, so validate() also runs if it was already set earlier - confirm intended */
+ src.validate();
}
void CopyPropFwdVisitor::visit(FetchInstr *instr)
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
-ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
-ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
-ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
+ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W}
+ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W}
+ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL}
ALU_GROUP_END
BLOCK_END
BLOCK_START
-TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN
BLOCK_END
BLOCK_START
EXPORT_DONE PIXEL 0 S1029.xyzw
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {}
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L}
ALU_GROUP_END
-ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W}
-ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W}
-ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL}
-TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN
+ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W}
+ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W}
+ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL}
+TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN
EXPORT_DONE PIXEL 0 S1029.xyzw
)";
ALU MOV S2.y : KC0[1].y {W}
ALU MOV S2.z : KC0[1].z {W}
ALU MOV S2.w : KC0[1].w {WL}
-ALU DOT4_IEEE S3.x@free : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL}
-ALU MOV S4.x : S3.x@free {W}
-ALU MOV S4.y : S3.x@free {W}
-ALU MOV S4.z : S3.x@free {W}
-ALU MOV S4.w : S3.x@free {W}
-EXPORT_DONE PIXEL 0 S4.xyzw
+ALU DOT4_IEEE S3.x@group : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL}
+EXPORT_DONE PIXEL 0 S3.xxxx
)";
const char *glxgears_vs2_nir =
ALU_GROUP_BEGIN
ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210
ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210
-ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210
-ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210
+ALU INTERP_ZW S1.z@chgr : R0.y@fully Param0.z {W} VEC_210
+ALU INTERP_ZW S1.w@chgr : R0.x@fully Param0.w {WL} VEC_210
ALU_GROUP_END
ALU_GROUP_BEGIN
-ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210
-ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210
+ALU INTERP_XY S1.x@chgr : R0.y@fully Param0.x {W} VEC_210
+ALU INTERP_XY S1.y@chgr : R0.x@fully Param0.y {W} VEC_210
ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210
ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210
ALU_GROUP_END
-EXPORT_DONE PIXEL 0 S2.xyzw
+EXPORT_DONE PIXEL 0 S1.xyzw
)";
const char *shader_group_chan_pin_combined_sheduled =