From: Gert Wollny Date: Wed, 7 Sep 2022 06:22:04 +0000 (+0200) Subject: r600/sfn: Copy propagate into TEX source X-Git-Tag: upstream/22.3.5~2950 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c0b6c59e084882acfb6fbc7a39c901ab471cbe17;p=platform%2Fupstream%2Fmesa.git r600/sfn: Copy propagate into TEX source This is possible if all register values are actually from the same register ID. Signed-off-by: Gert Wollny Part-of: --- diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp index 06d515c..945b9a8 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_tex.cpp @@ -376,6 +376,7 @@ bool TexInstr::replace_source(PRegister old_src, PVirtualValue new_src) success = true; } } + m_src.validate(); if (success) { old_src->del_use(this); new_src->as_register()->add_use(this); diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp index a28ae32..96604ce 100644 --- a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp @@ -244,7 +244,7 @@ public: void visit(AluInstr *instr) override; void visit(AluGroup *instr) override; void visit(TexInstr *instr) override; - void visit(ExportInstr *instr) override {(void)instr;} + void visit(ExportInstr *instr) override; void visit(FetchInstr *instr) override; void visit(Block *instr) override; void visit(ControlFlowInstr *instr) override {(void)instr;} @@ -261,6 +261,8 @@ public: void visit(LDSAtomicInstr *instr) override {(void)instr;}; void visit(LDSReadInstr *instr) override {(void)instr;}; + void propagate_to(RegisterVec4& src, Instr *instr); + bool progress; }; @@ -386,7 +388,69 @@ void CopyPropFwdVisitor::visit(AluGroup *instr) void CopyPropFwdVisitor::visit(TexInstr *instr) { - (void)instr; + propagate_to(instr->src(), instr); +} + +void CopyPropFwdVisitor::visit(ExportInstr *instr) +{ + propagate_to(instr->value(), instr); +} + +void CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr) +{ + AluInstr *parents[4] = {nullptr}; + for (int i = 0; i < 4; ++i) { + if (src[i]->chan() < 4 && src[i]->is_ssa()) { + /* We have a pre-define value, so we can't propagate a copy */ + if (src[i]->parents().empty()) + return; + + assert(src[i]->parents().size() == 1); + parents[i] = (*src[i]->parents().begin())->as_alu(); + } + } + PRegister new_src[4] = {0}; + + int sel = -1; + for (int i = 0; i < 4; ++i) { + if (!parents[i]) + continue; + if ((parents[i]->opcode() != op1_mov) || + parents[i]->has_alu_flag(alu_src0_neg) || + parents[i]->has_alu_flag(alu_src0_abs) || + parents[i]->has_alu_flag(alu_dst_clamp) || + parents[i]->has_alu_flag(alu_src0_rel)) { + return; + } else { + auto src = parents[i]->src(0).as_register(); + if (!src) + return; + else if (!src->is_ssa()) + return; + else if (sel < 0) + sel = src->sel(); + else if (sel != src->sel()) + return; + new_src[i] = src; + } + } + + for (int i = 0; i < 4; ++i) { + if (parents[i]) { + src.del_use(instr); + src.set_value(i, new_src[i]); + if (new_src[i]->pin() != pin_fully) { + if (new_src[i]->pin() == pin_chan) + new_src[i]->set_pin(pin_chgr); + else + new_src[i]->set_pin(pin_group); + } + src.add_use(instr); + progress |= true; + } + } + if (progress) + src.validate(); } void CopyPropFwdVisitor::visit(FetchInstr *instr) diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index 1667281..fe28e2d 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -504,13 +504,13 @@ ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 ALU_GROUP_END ALU_GROUP_BEGIN -ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W} -ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W} -ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL} +ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W} +ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W} +ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL} ALU_GROUP_END BLOCK_END BLOCK_START -TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN BLOCK_END BLOCK_START EXPORT_DONE PIXEL 0 S1029.xyzw @@ -540,10 +540,10 @@ ALU INTERP_XY S1025.y@chan : R0.x@fully Param0.y VEC_210 {W} ALU INTERP_XY __.z@chan : R0.y@fully Param0.z VEC_210 {} ALU INTERP_XY __.w@chan : R0.x@fully Param0.w VEC_210 {L} ALU_GROUP_END -ALU FLT_TO_INT S1028.x@group : S1025.x@chan {W} -ALU FLT_TO_INT S1028.y@group : S1025.y@chan {W} -ALU FLT_TO_INT S1028.w@group : S1025.w@chan {WL} -TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN +ALU FLT_TO_INT S1026.x@group : S1025.x@chan {W} +ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W} +ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL} +TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN EXPORT_DONE PIXEL 0 S1029.xyzw )"; @@ -585,12 +585,8 @@ ALU MOV S2.x : KC0[1].x {W} ALU MOV S2.y : KC0[1].y {W} ALU MOV S2.z : KC0[1].z {W} ALU MOV S2.w : KC0[1].w {WL} -ALU DOT4_IEEE S3.x@free : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL} -ALU MOV S4.x : S3.x@free {W} -ALU MOV S4.y : S3.x@free {W} -ALU MOV S4.z : S3.x@free {W} -ALU MOV S4.w : S3.x@free {W} -EXPORT_DONE PIXEL 0 S4.xyzw +ALU DOT4_IEEE S3.x@group : KC0[0].x S2.x + KC0[0].y S2.y + KC0[0].z S2.z + KC0[0].w S2.w {WL} +EXPORT_DONE PIXEL 0 S3.xxxx )"; const char *glxgears_vs2_nir = @@ -2188,16 +2184,16 @@ SHADER ALU_GROUP_BEGIN ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 -ALU INTERP_ZW S2.z@chgr : R0.y@fully Param0.z {W} VEC_210 -ALU INTERP_ZW S2.w@chgr : R0.x@fully Param0.w {WL} VEC_210 +ALU INTERP_ZW S1.z@chgr : R0.y@fully Param0.z {W} VEC_210 +ALU INTERP_ZW S1.w@chgr : R0.x@fully Param0.w {WL} VEC_210 ALU_GROUP_END ALU_GROUP_BEGIN -ALU INTERP_XY S2.x@chgr : R0.y@fully Param0.x {W} VEC_210 -ALU INTERP_XY S2.y@chgr : R0.x@fully Param0.y {W} VEC_210 +ALU INTERP_XY S1.x@chgr : R0.y@fully Param0.x {W} VEC_210 +ALU INTERP_XY S1.y@chgr : R0.x@fully Param0.y {W} VEC_210 ALU INTERP_XY __.z@chan : R0.y@fully Param0.z {} VEC_210 ALU INTERP_XY __.w@chan : R0.x@fully Param0.w {L} VEC_210 ALU_GROUP_END -EXPORT_DONE PIXEL 0 S2.xyzw +EXPORT_DONE PIXEL 0 S1.xyzw )"; const char *shader_group_chan_pin_combined_sheduled =