From 8f332122589f97b9c974b168ca5b0b186296f0e4 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Tue, 29 Jun 2021 03:57:31 -0700 Subject: [PATCH] amdgcn: Add [us]mulsid3/muldi3 patterns This patch improves 64-bit multiplication for AMD GCN: patterns for unsigned and signed 32x32->64 bit multiplication have been added, and also 64x64->64 bit multiplication is now open-coded rather than calling a library function (which may be a win for code size as well as speed: the function calling sequence isn't particularly concise for GCN). This version of the patch uses define_insn_and_split in order to keep multiply operations together during RTL optimisations up to register allocation: this appears to produce more compact code via inspection on small test cases than the previous approach using an expander. The DImode multiply implementation is lost from libgcc if we build it for DImode/TImode rather than SImode/DImode, a change we make in a later patch in this series. 2021-06-29 Julian Brown gcc/ * config/gcn/gcn.md (mulsidi3, mulsidi3_reg, mulsidi3_imm, muldi3): Add patterns. --- gcc/config/gcn/gcn.md | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index d1d4998..82f7a46 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1457,6 +1457,100 @@ (set_attr "length" "4,8,8") (set_attr "gcn_version" "gcn5,gcn5,*")]) +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" "")) + (any_extend:DI + (match_operand:SI 2 "nonmemory_operand" ""))))] + "" +{ + if (can_create_pseudo_p () + && !TARGET_GCN5 + && !gcn_inline_immediate_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + + if (REG_P (operands[2])) + emit_insn (gen_mulsidi3_reg (operands[0], operands[1], operands[2])); + else + emit_insn (gen_mulsidi3_imm (operands[0], operands[1], operands[2])); + + DONE; +}) + +(define_insn_and_split "mulsidi3_reg" + [(set (match_operand:DI 0 "register_operand" "=&Sg, &v") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" "%Sg, v")) + (any_extend:DI + (match_operand:SI 2 "register_operand" "Sg,vSv"))))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx dstlo = gen_lowpart (SImode, operands[0]); + rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]); + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); + emit_insn (gen_mulsi3_highpart (dsthi, operands[1], operands[2])); + DONE; + } + [(set_attr "gcn_version" "gcn5,*")]) + +(define_insn_and_split "mulsidi3_imm" + [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,&v") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" "Sg, Sg, v")) + (match_operand:DI 2 "gcn_32bit_immediate_operand" + "A, B, A")))] + "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx dstlo = gen_lowpart (SImode, operands[0]); + rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]); + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); + emit_insn (gen_mulsi3_highpart (dsthi, operands[1], operands[2])); + DONE; + } + [(set_attr "gcn_version" "gcn5,gcn5,*")]) + +(define_insn_and_split "muldi3" + [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v") + (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v") + (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A"))) + (clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v")) + (clobber (match_scratch:BI 4 "=cs, cs, X, X")) + (clobber (match_scratch:DI 5 "=X, X,cV,cV"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx tmp = operands[3]; + rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]); + rtx op1lo = gcn_operand_part (DImode, operands[1], 0); + rtx op1hi = gcn_operand_part (DImode, operands[1], 1); + rtx op2lo = gcn_operand_part (DImode, operands[2], 0); + rtx op2hi = gcn_operand_part (DImode, operands[2], 1); + emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo)); + emit_insn (gen_mulsi3 (tmp, op1lo, op2hi)); + rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp)); + rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]); + rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]); + add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2)); + emit_insn (add); + emit_insn (gen_mulsi3 (tmp, op1hi, op2lo)); + add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp)); + clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]); + clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]); + add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2)); + emit_insn (add); + DONE; + } + [(set_attr "gcn_version" "gcn5,gcn5,*,*")]) + (define_insn "mulhisi3" [(set (match_operand:SI 0 "register_operand" "=v") (mult:SI -- 2.7.4