amdgcn: Align VGPR pairs
author     Andrew Stubbs <ams@codesourcery.com>
           Fri, 21 Feb 2020 11:07:55 +0000 (11:07 +0000)
committer  Andrew Stubbs <ams@codesourcery.com>
           Fri, 21 Feb 2020 11:56:20 +0000 (11:56 +0000)
Aligning the registers is not needed by the architecture, but doing so
allows us to remove the requirement for bug-prone early-clobber
constraints from many split patterns (and avoid adding more in future).
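
A minimal sketch of the argument, in plain C rather than the compiler sources
(the register numbers below are purely illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Pair A occupies VGPRs {a, a+1}; pair B occupies {b, b+1}.  */
    static bool
    pairs_partially_overlap (unsigned int a, unsigned int b)
    {
      return a != b && (a + 1 == b || b + 1 == a);
    }

    int
    main (void)
    {
      /* Unaligned pairs may partially overlap, e.g. v[0:1] vs v[1:2]; this is
         what forced early-clobber constraints onto the split patterns.  */
      printf ("v[0:1] vs v[1:2]: %s\n",
              pairs_partially_overlap (0, 1) ? "partial overlap" : "no overlap");

      /* Once pairs must start on an even VGPR, any two pairs either coincide
         exactly or are disjoint, so the hazard cannot occur.  */
      bool any = false;
      for (unsigned int a = 0; a < 8; a += 2)
        for (unsigned int b = 0; b < 8; b += 2)
          if (pairs_partially_overlap (a, b))
            any = true;
      printf ("aligned pairs with partial overlap: %s\n", any ? "some" : "none");
      return 0;
    }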

2020-02-21  Andrew Stubbs  <ams@codesourcery.com>

gcc/
* config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
* config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
(addv64di3_exec): Likewise.
(subv64di3): Likewise.
(subv64di3_exec): Likewise.
(addv64di3_zext): Likewise.
(addv64di3_zext_exec): Likewise.
(addv64di3_zext_dup): Likewise.
(addv64di3_zext_dup_exec): Likewise.
(addv64di3_zext_dup2): Likewise.
(addv64di3_zext_dup2_exec): Likewise.
(addv64di3_sext_dup2): Likewise.
(addv64di3_sext_dup2_exec): Likewise.
(<expander>v64di3): Likewise.
(<expander>v64di3_exec): Likewise.
(*<reduc_op>_dpp_shr_v64di): Likewise.
(*plus_carry_dpp_shr_v64di): Likewise.
* config/gcn/gcn.md (adddi3): Likewise.
(addptrdi3): Likewise.
(<expander>di3): Likewise.

gcc/ChangeLog
gcc/config/gcn/gcn-valu.md
gcc/config/gcn/gcn.c
gcc/config/gcn/gcn.md

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 543e95d..1845563 100644
@@ -1,5 +1,28 @@
 2020-02-21  Andrew Stubbs  <ams@codesourcery.com>
 
+       * config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
+       * config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
+       (addv64di3_exec): Likewise.
+       (subv64di3): Likewise.
+       (subv64di3_exec): Likewise.
+       (addv64di3_zext): Likewise.
+       (addv64di3_zext_exec): Likewise.
+       (addv64di3_zext_dup): Likewise.
+       (addv64di3_zext_dup_exec): Likewise.
+       (addv64di3_zext_dup2): Likewise.
+       (addv64di3_zext_dup2_exec): Likewise.
+       (addv64di3_sext_dup2): Likewise.
+       (addv64di3_sext_dup2_exec): Likewise.
+       (<expander>v64di3): Likewise.
+       (<expander>v64di3_exec): Likewise.
+       (*<reduc_op>_dpp_shr_v64di): Likewise.
+       (*plus_carry_dpp_shr_v64di): Likewise.
+       * config/gcn/gcn.md (adddi3): Likewise.
+       (addptrdi3): Likewise.
+       (<expander>di3): Likewise.
+
+2020-02-21  Andrew Stubbs  <ams@codesourcery.com>
+
        * config/gcn/gcn-valu.md (vec_seriesv64di): Use gen_vec_duplicatev64di.
 
 2020-02-21  Richard Sandiford  <richard.sandiford@arm.com>
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index edac362..d5e6d0a 100644
    (set_attr "length" "4,8,4,8")])
 
 (define_insn_and_split "addv64di3"
-  [(set (match_operand:V64DI 0 "register_operand"   "= &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"   "=  v")
        (plus:V64DI
-         (match_operand:V64DI 1 "register_operand" "%vDb,vDb0")
-         (match_operand:V64DI 2 "gcn_alu_operand"  "vDb0, vDb")))
+         (match_operand:V64DI 1 "register_operand" "%vDb")
+         (match_operand:V64DI 2 "gcn_alu_operand"  " vDb")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_exec"
-  [(set (match_operand:V64DI 0 "register_operand"           "= &v,  &v, &v")
+  [(set (match_operand:V64DI 0 "register_operand"                "=  v")
        (vec_merge:V64DI
          (plus:V64DI
-           (match_operand:V64DI 1 "register_operand"        "%vDb,vDb0,vDb")
-           (match_operand:V64DI 2 "gcn_alu_operand"         "vDb0, vDb,vDb"))
-         (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
-                                                            "   U,   U,  0")
-         (match_operand:DI 4 "gcn_exec_reg_operand"         "   e,   e,  e")))
+           (match_operand:V64DI 1 "register_operand"             "%vDb")
+           (match_operand:V64DI 2 "gcn_alu_operand"              " vDb"))
+         (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
+         (match_operand:DI 4 "gcn_exec_reg_operand"              "   e")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "subv64di3"
-  [(set (match_operand:V64DI 0 "register_operand"  "=&v,  &v,  &v, &v")
-       (minus:V64DI                                                 
-         (match_operand:V64DI 1 "gcn_alu_operand" "vDb,vDb0,   v, v0")
-         (match_operand:V64DI 2 "gcn_alu_operand" " v0,   v,vDb0,vDb")))
+  [(set (match_operand:V64DI 0 "register_operand"  "= v,  v")
+       (minus:V64DI                                        
+         (match_operand:V64DI 1 "gcn_alu_operand" "vDb,  v")
+         (match_operand:V64DI 2 "gcn_alu_operand" "  v,vDb")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "subv64di3_exec"
-  [(set (match_operand:V64DI 0 "register_operand"    "= &v,   &v,   &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"               "=  v,   v")
        (vec_merge:V64DI                                                         
          (minus:V64DI                                                           
-           (match_operand:V64DI 1 "gcn_alu_operand" "vSvB,vSvB0,    v,  v0")
-           (match_operand:V64DI 2 "gcn_alu_operand" "  v0,    v,vSvB0,vSvB"))
-         (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
-                                                    "  U0,   U0,   U0,  U0")
-         (match_operand:DI 4 "gcn_exec_reg_operand" "   e,    e,    e,   e")))
+           (match_operand:V64DI 1 "gcn_alu_operand"             "vSvB,   v")
+           (match_operand:V64DI 2 "gcn_alu_operand"             "   v,vSvB"))
+         (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0,  U0")
+         (match_operand:DI 4 "gcn_exec_reg_operand"             "   e,   e")))
    (clobber (reg:DI VCC_REG))]
   "register_operand (operands[1], VOIDmode)
    || register_operand (operands[2], VOIDmode)"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext"
-  [(set (match_operand:V64DI 0 "register_operand"    "=&v, &v,  &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"    "= v,  v")
        (plus:V64DI
          (zero_extend:V64DI
-           (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB,  vA,  vB"))
-         (match_operand:V64DI 2 "gcn_alu_operand"   "vDb,vDA,0vDb,0vDA")))
+           (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
+         (match_operand:V64DI 2 "gcn_alu_operand"   "vDb,vDA")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_exec"
-  [(set (match_operand:V64DI 0 "register_operand"       "=&v,  &v, &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"                "= v,  v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI
-             (match_operand:V64SI 1 "gcn_alu_operand"   "0vA,  vA,0vB,  vB"))
-           (match_operand:V64DI 2 "gcn_alu_operand"     "vDb,0vDb,vDA,0vDA"))
-         (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
-                                                        " U0,  U0, U0,  U0")
-         (match_operand:DI 4 "gcn_exec_reg_operand"     "  e,   e,  e,   e")))
+             (match_operand:V64SI 1 "gcn_alu_operand"            " vA, vB"))
+           (match_operand:V64DI 2 "gcn_alu_operand"              "vDb,vDA"))
+         (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+         (match_operand:DI 4 "gcn_exec_reg_operand"              "  e,  e")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup"
-  [(set (match_operand:V64DI 0 "register_operand"   "= &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"   "= v,  v")
        (plus:V64DI
          (zero_extend:V64DI
            (vec_duplicate:V64SI
-             (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
-         (match_operand:V64DI 2 "gcn_alu_operand"  "vDA0,vDb0")))
+             (match_operand:SI 1 "gcn_alu_operand" "BSv,ASv")))
+         (match_operand:V64DI 2 "gcn_alu_operand"  "vDA,vDb")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup_exec"
-  [(set (match_operand:V64DI 0 "register_operand"               "= &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"                "= v,  v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI
              (vec_duplicate:V64SI
-               (match_operand:SI 1 "gcn_alu_operand"            " ASv, BSv")))
-           (match_operand:V64DI 2 "gcn_alu_operand"             "vDb0,vDA0"))
-         (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0,  U0")
-         (match_operand:DI 4 "gcn_exec_reg_operand"             "   e,   e")))
+               (match_operand:SI 1 "gcn_alu_operand"             "ASv,BSv")))
+           (match_operand:V64DI 2 "gcn_alu_operand"              "vDb,vDA"))
+         (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+         (match_operand:DI 4 "gcn_exec_reg_operand"              "  e,  e")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup2"
-  [(set (match_operand:V64DI 0 "register_operand"                   "= &v")
+  [(set (match_operand:V64DI 0 "register_operand"                   "=  v")
        (plus:V64DI
          (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup2_exec"
-  [(set (match_operand:V64DI 0 "register_operand"                     "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"                     "= v")
        (vec_merge:V64DI
          (plus:V64DI
            (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_sext_dup2"
-  [(set (match_operand:V64DI 0 "register_operand"                    "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"                    "= v")
        (plus:V64DI
          (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_sext_dup2_exec"
-  [(set (match_operand:V64DI 0 "register_operand"                     "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"                     "= v")
        (vec_merge:V64DI
          (plus:V64DI
            (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
    (set_attr "length" "8,8")])
 
 (define_insn_and_split "<expander>v64di3"
-  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand"      "=  v,RD")
        (bitop:V64DI
-         (match_operand:V64DI 1 "gcn_valu_src0_operand"          "%  v,RD")
-         (match_operand:V64DI 2 "gcn_valu_src1com_operand"       "vSvB, v")))]
+         (match_operand:V64DI 1 "gcn_valu_src0_operand"    "%  v,RD")
+         (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
   ""
   "@
    #
    (set_attr "length" "16,8")])
 
 (define_insn_and_split "<expander>v64di3_exec"
-  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand"            "=  v,RD")
        (vec_merge:V64DI
          (bitop:V64DI
            (match_operand:V64DI 1 "gcn_valu_src0_operand"        "%  v,RD")
    (set_attr "length" "8")])
 
 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
-  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"   "=v")
        (unspec:V64DI
-         [(match_operand:V64DI 1 "register_operand" "v0")
-          (match_operand:V64DI 2 "register_operand" "v0")
-          (match_operand:SI 3 "const_int_operand"    "n")]
+         [(match_operand:V64DI 1 "register_operand" "v")
+          (match_operand:V64DI 2 "register_operand" "v")
+          (match_operand:SI 3 "const_int_operand"   "n")]
          REDUC_2REG_UNSPEC))]
   ""
   "#"
    (set_attr "length" "8")])
 
 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
-  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"   "=v")
        (unspec:V64DI
-         [(match_operand:V64DI 1 "register_operand" "v0")
-          (match_operand:V64DI 2 "register_operand" "v0")
-          (match_operand:SI 3 "const_int_operand"    "n")]
+         [(match_operand:V64DI 1 "register_operand" "v")
+          (match_operand:V64DI 2 "register_operand" "v")
+          (match_operand:SI 3 "const_int_operand"   "n")]
          UNSPEC_PLUS_CARRY_DPP_SHR))
    (clobber (reg:DI VCC_REG))]
   ""
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 5bcad7d..531f47a 100644
@@ -458,7 +458,15 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
            || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
            || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
   if (VGPR_REGNO_P (regno))
-    return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode)
+    /* Vector instructions do not care about the alignment of register
+       pairs, but where there is no 64-bit instruction, many of the
+       define_split patterns do not work if the input and output registers
+       partially overlap.  We tried to fix this with early-clobber and
+       matching constraints, but it was bug prone, added complexity, and
+       conflicted with the 'U0' constraints on vec_merge.
+       Therefore, we restrict ourselves to aligned registers.  */
+    return (vgpr_1reg_mode_p (mode)
+           || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
            /* TImode is used by DImode compare_and_swap.  */
            || mode == TImode);
   return false;
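
(For reference, not part of the patch: the new condition reduces to a simple
even-start test.  FIRST_VGPR_REG below is a placeholder value; the real
definition lives in gcn.h.)

    #include <stdio.h>

    #define FIRST_VGPR_REG 160   /* placeholder; see gcn.h for the real value */

    int
    main (void)
    {
      /* A 64-bit value occupying a VGPR pair must start on an even VGPR.  */
      for (unsigned int regno = FIRST_VGPR_REG; regno < FIRST_VGPR_REG + 4; regno++)
        printf ("v%u as the start of a 64-bit pair: %s\n",
                regno - FIRST_VGPR_REG,
                ((regno - FIRST_VGPR_REG) & 1) == 0 ? "allowed" : "rejected");
      return 0;
    }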
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index a470536..b527d9a 100644
 ; through some RTL optimisation passes, and means the CC reg we set isn't
 ; dependent on the constraint alternative (which doesn't seem to work well).
 
-; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
-; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).
-
 ; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
 ; used as an operand due to the read of VCC, so we restrict constants to the
 ; inlinable range for that alternative.
 
 (define_insn_and_split "adddi3"
-  [(set (match_operand:DI 0 "register_operand"         
-                                             "=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
-       (plus:DI (match_operand:DI 1 "register_operand" 
-                                             "  Sg,  0,  0, Sg, v, 0, 0, v")
-                (match_operand:DI 2 "nonmemory_operand"
-                                             "   0,SgB,  0,SgB, 0,vA, 0,vA")))
-   (clobber (match_scratch:BI 3                      "= cs, cs, cs, cs, X, X, X, X"))
-   (clobber (match_scratch:DI 4                      "=  X,  X,  X,  X,cV,cV,cV,cV"))]
+  [(set (match_operand:DI 0 "register_operand"          "=Sg, v")
+       (plus:DI (match_operand:DI 1 "register_operand"  " Sg, v")
+                (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
+   (clobber (match_scratch:BI 3                                 "=cs, X"))
+   (clobber (match_scratch:DI 4                                 "= X,cV"))]
   ""
   "#"
   "&& reload_completed"
                  cc));
     DONE;
   }
-  [(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
+  [(set_attr "type" "mult,vmult")
    (set_attr "length" "8")])
 
 (define_expand "adddi3_scc"
 ; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
 ; Note that it is not safe to save/clobber/restore SCC because doing so will
 ; break data-flow analysis, so this must use vector registers.
+;
+; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
+; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
 
 (define_insn "addptrdi3"
-  [(set (match_operand:DI 0 "register_operand"          "= &v")
-       (plus:DI (match_operand:DI 1 "register_operand"  "  v0")
-                (match_operand:DI 2 "nonmemory_operand" "vDA0")))]
+  [(set (match_operand:DI 0 "register_operand"          "= v")
+       (plus:DI (match_operand:DI 1 "register_operand"  " v0")
+                (match_operand:DI 2 "nonmemory_operand" "vDA")))]
   ""
   {
     rtx new_operands[4] = { operands[0], operands[1], operands[2],
 (define_code_iterator vec_and_scalar64_com [and ior xor])
 
 (define_insn_and_split "<expander>di3"
-   [(set (match_operand:DI 0 "register_operand"  "= Sg,   &v,   &v")
+   [(set (match_operand:DI 0 "register_operand"  "= Sg,    v")
         (vec_and_scalar64_com:DI
-         (match_operand:DI 1 "gcn_alu_operand"  "%SgA,vSvDB,vSvDB")
-          (match_operand:DI 2 "gcn_alu_operand" " SgC,    v,    0")))
-   (clobber (match_scratch:BI 3                         "= cs,    X,    X"))]
+         (match_operand:DI 1 "gcn_alu_operand"  "%SgA,vSvDB")
+          (match_operand:DI 2 "gcn_alu_operand" " SgC,    v")))
+   (clobber (match_scratch:BI 3                         "= cs,    X"))]
   ""
   "@
    s_<mnemonic>0\t%0, %1, %2
-   #
    #"
   "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
   [(parallel [(set (match_dup 4)
     operands[8] = gcn_operand_part (DImode, operands[1], 1);
     operands[9] = gcn_operand_part (DImode, operands[2], 1);
   }
-  [(set_attr "type" "sop2,vop2,vop2")
+  [(set_attr "type" "sop2,vop2")
    (set_attr "length" "8")])
 
 (define_insn "<expander>di3"