[Patch AArch64 2/2] Fix memory sizes to load/store patterns
authorJames Greenhalgh <james.greenhalgh@arm.com>
Tue, 12 Sep 2017 14:57:58 +0000 (14:57 +0000)
committerJames Greenhalgh <jgreenhalgh@gcc.gnu.org>
Tue, 12 Sep 2017 14:57:58 +0000 (14:57 +0000)
There seems to be a partial misconception in the AArch64 backend that
load1/load2 referred to the number of registers to load, rather than the
number of words to load. This patch fixes that using the new "number of
byte" types added in the previous patch.

That means using the load_16 and store_16 types that were defined in the
previous patch for the first time in the AArch64 backend. To ensure
continuity for scheduling models, I've just split this out from load_8.
Please update your models if this is very wrong!

---
gcc/

* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
types correctly.
(movti_aarch64): Likewise.
(movdf_aarch64): Likewise.
(movtf_aarch64): Likewise.
(load_pairdi): Likewise.
(store_pairdi): Likewise.
(load_pairdf): Likewise.
(store_pairdf): Likewise.
(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
(storewb_pair<GPI:mode>_<P:mode>): Likewise.
(ldr_got_small_<mode>): Likewise.
(ldr_got_small_28k_<mode>): Likewise.
(ldr_got_tiny): Likewise.
* config/aarch64/iterators.md (ldst_sz): New.
(ldpstp_sz): Likewise.
* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
to store_16.
(thunderx_load): Split load_8 to load_16.
* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
load_8 to load_16.
(thunderx2t99_storepair_basic): Split store_8 to store_16.
* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
(xgene1_store_pair): Split store_8 to store_16.
* config/aarch64/falkor.md (falkor_ld_3_ld): Split load_8 to load_16.
(falkor_st_0_st_sd): Split store_8 to store_16.

From-SVN: r252026

gcc/ChangeLog
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/falkor.md
gcc/config/aarch64/iterators.md
gcc/config/aarch64/thunderx.md
gcc/config/aarch64/thunderx2t99.md
gcc/config/arm/xgene1.md

index fae4772..8e05247 100644 (file)
@@ -1,5 +1,34 @@
 2017-09-12  James Greenhalgh  <james.greenhalgh@arm.com>
 
+       * config/aarch64/aarch64.md (movdi_aarch64): Set load/store
+       types correctly.
+       (movti_aarch64): Likewise.
+       (movdf_aarch64): Likewise.
+       (movtf_aarch64): Likewise.
+       (load_pairdi): Likewise.
+       (store_pairdi): Likewise.
+       (load_pairdf): Likewise.
+       (store_pairdf): Likewise.
+       (loadwb_pair<GPI:mode>_<P:mode>): Likewise.
+       (storewb_pair<GPI:mode>_<P:mode>): Likewise.
+       (ldr_got_small_<mode>): Likewise.
+       (ldr_got_small_28k_<mode>): Likewise.
+       (ldr_got_tiny): Likewise.
+       * config/aarch64/iterators.md (ldst_sz): New.
+       (ldpstp_sz): Likewise.
+       * config/aarch64/thunderx.md (thunderx_storepair): Split store_8
+       to store_16.
+       (thunderx_load): Split load_8 to load_16.
+       * config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
+       load_8 to load_16.
+       (thunderx2t99_storepair_basic): Split store_8 to store_16.
+       * config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
+       (xgene1_store_pair): Split store_8 to store_16.
+       * config/aarch64/falkor.md (falkor_ld_3_ld): Split load_8 to load_16.
+       (falkor_st_0_st_sd): Split store_8 to store_16.
+
+2017-09-12  James Greenhalgh  <james.greenhalgh@arm.com>
+
        * config/arm/types.md (type): Rename load1/2/3/4 to load_4/8/12/16
        and store1/2/3/4 to store_4/8/12/16.
        * config/aarch64/aarch64.md: Update for rename.
index 7cbb458..e85376c 100644 (file)
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
     }"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,\
-                     load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_8,\
+                     load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
-                            load_8,store_8,store_8,f_loadd,f_stored")
+                            load_16,store_16,store_16,\
+                             load_16,store_16")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
    mov\\t%x0, %x1
    mov\\t%x0, %1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
-                    f_loadd,f_stored,load_4,store_4,mov_reg,\
+                    f_loadd,f_stored,load_8,store_8,mov_reg,\
                     fconstd")
    (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0"
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
-                     f_loadd,f_stored,load_8,store_8,store_8")
+                     f_loadd,f_stored,load_16,store_16,store_16")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
    (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
 )
   "@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
-  [(set_attr "type" "load_8,neon_load1_2reg")
+  [(set_attr "type" "load_16,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
   "@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
-  [(set_attr "type" "store_8,neon_store1_2reg")
+  [(set_attr "type" "store_16,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
   "@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load_8")
+  [(set_attr "type" "neon_load1_2reg,load_16")
    (set_attr "fp" "yes,*")]
 )
 
   "@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store_8")
+  [(set_attr "type" "neon_store1_2reg,store_16")
    (set_attr "fp" "yes,*")]
 )
 
                    (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load_8")]
+  [(set_attr "type" "load_<ldpstp_sz>")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
           (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store_8")]
+  [(set_attr "type" "store_<ldpstp_sz>")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
                    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_sidi"
                    UNSPEC_GOTSMALLPIC28K))]
   ""
   "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_28k_sidi"
                   UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "aarch64_load_tp_hard"
index 66efc8c..83971ce 100644 (file)
 
 (define_insn_reservation "falkor_ld_3_ld" 3
   (and (eq_attr "tune" "falkor")
-       (eq_attr "type" "load_4,load_8"))
+       (eq_attr "type" "load_4,load_8,load_16"))
   "falkor_ld")
 
 ;; Miscellaneous Data-Processing Instructions
 
 (define_insn_reservation "falkor_st_0_st_sd" 0
   (and (eq_attr "tune" "falkor")
-       (eq_attr "type" "store_4,store_8"))
+       (eq_attr "type" "store_4,store_8,store_16"))
   "falkor_st+falkor_sd")
 \f
 ;; Muliply bypasses.
index 3e38767..477dc35 100644 (file)
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; The size of access, in bytes.
+(define_mode_attr ldst_sz [(SI "4") (DI "8")])
+;; Likewise for load/store pair.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
 (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
index c18da2f..84ac6cd 100644 (file)
 ;; Store pair are single issued
 (define_insn_reservation "thunderx_storepair" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx_pipe0 + thunderx_pipe1")
 
 ;; Prefetch are single issued
 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "load_4, load_8"))
+       (eq_attr "type" "load_4, load_8, load_16"))
   "thunderx_pipe0")
 
 (define_insn_reservation "thunderx_brj" 1
index 41a45ca..5bcf4ff 100644 (file)
 
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8,load_16"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
 
 (define_insn_reservation "thunderx2t99_storepair_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 ;; FP data processing instructions.
index d0b17ab..c4b3773 100644 (file)
 
 (define_insn_reservation "xgene1_load_pair" 6
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8, load_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_store_pair" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8, store_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_fp_load1" 10