From 142439c50cfd7e3a2ef609cbd754541152814f95 Mon Sep 17 00:00:00 2001
From: Kito Cheng
Date: Mon, 5 Mar 2018 02:27:30 +0000
Subject: [PATCH] [NDS32] Implement setmem pattern.

gcc/
	* config/nds32/nds32-protos.h (nds32_expand_setmem): Declare.
	* config/nds32/nds32-multiple.md (setmemsi): Define.
	* config/nds32/nds32-memory-manipulation.c
	(nds32_gen_dup_4_byte_to_word_value): New.
	(emit_setmem_word_loop): New.
	(emit_setmem_byte_loop): New.
	(nds32_expand_setmem_loop): New.
	(nds32_expand_setmem_loop_v3m): New.
	(nds32_expand_setmem_unroll): New.
	(nds32_expand_setmem): New.

Co-Authored-By: Chung-Ju Wu

From-SVN: r258239
---
 gcc/ChangeLog                                |  20 +-
 gcc/config/nds32/nds32-memory-manipulation.c | 453 +++++++++++++++++++++++++++
 gcc/config/nds32/nds32-multiple.md           |  19 ++
 gcc/config/nds32/nds32-protos.h              |   1 +
 4 files changed, 490 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9cc5c6b..4daf25f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,7 +1,21 @@
+2018-03-05  Kito Cheng
+	    Chung-Ju Wu
+
+	* config/nds32/nds32-protos.h (nds32_expand_setmem): Declare.
+	* config/nds32/nds32-multiple.md (setmemsi): Define.
+	* config/nds32/nds32-memory-manipulation.c
+	(nds32_gen_dup_4_byte_to_word_value): New.
+	(emit_setmem_word_loop): New.
+	(emit_setmem_byte_loop): New.
+	(nds32_expand_setmem_loop): New.
+	(nds32_expand_setmem_loop_v3m): New.
+	(nds32_expand_setmem_unroll): New.
+	(nds32_expand_setmem): New.
+
 2018-03-04  Kito Cheng
 	    Chung-Ju Wu
 
-	*config/nds32/nds32-memory-manipulation.c
+	* config/nds32/nds32-memory-manipulation.c
 	(nds32_emit_load_store): New.
 	(nds32_emit_post_inc_load_store): New.
 	(nds32_emit_mem_move): New.
@@ -12,9 +26,9 @@
 	(nds32_expand_movmemsi_unroll): New.
 	(nds32_expand_movmemqi): Rename ...
 	(nds32_expand_movmemsi): ... to this.
-	*config/nds32/nds32-multiple.md (movmemqi): Rename ...
+	* config/nds32/nds32-multiple.md (movmemqi): Rename ...
 	(movmemsi): ... to this.
-	*config/nds32/nds32-protos.h (nds32_expand_movmemqi): Rename ...
+	* config/nds32/nds32-protos.h (nds32_expand_movmemqi): Rename ...
 	(nds32_expand_movmemsi): ... to this.
 
 2018-03-04  Kito Cheng
diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c
index e8b9be4..13ac8c1 100644
--- a/gcc/config/nds32/nds32-memory-manipulation.c
+++ b/gcc/config/nds32/nds32-memory-manipulation.c
@@ -428,6 +428,459 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
   return false;
 }
 
+/* ------------------------------------------------------------------------ */
+
+/* Auxiliary function for expanding the setmem pattern.
+   Return a new SImode pseudo that holds the low 8 bits of VALUE
+   replicated into all four bytes of a word.  VALUE must be a CONST_INT
+   or have QImode.  */
+
+static rtx
+nds32_gen_dup_4_byte_to_word_value (rtx value)
+{
+  rtx value4word = gen_reg_rtx (SImode);
+
+  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
+
+  if (CONST_INT_P (value))
+    {
+      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK (QImode);
+      rtx new_val = gen_int_mode (val | (val << 8)
+                                  | (val << 16) | (val << 24), SImode);
+      /* The value is constant, so just compute the word here.  */
+      emit_move_insn (value4word, new_val);
+    }
+  else
+    {
+      /* ! prepare word
+         andi    $tmp1, $value, 0xff         ! $tmp1       <- 0x000000ab
+         slli    $tmp2, $tmp1, 8             ! $tmp2       <- 0x0000ab00
+         or      $tmp3, $tmp1, $tmp2         ! $tmp3       <- 0x0000abab
+         slli    $tmp4, $tmp3, 16            ! $tmp4       <- 0xabab0000
+         or      $val4word, $tmp3, $tmp4     ! $value4word <- 0xabababab  */
+
+      rtx tmp1, tmp2, tmp3, tmp4, final_value;
+      tmp1 = expand_binop (SImode, and_optab, value,
+                           gen_int_mode (0xff, SImode),
+                           NULL_RTX, 0, OPTAB_WIDEN);
+      tmp2 = expand_binop (SImode, ashl_optab, tmp1,
+                           gen_int_mode (8, SImode),
+                           NULL_RTX, 0, OPTAB_WIDEN);
+      tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
+                           NULL_RTX, 0, OPTAB_WIDEN);
+      tmp4 = expand_binop (SImode, ashl_optab, tmp3,
+                           gen_int_mode (16, SImode),
+                           NULL_RTX, 0, OPTAB_WIDEN);
+
+      final_value = expand_binop (SImode, ior_optab, tmp3, tmp4,
+                                  NULL_RTX, 0, OPTAB_WIDEN);
+      emit_move_insn (value4word, final_value);
+    }
+
+  return value4word;
+}
+
+/* Emit a loop that stores the word VALUE through ITR until (SIZE & ~3)
+   bytes have been set; ITR is advanced by each store and the loop is
+   skipped when that byte count is zero.  Return an SImode pseudo holding
+   the number of bytes still to be set: SIZE & 3 if the loop ran, SIZE
+   itself otherwise.  */
+static rtx
+emit_setmem_word_loop (rtx itr, rtx size, rtx value)
+{
+  rtx word_mode_label = gen_label_rtx ();
+  rtx word_mode_end_label = gen_label_rtx ();
+  rtx byte_mode_size = gen_reg_rtx (SImode);
+  rtx byte_mode_size_tmp = gen_reg_rtx (SImode);
+  rtx word_mode_end = gen_reg_rtx (SImode);
+  rtx size_for_word = gen_reg_rtx (SImode);
+
+  /* and     $size_for_word, $size, #~3  */
+  size_for_word = expand_binop (SImode, and_optab, size,
+                                gen_int_mode (~3, SImode),
+                                NULL_RTX, 0, OPTAB_WIDEN);
+
+  emit_move_insn (byte_mode_size, size);
+
+  /* beqz    $size_for_word, .Lword_mode_end  */
+  emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL,
+                           SImode, 1, word_mode_end_label);
+  /* add     $word_mode_end, $dst, $size_for_word  */
+  word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
+                                NULL_RTX, 0, OPTAB_WIDEN);
+
+  /* andi    $byte_mode_size, $size, 3  */
+  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3),
+                                     NULL_RTX, 0, OPTAB_WIDEN);
+
+  emit_move_insn (byte_mode_size, byte_mode_size_tmp);
+
+  /* .Lword_mode:  */
+  emit_label (word_mode_label);
+  /* ! word-mode set loop
+     smw.bim $value4word, [$dst_itr], $value4word, 0
+     bne     $word_mode_end, $dst_itr, .Lword_mode  */
+  emit_insn (gen_unaligned_store_update_base_w (itr,
+                                                itr,
+                                                value));
+  emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
+                           Pmode, 1, word_mode_label);
+
+  emit_label (word_mode_end_label);
+
+  return byte_mode_size;
+}
+
+/* Emit a loop that stores the byte VALUE through ITR, post-incrementing
+   ITR, until ITR reaches its initial value plus SIZE; the loop is skipped
+   when SIZE is zero.  If NEED_END, that end address is computed ahead of
+   the zero test, so it is set on both paths, and is returned; otherwise
+   return NULL_RTX.  */
+static rtx
+emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
+{
+  rtx end = gen_reg_rtx (Pmode);
+  rtx byte_mode_label = gen_label_rtx ();
+  rtx end_label = gen_label_rtx ();
+
+  value = force_reg (QImode, value);
+
+  if (need_end)
+    end = expand_binop (Pmode, add_optab, itr, size,
+                        NULL_RTX, 0, OPTAB_WIDEN);
+  /* beqz    $byte_mode_size, .Lend
+     add     $byte_mode_end, $dst_itr, $byte_mode_size  */
+  emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL,
+                           SImode, 1, end_label);
+
+  if (!need_end)
+    end = expand_binop (Pmode, add_optab, itr, size,
+                        NULL_RTX, 0, OPTAB_WIDEN);
+
+  /* .Lbyte_mode:  */
+  emit_label (byte_mode_label);
+
+  /* ! byte-mode set loop
+     sbi.bi  $value, [$dst_itr], 1
+     bne     $byte_mode_end, $dst_itr, .Lbyte_mode  */
+  nds32_emit_post_inc_load_store (value, itr, QImode, false);
+
+  emit_cmp_and_jump_insns (end, itr, NE, NULL,
+                           Pmode, 1, byte_mode_label);
+  /* .Lend:  */
+  emit_label (end_label);
+
+  if (need_end)
+    return end;
+  else
+    return NULL_RTX;
+}
+
+/* Expand setmem as a word-mode loop followed by a byte-mode loop for the
+   bytes left over.  DSTMEM is the destination MEM, SIZE the byte count
+   and VALUE the fill byte.  Always return true.  */
+static bool
+nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
+{
+  rtx value4word;
+  rtx value4byte;
+  rtx dst;
+  rtx byte_mode_size;
+
+  /* Emit loop version of setmem.
+     memset:
+       ! prepare word
+       andi    $tmp1, $val, 0xff           ! $tmp1       <- 0x000000ab
+       slli    $tmp2, $tmp1, 8             ! $tmp2       <- 0x0000ab00
+       or      $tmp3, $tmp1, $tmp2         ! $tmp3       <- 0x0000abab
+       slli    $tmp4, $tmp3, 16            ! $tmp4       <- 0xabab0000
+       or      $val4word, $tmp3, $tmp4     ! $value4word <- 0xabababab
+
+       and     $size_for_word, $size, #-4
+       beqz    $size_for_word, .Lword_mode_end
+
+       add     $word_mode_end, $dst, $size_for_word
+       andi    $byte_mode_size, $size, 3
+
+     .Lword_mode:
+       ! word-mode set loop
+       smw.bim $value4word, [$dst], $value4word, 0
+       bne     $word_mode_end, $dst, .Lword_mode
+
+     .Lword_mode_end:
+       beqz    $byte_mode_size, .Lend
+       add     $byte_mode_end, $dst, $byte_mode_size
+
+     .Lbyte_mode:
+       ! byte-mode set loop
+       sbi.bi  $value4word, [$dst], 1
+       bne     $byte_mode_end, $dst, .Lbyte_mode
+     .Lend:  */
+
+  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
+
+  /* ! prepare word
+     andi    $tmp1, $value, 0xff         ! $tmp1       <- 0x000000ab
+     slli    $tmp2, $tmp1, 8             ! $tmp2       <- 0x0000ab00
+     or      $tmp3, $tmp1, $tmp2         ! $tmp3       <- 0x0000abab
+     slli    $tmp4, $tmp3, 16            ! $tmp4       <- 0xabab0000
+     or      $val4word, $tmp3, $tmp4     ! $value4word <- 0xabababab  */
+  value4word = nds32_gen_dup_4_byte_to_word_value (value);
+
+  /*   and     $size_for_word, $size, #-4
+       beqz    $size_for_word, .Lword_mode_end
+
+       add     $word_mode_end, $dst, $size_for_word
+       andi    $byte_mode_size, $size, 3
+
+     .Lword_mode:
+       ! word-mode set loop
+       smw.bim $value4word, [$dst], $value4word, 0
+       bne     $word_mode_end, $dst, .Lword_mode
+     .Lword_mode_end:  */
+  byte_mode_size = emit_setmem_word_loop (dst, size, value4word);
+
+  /*   beqz    $byte_mode_size, .Lend
+       add     $byte_mode_end, $dst, $byte_mode_size
+
+     .Lbyte_mode:
+       ! byte-mode set loop
+       sbi.bi  $value, [$dst], 1
+       bne     $byte_mode_end, $dst, .Lbyte_mode
+     .Lend:  */
+
+  value4byte = simplify_gen_subreg (QImode, value4word, SImode,
+                                    subreg_lowpart_offset (QImode, SImode));
+
+  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);
+
+  return true;
+}
+
+/* Expand setmem for V3M when the destination is not known to be 4-byte
+   aligned: set bytes up to a 4-byte boundary, then whole words, then the
+   bytes left over.  If SIZE is 16 or less everything is done by the last
+   byte loop.  Always return true.  */
+static bool
+nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
+{
+  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
+  rtx need_align_bytes = gen_reg_rtx (SImode);
+  rtx last_2_bit = gen_reg_rtx (SImode);
+  rtx byte_loop_base = gen_reg_rtx (SImode);
+  rtx byte_loop_size = gen_reg_rtx (SImode);
+  rtx remain_size = gen_reg_rtx (SImode);
+  rtx new_base_reg;
+  rtx value4byte, value4word;
+  rtx byte_mode_size;
+  rtx last_byte_loop_label = gen_label_rtx ();
+
+  size = force_reg (SImode, size);
+
+  value4word = nds32_gen_dup_4_byte_to_word_value (value);
+  value4byte = simplify_gen_subreg (QImode, value4word, SImode,
+                                    subreg_lowpart_offset (QImode, SImode));
+
+  emit_move_insn (byte_loop_size, size);
+  emit_move_insn (byte_loop_base, base_reg);
+
+  /* Jump to the last byte loop if size is not greater than 16.  */
+  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
+                           SImode, 1, last_byte_loop_label);
+
+  /* Align to 4 bytes first, since V3M cannot do unaligned accesses.  */
+  emit_insn (gen_andsi3 (last_2_bit,
+                         base_reg,
+                         gen_int_mode (0x3, SImode)));
+
+  emit_insn (gen_subsi3 (need_align_bytes,
+                         gen_int_mode (4, SImode),
+                         last_2_bit));
+
+  /* Set bytes up to the 4-byte boundary.  */
+  new_base_reg = emit_setmem_byte_loop (base_reg,
+                                        need_align_bytes,
+                                        value4byte,
+                                        true);
+
+  /* Calculate the remaining size.  */
+  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));
+
+  /* Set memory word by word.  */
+  byte_mode_size = emit_setmem_word_loop (new_base_reg,
+                                          remain_size,
+                                          value4word);
+
+  emit_move_insn (byte_loop_base, new_base_reg);
+  emit_move_insn (byte_loop_size, byte_mode_size);
+
+  emit_label (last_byte_loop_label);
+
+  /* And set the remaining bytes.  */
+  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);
+  return true;
+}
+
+/* Expand setmem as straight-line code.  This is only done when SIZE is a
+   CONST_INT of at most 64 bytes for TARGET_REDUCED_REGS or 128 bytes
+   otherwise; in any other case return false without emitting anything.
+   The word-replicated VALUE is placed in consecutive hard registers that
+   are written with store-multiple; leftover bytes are stored singly.  */
+static bool
+nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
+                            rtx align ATTRIBUTE_UNUSED,
+                            rtx expected_align ATTRIBUTE_UNUSED,
+                            rtx expected_size ATTRIBUTE_UNUSED)
+{
+  unsigned maximum_regs, maximum_bytes, start_regno, regno;
+  rtx value4word;
+  rtx dst_base_reg, new_base_reg;
+  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
+  unsigned HOST_WIDE_INT real_size;
+
+  if (TARGET_REDUCED_REGS)
+    {
+      maximum_regs = 4;
+      maximum_bytes = 64;
+      start_regno = 2;
+    }
+  else
+    {
+      maximum_regs = 8;
+      maximum_bytes = 128;
+      start_regno = 16;
+    }
+
+  /* UINTVAL is only meaningful for a CONST_INT, so reject any other
+     SIZE before reading it.  */
+  if (!CONST_INT_P (size))
+    return false;
+
+  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);
+
+  if (real_size > maximum_bytes)
+    return false;
+
+  remain_bytes = real_size;
+
+  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
+
+  value4word = nds32_gen_dup_4_byte_to_word_value (value);
+
+  prepare_regs = remain_bytes / UNITS_PER_WORD;
+
+  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
+
+  if (prepare_regs > maximum_regs)
+    prepare_regs = maximum_regs;
+
+  fill_per_smw = prepare_regs * UNITS_PER_WORD;
+
+  regno = start_regno;
+  switch (prepare_regs)
+    {
+    case 2:
+    default:
+      {
+        rtx reg0 = gen_rtx_REG (SImode, regno);
+        rtx reg1 = gen_rtx_REG (SImode, regno + 1);
+        unsigned last_regno = start_regno + prepare_regs - 1;
+
+        emit_move_insn (reg0, value4word);
+        emit_move_insn (reg1, value4word);
+        rtx regd = gen_rtx_REG (DImode, regno);
+        regno += 2;
+
+        /* Try to utilize movd44 by copying register pairs.  */
+        while (regno <= last_regno)
+          {
+            if ((regno + 1) <= last_regno)
+              {
+                rtx reg = gen_rtx_REG (DImode, regno);
+                emit_move_insn (reg, regd);
+                regno += 2;
+              }
+            else
+              {
+                rtx reg = gen_rtx_REG (SImode, regno);
+                emit_move_insn (reg, reg0);
+                regno += 1;
+              }
+          }
+        break;
+      }
+    case 1:
+      {
+        rtx reg = gen_rtx_REG (SImode, regno++);
+        emit_move_insn (reg, value4word);
+      }
+      break;
+    case 0:
+      break;
+    }
+
+  if (fill_per_smw)
+    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
+      {
+        emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
+                                                dst_base_reg, dstmem,
+                                                true, &new_base_reg));
+        dst_base_reg = new_base_reg;
+        dstmem = gen_rtx_MEM (SImode, dst_base_reg);
+      }
+
+  remain_words = remain_bytes / UNITS_PER_WORD;
+
+  if (remain_words)
+    {
+      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
+                                              dst_base_reg, dstmem,
+                                              true, &new_base_reg));
+      dst_base_reg = new_base_reg;
+      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
+    }
+
+  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);
+
+  if (remain_bytes)
+    {
+      value = simplify_gen_subreg (QImode, value4word, SImode,
+                                   subreg_lowpart_offset (QImode, SImode));
+      int offset = 0;
+      for (; remain_bytes; --remain_bytes, ++offset)
+        {
+          nds32_emit_load_store (value, dstmem, QImode, offset, false);
+        }
+    }
+
+  return true;
+}
+
+/* Expand the setmemsi pattern.  Return true if the expansion was emitted
+   and false if the pattern should FAIL.  Nothing is expanded when
+   optimizing for size or at an optimization level below 3.  */
+bool
+nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align,
+                     rtx expected_align,
+                     rtx expected_size)
+{
+  bool align_to_4_bytes = (INTVAL (align) & 3) == 0;
+
+  /* Only expand at -O3 or above, and never when optimizing for size.  */
+  if (optimize_size || optimize < 3)
+    return false;
+
+  if (TARGET_ISA_V3M && !align_to_4_bytes)
+    return nds32_expand_setmem_loop_v3m (dstmem, size, value);
+
+  if (nds32_expand_setmem_unroll (dstmem, size, value,
+                                  align, expected_align, expected_size))
+    return true;
+
+  return nds32_expand_setmem_loop (dstmem, size, value);
+}
+
+/* ------------------------------------------------------------------------ */
 
 /* Functions to expand load_multiple and store_multiple.
    They are auxiliary extern functions to help create rtx template.
diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md
index f266533..66880c2 100644
--- a/gcc/config/nds32/nds32-multiple.md
+++ b/gcc/config/nds32/nds32-multiple.md
@@ -3749,3 +3749,22 @@
 })
 
 ;; ------------------------------------------------------------------------
+
+(define_expand "setmemsi"
+  [(use (match_operand:BLK 0 "memory_operand"))
+   (use (match_operand:SI 1 "nds32_reg_constant_operand"))
+   (use (match_operand:QI 2 "nonmemory_operand"))
+   (use (match_operand 3 "const_int_operand"))
+   (use (match_operand:SI 4 "const_int_operand"))
+   (use (match_operand:SI 5 "const_int_operand"))]
+  ""
+{
+  if (nds32_expand_setmem (operands[0], operands[1],
+                           operands[2], operands[3],
+                           operands[4], operands[5]))
+    DONE;
+
+  FAIL;
+})
+
+;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h
index 6ed1522..075206e 100644
--- a/gcc/config/nds32/nds32-protos.h
+++ b/gcc/config/nds32/nds32-protos.h
@@ -69,6 +69,7 @@ extern bool nds32_valid_smw_lwm_base_p (rtx);
 extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *);
 extern rtx nds32_expand_store_multiple (int, int, rtx, rtx, bool, rtx *);
 extern bool nds32_expand_movmemsi (rtx, rtx, rtx, rtx);
+extern bool nds32_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
 
 /* Auxiliary functions for expand unalign load instruction.  */
 
-- 
2.7.4