From 7c32ef413a661d53de9a90d29b5253bf98e6af82 Mon Sep 17 00:00:00 2001 From: Monk Chiang Date: Sun, 13 May 2018 05:41:37 +0000 Subject: [PATCH] [NDS32] Add DSP extension instructions. gcc/ * config.gcc (nds32be-*-*): Handle --with-ext-dsp. * config/nds32/constants.md (unspec_element, unspec_volatile_element): Add enum values for DSP extension instructions. * config/nds32/constraints.md (Iu06, IU06, CVp5, CVs5, CVs2, CVhi): New constraints. * config/nds32/iterators.md (shifts, shiftrt, sat_plus, all_plus, sat_minus, all_minus, plus_minus, extend, sumax, sumin, sumin_max): New code iterators. (su, zs, uk, opcode, add_rsub, add_sub): New code attributes. * config/nds32/nds32-dspext.md: New file for DSP implementation. * config/nds32/nds32-intrinsic.c: Implementation of DSP extension. * config/nds32/nds32-intrinsic.md: Likewise. * config/nds32/nds32_intrinsic.h: Likewise. * config/nds32/nds32-md-auxiliary.c: Likewise. * config/nds32/nds32-memory-manipulation.c: Consider DSP extension. * config/nds32/nds32-predicates.c (const_vector_to_hwint): New. (nds32_valid_CVp5_p, nds32_valid_CVs5_p): New. (nds32_valid_CVs2_p, nds32_valid_CVhi_p): New. * config/nds32/nds32-protos.h: New declarations for DSP extension. * config/nds32/nds32-utils.c (extract_mac_non_acc_rtx): New case TYPE_DMAC in switch statement. * config/nds32/nds32.c: New checking and implementation for DSP extension instructions. * config/nds32/nds32.h: Likewise. * config/nds32/nds32.md: Likewise. * config/nds32/nds32.opt (mhw-abs, mext-dsp): New options. * config/nds32/predicates.md: Implement new predicates for DSP extension. Co-Authored-By: Chung-Ju Wu Co-Authored-By: Kito Cheng From-SVN: r260206 --- gcc/ChangeLog | 33 + gcc/config.gcc | 5 + gcc/config/nds32/constants.md | 25 + gcc/config/nds32/constraints.md | 28 +- gcc/config/nds32/iterators.md | 40 + gcc/config/nds32/nds32-dspext.md | 5264 ++++++++++++++++++++++++++ gcc/config/nds32/nds32-intrinsic.c | 744 +++- gcc/config/nds32/nds32-intrinsic.md | 192 + gcc/config/nds32/nds32-md-auxiliary.c | 499 ++- gcc/config/nds32/nds32-memory-manipulation.c | 61 +- gcc/config/nds32/nds32-predicates.c | 77 + gcc/config/nds32/nds32-protos.h | 29 + gcc/config/nds32/nds32-utils.c | 1 + gcc/config/nds32/nds32.c | 65 + gcc/config/nds32/nds32.h | 330 +- gcc/config/nds32/nds32.md | 53 +- gcc/config/nds32/nds32.opt | 12 + gcc/config/nds32/nds32_intrinsic.h | 668 ++++ gcc/config/nds32/predicates.md | 60 + 19 files changed, 8151 insertions(+), 35 deletions(-) create mode 100644 gcc/config/nds32/nds32-dspext.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 92dc3f4..7b788ca 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2018-05-13 Monk Chiang + Kito Cheng + Chung-Ju Wu + + * config.gcc (nds32be-*-*): Handle --with-ext-dsp. + * config/nds32/constants.md (unspec_element, unspec_volatile_element): + Add enum values for DSP extension instructions. + * config/nds32/constraints.md (Iu06, IU06, CVp5, CVs5, CVs2, CVhi): + New constraints. + * config/nds32/iterators.md (shifts, shiftrt, sat_plus, all_plus, + sat_minus, all_minus, plus_minus, extend, sumax, sumin, sumin_max): + New code iterators. + (su, zs, uk, opcode, add_rsub, add_sub): New code attributes. + * config/nds32/nds32-dspext.md: New file for DSP implementation. + * config/nds32/nds32-intrinsic.c: Implementation of DSP extension. + * config/nds32/nds32-intrinsic.md: Likewise. + * config/nds32/nds32_intrinsic.h: Likewise. + * config/nds32/nds32-md-auxiliary.c: Likewise. 
+ * config/nds32/nds32-memory-manipulation.c: Consider DSP extension. + * config/nds32/nds32-predicates.c (const_vector_to_hwint): New. + (nds32_valid_CVp5_p, nds32_valid_CVs5_p): New. + (nds32_valid_CVs2_p, nds32_valid_CVhi_p): New. + * config/nds32/nds32-protos.h: New declarations for DSP extension. + * config/nds32/nds32-utils.c (extract_mac_non_acc_rtx): New case + TYPE_DMAC in switch statement. + * config/nds32/nds32.c: New checking and implementation for DSP + extension instructions. + * config/nds32/nds32.h: Likewise. + * config/nds32/nds32.md: Likewise. + * config/nds32/nds32.opt (mhw-abs, mext-dsp): New options. + * config/nds32/predicates.md: Implement new predicates for DSP + extension. + 2018-05-11 Michael Meissner * config/rs6000/rs6000.md (mov_softfloat, FMOVE32): diff --git a/gcc/config.gcc b/gcc/config.gcc index 9c4a849..fec118c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -2346,6 +2346,11 @@ nds32be-*-*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" tmake_file="nds32/t-nds32 nds32/t-mlibs" + + # Handle --with-ext-dsp + if test x${with_ext_dsp} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1" + fi ;; nios2-*-*) tm_file="elfos.h ${tm_file}" diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md index 37c2704..cba989f 100644 --- a/gcc/config/nds32/constants.md +++ b/gcc/config/nds32/constants.md @@ -49,6 +49,16 @@ UNSPEC_FFB UNSPEC_FFMISM UNSPEC_FLMISM + UNSPEC_KDMBB + UNSPEC_KDMBT + UNSPEC_KDMTB + UNSPEC_KDMTT + UNSPEC_KHMBB + UNSPEC_KHMBT + UNSPEC_KHMTB + UNSPEC_KHMTT + UNSPEC_KSLRAW + UNSPEC_KSLRAWU UNSPEC_SVA UNSPEC_SVS UNSPEC_WSBH @@ -62,6 +72,19 @@ UNSPEC_UASTORE_HW UNSPEC_UASTORE_W UNSPEC_UASTORE_DW + UNSPEC_ROUND + UNSPEC_VEC_COMPARE + UNSPEC_KHM + UNSPEC_KHMX + UNSPEC_CLIP_OV + UNSPEC_CLIPS_OV + UNSPEC_BITREV + UNSPEC_KABS + UNSPEC_LOOP_END + UNSPEC_TLS_DESC + UNSPEC_TLS_IE + UNSPEC_KADDH + UNSPEC_KSUBH ]) ;; The unspec_volatile operation index. @@ -139,6 +162,8 @@ UNSPEC_VOLATILE_UNALIGNED_FEATURE UNSPEC_VOLATILE_ENABLE_UNALIGNED UNSPEC_VOLATILE_DISABLE_UNALIGNED + UNSPEC_VOLATILE_RDOV + UNSPEC_VOLATILE_CLROV ]) ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md index 7af7769..500f1a4 100644 --- a/gcc/config/nds32/constraints.md +++ b/gcc/config/nds32/constraints.md @@ -127,6 +127,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (ival, -31, 0)"))) +(define_constraint "Iu06" + "Unsigned immediate 6-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 6) && ival >= 0"))) + ;; Ip05 is special and dedicated for v3 movpi45 instruction. ;; movpi45 has imm5u field but the range is 16 ~ 47. 
(define_constraint "Ip05" @@ -136,10 +141,10 @@ && ival >= (0 + 16) && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) -(define_constraint "Iu06" +(define_constraint "IU06" "Unsigned immediate 6-bit value constraint for addri36.sp instruction" (and (match_code "const_int") - (match_test "ival < (1 << 6) + (match_test "ival < (1 << 8) && ival >= 0 && (ival % 4 == 0) && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) @@ -302,6 +307,25 @@ (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) && (IN_RANGE (exact_log2 (ival + 1), 1, 8))"))) +(define_constraint "CVp5" + "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47" + (and (match_code "const_vector") + (match_test "nds32_valid_CVp5_p (op)"))) + +(define_constraint "CVs5" + "Signed immediate 5-bit value" + (and (match_code "const_vector") + (match_test "nds32_valid_CVs5_p (op)"))) + +(define_constraint "CVs2" + "Signed immediate 20-bit value" + (and (match_code "const_vector") + (match_test "nds32_valid_CVs2_p (op)"))) + +(define_constraint "CVhi" + "The immediate value that can be simply set high 20-bit" + (and (match_code "const_vector") + (match_test "nds32_valid_CVhi_p (op)"))) (define_memory_constraint "U33" "Memory constraint for 333 format" diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md index c2062de..f4fb581 100644 --- a/gcc/config/nds32/iterators.md +++ b/gcc/config/nds32/iterators.md @@ -68,6 +68,28 @@ ;; shifts (define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert]) +(define_code_iterator shifts [ashift ashiftrt lshiftrt]) + +(define_code_iterator shiftrt [ashiftrt lshiftrt]) + +(define_code_iterator sat_plus [ss_plus us_plus]) + +(define_code_iterator all_plus [plus ss_plus us_plus]) + +(define_code_iterator sat_minus [ss_minus us_minus]) + +(define_code_iterator all_minus [minus ss_minus us_minus]) + +(define_code_iterator plus_minus [plus minus]) + +(define_code_iterator extend [sign_extend zero_extend]) + +(define_code_iterator sumax [smax umax]) + +(define_code_iterator sumin [smin umin]) + +(define_code_iterator sumin_max [smax umax smin umin]) + ;;---------------------------------------------------------------------------- ;; Code attributes. ;;---------------------------------------------------------------------------- @@ -76,5 +98,23 @@ (define_code_attr shift [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")]) +(define_code_attr su + [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")]) + +(define_code_attr zs + [(sign_extend "s") (zero_extend "z")]) + +(define_code_attr uk + [(plus "") (ss_plus "k") (us_plus "uk") + (minus "") (ss_minus "k") (us_minus "uk")]) + +(define_code_attr opcode + [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")]) + +(define_code_attr add_rsub + [(plus "a") (minus "rs")]) + +(define_code_attr add_sub + [(plus "a") (minus "s")]) ;;---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md new file mode 100644 index 0000000..4151353 --- /dev/null +++ b/gcc/config/nds32/nds32-dspext.md @@ -0,0 +1,5264 @@ +;; Machine description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_expand "mov" + [(set (match_operand:VQIHI 0 "general_operand" "") + (match_operand:VQIHI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (mode, operands[1]); + + /* If operands[1] is a large constant and cannot be performed + by a single instruction, we need to split it. */ + if (GET_CODE (operands[1]) == CONST_VECTOR + && !satisfies_constraint_CVs2 (operands[1]) + && !satisfies_constraint_CVhi (operands[1])) + { + HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]); + rtx tmp_rtx; + + tmp_rtx = can_create_pseudo_p () + ? gen_reg_rtx (SImode) + : simplify_gen_subreg (SImode, operands[0], mode, 0); + + emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode)); + convert_move (operands[0], tmp_rtx, false); + DONE; + } +}) + +(define_insn "*mov" + [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$ l,$ l,$ l,$ d, d, r,$ d, r, r, r, *f, *f, r, *f, Q") + (match_operand:VQIHI 1 "nds32_vmove_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f, r, *f, Q, *f"))] + "NDS32_EXT_DSP_P () + && (register_operand(operands[0], mode) + || register_operand(operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "mov55\t%0, %1"; + case 1: + return "ori\t%0, %1, 0"; + case 2: + case 3: + case 4: + case 5: + return nds32_output_16bit_store (operands, ); + case 6: + return nds32_output_32bit_store (operands, ); + case 7: + case 8: + case 9: + case 10: + case 11: + return nds32_output_16bit_load (operands, ); + case 12: + return nds32_output_32bit_load (operands, ); + case 13: + return "movpi45\t%0, %1"; + case 14: + return "movi55\t%0, %1"; + case 15: + return "movi\t%0, %1"; + case 16: + return "sethi\t%0, hi20(%1)"; + case 17: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 18: + return "fmtsr\t%1, %0"; + case 19: + return "fmfsr\t%0, %1"; + case 20: + return nds32_output_float_load (operands); + case 21: + return nds32_output_float_store (operands); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_operand" "") + (match_operand:V2SI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + /* Need to force register if mem <- !reg. 
*/
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (V2SImode, operands[1]);
+})
+
+(define_insn "*movv2si"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, r, f")
+        (match_operand:V2SI 1 "general_operand"      " r, i, Da, m, r, r, Q, f, f, f, r"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], V2SImode)
+       || register_operand(operands[1], V2SImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "movd44\t%0, %1";
+    case 1:
+      /* reg <- const_int, so we ask gcc to split the instruction.  */
+      return "#";
+    case 2:
+      /* The memory format is (mem (reg)),
+         we can generate 'lmw.bi' instruction.  */
+      return nds32_output_double (operands, true);
+    case 3:
+      /* We do not have a 64-bit load instruction,
+         so we split this pattern into two SImode patterns.  */
+      return "#";
+    case 4:
+      /* The memory format is (mem (reg)),
+         we can generate 'smw.bi' instruction.  */
+      return nds32_output_double (operands, false);
+    case 5:
+      /* We do not have a 64-bit store instruction,
+         so we split this pattern into two SImode patterns.  */
+      return "#";
+    case 6:
+      return nds32_output_float_load (operands);
+    case 7:
+      return nds32_output_float_store (operands);
+    case 8:
+      return "fcpysd\t%0, %1, %1";
+    case 9:
+      return "fmfdr\t%0, %1";
+    case 10:
+      return "fmtdr\t%1, %0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "!TARGET_16_BIT")
+                     (const_int 4)
+                     (const_int 2))
+       ;; Alternative 1
+       (const_int 16)
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 8)
+       ;; Alternative 4
+       (const_int 4)
+       ;; Alternative 5
+       (const_int 8)
+       ;; Alternative 6
+       (const_int 4)
+       ;; Alternative 7
+       (const_int 4)
+       ;; Alternative 8
+       (const_int 4)
+       ;; Alternative 9
+       (const_int 4)
+       ;; Alternative 10
+       (const_int 4)
+     ])
+   (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+        (match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx addr;
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (MEM_P (operands[0]))
+    {
+      addr = force_reg (Pmode, XEXP (operands[0], 0));
+      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
+    }
+  else
+    {
+      addr = force_reg (Pmode, XEXP (operands[1], 0));
+      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
+    }
+  DONE;
+})
+
+(define_expand "unaligned_load<mode>"
+  [(set (match_operand:VQIHI 0 "register_operand" "=r")
+        (unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_load (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1])));
+  DONE;
+})
+
+(define_insn "unaligned_load_w<mode>"
+  [(set (match_operand:VQIHI 0 "register_operand" "= r")
+        (unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_lmw_single_word (operands);
+}
+  [(set_attr "type" "load")
+   (set_attr "length" "4")]
+)
+
+(define_expand "unaligned_store<mode>"
+  [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r"))
+        (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_store (operands, <MODE>mode);
+
else + emit_insn (gen_unaligned_store_w (gen_rtx_MEM (mode, operands[0]), operands[1])); + DONE; +}) + +(define_insn "unaligned_store_w" + [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand" "=Umw") + (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] + "NDS32_EXT_DSP_P ()" +{ + return nds32_output_smw_single_word (operands); +} + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "add3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "add %0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "adddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (all_plus:DI (match_operand:DI 1 "register_operand" " r") + (match_operand:DI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "add64 %0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "raddv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (ashiftrt:V4HI + (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + + +(define_insn "uraddv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (lshiftrt:V4HI + (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "raddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (ashiftrt:V2SI + (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "uraddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (lshiftrt:V2SI + (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "radddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (ashiftrt:TI + (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) + (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + + +(define_insn "uradddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) + (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn 
"sub3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sub %0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (all_minus:DI (match_operand:DI 1 "register_operand" " r") + (match_operand:DI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sub64 %0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "rsubv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (ashiftrt:V4HI + (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "ursubv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (lshiftrt:V4HI + (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "rsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (ashiftrt:V2SI + (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "ursubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (lshiftrt:V2SI + (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "rsubdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (ashiftrt:TI + (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) + (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4")]) + + +(define_insn "ursubdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) + (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4")]) + +(define_expand "cras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "cras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + 
(vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "cras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "cras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "cras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "kcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "kcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "kcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "kcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "kcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "ukcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "ukcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && 
!TARGET_BIG_ENDIAN" + "ukcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "ukcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "ukcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "crsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "crsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "crsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "crsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "crsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "kcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "kcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "kcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "kcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + 
(vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "kcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "ukcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "ukcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "ukcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "ukcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "ukcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "rcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "rcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "rcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + 
(parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "urcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "urcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "urcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "urcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "urcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "rcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "rcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + 
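The crossed add/sub patterns above express their per-halfword arithmetic entirely through vec_merge/vec_select RTL. As a reading aid, the following self-contained C model (illustrative only, not code the patch adds; the name rcrsa16_model is invented here) shows what the little-endian rcrsa16_1_le pattern computes, with element 0 being the low halfword:

#include <stdint.h>

/* Scalar model of rcrsa16 in little-endian lane order: the high halfword
   gets a halved difference and the low halfword a halved sum, each
   computed in 32 bits so the 17-bit intermediate cannot overflow.  */
static uint32_t
rcrsa16_model (uint32_t ra, uint32_t rb)
{
  int32_t ra_lo = (int16_t) (ra & 0xffff), ra_hi = (int16_t) (ra >> 16);
  int32_t rb_lo = (int16_t) (rb & 0xffff), rb_hi = (int16_t) (rb >> 16);

  int32_t hi = (ra_hi - rb_lo) >> 1;   /* element 1: (Ra.hi - Rb.lo) >> 1 */
  int32_t lo = (ra_lo + rb_hi) >> 1;   /* element 0: (Ra.lo + Rb.hi) >> 1 */

  return ((uint32_t) (uint16_t) hi << 16) | (uint16_t) lo;
}

The urcrsa16 patterns perform the same computation with zero extension and a logical shift, and the _be variants swap the selected element indices to match the reversed lane numbering used on big-endian targets.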
+(define_insn "rcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "urcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "urcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "urcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "urcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "urcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "") + (shifts:V2HI (match_operand:V2HI 1 "register_operand" "") + (match_operand:SI 2 "nds32_rimm4u_operand" "")))] + "NDS32_EXT_DSP_P ()" +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_insn "*ashlv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + slli16\t%0, %1, %2 + sll16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +(define_insn "kslli16" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ss_ashift:V2HI (match_operand:V2HI 1 "register_operand" " 
r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + kslli16\t%0, %1, %2 + ksll16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +(define_insn "*ashrv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + srai16\t%0, %1, %2 + sra16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +(define_insn "sra16_round" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srai16.u\t%0, %1, %2 + sra16.u\t%0, %1, %2" + [(set_attr "type" "daluround,daluround") + (set_attr "length" " 4, 4")]) + +(define_insn "*lshrv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + srli16\t%0, %1, %2 + srl16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +(define_insn "srl16_round" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srli16.u\t%0, %1, %2 + srl16.u\t%0, %1, %2" + [(set_attr "type" "daluround,daluround") + (set_attr "length" " 4, 4")]) + +(define_insn "kslra16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (if_then_else:V2HI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") + (neg:SI (match_dup 2))) + (ashift:V2HI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslra16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "kslra16_round" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (if_then_else:V2HI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") + (neg:SI (match_dup 2)))] + UNSPEC_ROUND) + (ashift:V2HI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslra16.u\t%0, %1, %2" + [(set_attr "type" "daluround") + (set_attr "length" "4")]) + +(define_insn "cmpeq" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "cmpeq\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +(define_insn "scmplt" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "scmplt\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +(define_insn "scmple" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "scmple\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + 
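The cmpeq/scmplt/scmple patterns above wrap their comparison in UNSPEC_VEC_COMPARE because the result is a per-element mask rather than a single flag: each destination element is set to all ones when the relation holds for that element and to all zeros otherwise. A minimal C sketch of the signed halfword case, assuming that all-ones/all-zeros lane-mask convention (the name scmplt16_model is invented here):

#include <stdint.h>

/* Per-lane model of a signed 16-bit "set if less than" compare: each
   halfword lane of the result becomes 0xffff when the relation holds
   for that lane and 0x0000 otherwise.  */
static uint32_t
scmplt16_model (uint32_t ra, uint32_t rb)
{
  uint32_t result = 0;
  for (int lane = 0; lane < 2; lane++)
    {
      int16_t a = (int16_t) (ra >> (16 * lane));
      int16_t b = (int16_t) (rb >> (16 * lane));
      if (a < b)
        result |= (uint32_t) 0xffff << (16 * lane);
    }
  return result;
}

The unsigned forms that follow (ucmplt, ucmple) compare the lanes as unsigned values instead, and the byte-sized variants apply the same rule to four 8-bit lanes.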
+(define_insn "ucmplt" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "ucmplt\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +(define_insn "ucmple" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "ucmple\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +(define_insn "sclip16" + [(set (match_operand:V2HI 0 "register_operand" "= r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] + UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + "sclip16\t%0, %1, %2" + [(set_attr "type" "dclip") + (set_attr "length" "4")]) + +(define_insn "uclip16" + [(set (match_operand:V2HI 0 "register_operand" "= r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] + UNSPEC_CLIP))] + "NDS32_EXT_DSP_P ()" + "uclip16\t%0, %1, %2" + [(set_attr "type" "dclip") + (set_attr "length" "4")]) + +(define_insn "khm16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:V2HI 2 "register_operand" " r")] + UNSPEC_KHM))] + "NDS32_EXT_DSP_P ()" + "khm16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "khmx16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:V2HI 2 "register_operand" " r")] + UNSPEC_KHMX))] + "NDS32_EXT_DSP_P ()" + "khmx16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "vec_setv4qi" + [(match_operand:V4QI 0 "register_operand" "") + (match_operand:QI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + HOST_WIDE_INT pos = INTVAL (operands[2]); + if (pos > 4) + gcc_unreachable (); + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; + emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1], + operands[0], GEN_INT (elem))); + DONE; +}) + +(define_expand "insb" + [(match_operand:V4QI 0 "register_operand" "") + (match_operand:V4QI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0) + gcc_unreachable (); + + rtx src = gen_reg_rtx (QImode); + + convert_move (src, operands[2], false); + + HOST_WIDE_INT selector_index; + /* Big endian need reverse index. 
*/ + if (TARGET_BIG_ENDIAN) + selector_index = 4 - INTVAL (operands[3]) - 1; + else + selector_index = INTVAL (operands[3]); + rtx selector = gen_int_mode (1 << selector_index, SImode); + emit_insn (gen_vec_setv4qi_internal (operands[0], src, + operands[1], selector)); + DONE; +}) + +(define_expand "insvsi" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "nds32_insv_operand" "")) + (match_operand:SI 3 "register_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[1]) != 8) + FAIL; +} + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + + +(define_insn "insvsi_internal" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 8) + (match_operand:SI 1 "nds32_insv_operand" "i")) + (match_operand:SI 2 "register_operand" "r"))] + "NDS32_EXT_DSP_P ()" + "insb\t%0, %2, %v1" + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +(define_insn "insvsiqi_internal" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 8) + (match_operand:SI 1 "nds32_insv_operand" "i")) + (zero_extend:SI (match_operand:QI 2 "register_operand" "r")))] + "NDS32_EXT_DSP_P ()" + "insb\t%0, %2, %v1" + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +;; Intermedium pattern for synthetize insvsiqi_internal +;; v0 = ((v1 & 0xff) << 8) +(define_insn_and_split "and0xff_s8" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (const_int 8)) + (const_int 65280)))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode))); + emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode))); + DONE; +}) + +;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) | 0xff0000) +(define_insn_and_split "insbsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0") + (const_int -16711681)) + (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (const_int 16711680))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[1]); + emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2])); + emit_move_insn (operands[0], tmp); + DONE; +}) + +;; v0 = (v1 & 0xff00ffff) | v2 +(define_insn_and_split "ior_and0xff00ffff_reg" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16711681)) + (match_operand:SI 2 "register_operand" "r")))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode))); + emit_insn (gen_iorsi3 (operands[0], tmp, operands[2])); + DONE; +}) + +(define_insn "vec_setv4qi_internal" + [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (match_operand:QI 1 "register_operand" " r, r, r, r")) + (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") + (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "insb\t%0, %1, 
3", + "insb\t%0, %1, 2", + "insb\t%0, %1, 1", + "insb\t%0, %1, 0" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "insb\t%0, %1, 0", + "insb\t%0, %1, 1", + "insb\t%0, %1, 2", + "insb\t%0, %1, 3" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +(define_insn "vec_setv4qi_internal_vec" + [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r, r, r, r") + (parallel [(const_int 0)]))) + (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") + (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + insb\t%0, %1, 0 + insb\t%0, %1, 1 + insb\t%0, %1, 2 + insb\t%0, %1, 3" + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +(define_insn "vec_mergev4qi_and_cv0_1" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergev4qi_and_cv0_2" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergeqi_and_cv0_1" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergeqi_and_cv0_2" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_expand "vec_setv2hi" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + HOST_WIDE_INT pos = INTVAL (operands[2]); + if (pos > 2) + gcc_unreachable (); + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; + emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1], + operands[0], GEN_INT (elem))); + DONE; +}) + +(define_insn "vec_setv2hi_internal" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r")) + (match_operand:V2HI 2 "register_operand" " r, r") + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + 
const char *pats[] = { "pkbb16\t%0, %1, %2", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pktb16\t%0, %2, %1", + "pkbb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "vec_mergev2hi_and_cv0_1" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergev2hi_and_cv0_2" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergehi_and_cv0_1" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergehi_and_cv0_2" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_expand "pkbb" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1), GEN_INT (1))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (0), GEN_INT (0))); + } + DONE; +}) + +(define_insn "pkbbsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535)) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "pkbbsi_2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "pkbbsi_3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "pkbbsi_4" + [(set 
(match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff) +(define_insn "pktbsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -65536)) + (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "pktbsi_2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -65536)) + (and:SI (match_operand:SI 2 "register_operand" "r") + (const_int 65535))))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pktbsi_3" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 16 ) + (const_int 0)) + (match_operand:SI 1 "register_operand" " r"))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "pktbsi_4" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 16 ) + (const_int 0)) + (zero_extend:SI (match_operand:HI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "pkttsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" " r") + (const_int -65536)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pktt16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "pkbt" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1), GEN_INT (0))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (0), GEN_INT (1))); + } + DONE; +}) + +(define_expand "pktt" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0), GEN_INT (0))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (1), GEN_INT (1))); + } + DONE; +}) + +(define_expand "pktb" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0), GEN_INT (1))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (1), GEN_INT (0))); + } + DONE; +}) + +(define_insn "vec_mergerr" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r")) + 
(vec_duplicate:V2HI + (match_operand:HI 2 "register_operand" " r, r")) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pkbb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + + +(define_insn "vec_merge" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:V2HI 2 "register_operand" " r, r") + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pktb16\t%0, %1, %2", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pktb16\t%0, %2, %1", + "pktb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "vec_mergerv" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r, r, r")) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pktb16\t%0, %2, %1 + pkbb16\t%0, %1, %2 + pkbt16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "vec_mergevr" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) + (vec_duplicate:V2HI + (match_operand:HI 2 "register_operand" " r, r, r, r")) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pkbt16\t%0, %2, %1 + pkbb16\t%0, %1, %2 + pktb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "vec_mergevv" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r, r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r, r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")]))) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r, r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")]))) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pktt16\t%0, %1, %2", + "pktb16\t%0, %1, %2", + "pkbb16\t%0, %1, %2", + "pkbt16\t%0, %1, %2", + "pktt16\t%0, %2, %1", + "pkbt16\t%0, %2, %1", + "pkbb16\t%0, %2, %1", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pkbb16\t%0, %2, %1", + "pktb16\t%0, %2, %1", + "pktt16\t%0, %2, %1", + "pkbt16\t%0, %2, %1", + "pkbb16\t%0, %1, %2", + "pkbt16\t%0, %1, %2", + "pktt16\t%0, %1, %2", + "pktb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "vec_extractv4qi" + [(set 
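+;; Byte 0 of a V4QI can be extracted directly (zeb/seb or a load); the
+;; vec_extractv4qi1/2/3 splitters below first rotate the vector so that
+;; the requested byte ends up in lane 0.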
(match_operand:QI 0 "register_operand" "") + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" "") + (parallel [(match_operand:SI 2 "const_int_operand" "")])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + if (INTVAL (operands[2]) != 0 + && INTVAL (operands[2]) != 1 + && INTVAL (operands[2]) != 2 + && INTVAL (operands[2]) != 3) + gcc_unreachable (); + + if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) + FAIL; +}) + +(define_insn "vec_extractv4qi0" + [(set (match_operand:QI 0 "register_operand" "=l,r,r") + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeb33\t%0, %1"; + case 1: + return "zeb\t%0, %1"; + case 2: + return nds32_output_32bit_load (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_extractv4qi0_ze" + [(set (match_operand:SI 0 "register_operand" "=l,r,r") + (zero_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeb33\t%0, %1"; + case 1: + return "zeb\t%0, %1"; + case 2: + return nds32_output_32bit_load (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_extractv4qi0_se" + [(set (match_operand:SI 0 "register_operand" "=l,r,r") + (sign_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seb33\t%0, %1"; + case 1: + return "seb\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qi1" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_1 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_2 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_3 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + 
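+;; On little-endian, element 3 is the most significant byte, so its
+;; sign- or zero-extended extract is simply a 24-bit right shift.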
[(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_extractv4qi3_se" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (sign_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " 0,r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 24 + srai\t%0, %1, 24" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv4qi3_ze" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (zero_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " 0,r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srli45\t%0, 24 + srli\t%0, %1, 24" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn_and_split "vec_extractv4qihi0" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi0 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qihi1" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi1 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi2 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi3 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "vec_extractv2hi" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" "") + (parallel [(match_operand:SI 2 "const_int_operand" "")])))] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[2]) != 0 + && INTVAL (operands[2]) != 1) + gcc_unreachable (); + + if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) + FAIL; +}) + +(define_insn "vec_extractv2hi0" + [(set (match_operand:HI 0 "register_operand" "=$l,r,r") + (vec_select:HI + (match_operand:V2HI 1 
"nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "vec_extractv2hi0_ze" + [(set (match_operand:SI 0 "register_operand" "=$l, r,$ l, *r") + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l, r, U33, m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeh33\t%0, %1"; + case 1: + return "zeh\t%0, %1"; + case 2: + return nds32_output_16bit_load (operands, 2); + case 3: + return nds32_output_32bit_load (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load,load") + (set_attr "length" " 2, 4, 2, 4")]) + +(define_insn "vec_extractv2hi0_se" + [(set (match_operand:SI 0 "register_operand" "=$l, r, r") + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "vec_extractv2hi0_be" + [(set (match_operand:HI 0 "register_operand" "=$d,r") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1" + [(set (match_operand:HI 0 "register_operand" "=$d,r") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_se" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_ze" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srli45\t%0, 16 + srli\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_be" + [(set (match_operand:HI 0 "register_operand" "=$l,r,r") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "mul16" + [(set (match_operand:V2SI 0 
"register_operand" "=r") + (mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r")) + (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "mul16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "mulx16" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_merge:V2SI + (vec_duplicate:V2SI + (mult:SI + (extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))))) + (vec_duplicate:V2SI + (mult:SI + (extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))) + (const_int 1)))] + "NDS32_EXT_DSP_P ()" + "mulx16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "rotrv2hi_1" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_select:V2HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv2hi_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_select:V2HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_1" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 8" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_1_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 8" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_2" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_2_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 24" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_3_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))] + 
"NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 24" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "v4qi_dup_10" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "pkbb\t%0, %1, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "v4qi_dup_32" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "pktt\t%0, %1, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "vec_unpacks_lo_v4qi" + [(match_operand:V2HI 0 "register_operand" "=r") + (match_operand:V4QI 1 "register_operand" " r")] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + emit_insn (gen_sunpkd810 (operands[0], operands[1])); + DONE; +}) + +(define_expand "sunpkd810" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd810_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd810_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "sunpkd820" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1])); + 
else + emit_insn (gen_sunpkd820_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd820_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "sunpkd830" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd830_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd830_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI 
+ (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "sunpkd831" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd831_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd831_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "zunpkd810" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd810_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd820" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd820_imp (operands[0], operands[1])); + DONE; 
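+  /* Like the sign-extending expanders above, the zunpkd8?? expanders only
+     pick the endian-correct byte indices; the <zs>unpkd8xy_imp patterns
+     then pair source bytes x and y, zero- or sign-extended to halfwords.  */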
+}) + +(define_expand "zunpkd830" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd830_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd831" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd831_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "smbb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "smbt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (1))); + DONE; +}) + +(define_expand "smtt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (0))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1))); + DONE; +}) + +(define_insn "mulhisi3v" + [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smtt\t%0, %1, %2", + "smbt\t%0, %2, %1", + "smbb\t%0, %1, %2", + "smbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smbb\t%0, %1, %2", + "smbt\t%0, %1, %2", + "smtt\t%0, %1, %2", + "smbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "kmabb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (1), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (0), + operands[1])); + DONE; +}) + +(define_expand "kmabt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P 
()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (0), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (1), + operands[1])); + DONE; +}) + +(define_expand "kmatt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (0), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (1), + operands[1])); + DONE; +}) + +(define_insn "kma_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") + (ss_plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) + (match_operand:SI 5 "register_operand" " 0, 0, 0, 0")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmatt\t%0, %1, %2", + "kmabt\t%0, %2, %1", + "kmabb\t%0, %1, %2", + "kmabt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmabb\t%0, %1, %2", + "kmabt\t%0, %1, %2", + "kmatt\t%0, %1, %2", + "kmabt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smds" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smds_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smds_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "smds_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smds_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smdrs" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smdrs_le (operands[0], 
operands[1], operands[2])); + DONE; +}) + +(define_expand "smdrs_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smdrs_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smxdsv" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2])); + DONE; +}) + + +(define_expand "smxdsv_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smxdsv_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_insn "smal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal2" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal3" + [(set (match_operand:DI 0 "register_operand" "=r") 
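+;; The smal[1-8] variants all describe the same operation in the different
+;; RTL shapes combine may build; as an illustration:
+;;   smal d, a, b  ->  d = a + (int64_t) ((int16_t) b * (int16_t) (b >> 16))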
+ (plus:DI (match_operand:DI 1 "register_operand" " r") + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal5" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal6" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal7" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal8" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; We need this dummy pattern for smal +(define_insn_and_split "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + emit_move_insn (low_part_dst, operands[1]); + emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; We need this dummy pattern for usmar64/usmsr64 +(define_insn_and_split "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + 
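+  /* Zero-extending to DImode only has to clear the high word, so expand it
+     as a low-part move plus a move of zero; the usmar64/usmsr64 helpers can
+     then run on DImode accumulators without a real extend instruction.  */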
rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + emit_move_insn (low_part_dst, operands[1]); + emit_move_insn (high_part_dst, const0_rtx); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + + emit_insn (gen_extendhisi2 (low_part_dst, operands[1])); + emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sunpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "smulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "smmul\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "smmul_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "smmul.u\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "kmmac" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmac\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmmac_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmac.u\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmmsb" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmsb\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmmsb_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 
3 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmsb.u\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kwmmul" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (ss_mult:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) + (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2))) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "kwmmul\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "kwmmul_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (unspec:DI [ + (ss_mult:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) + (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))] + UNSPEC_ROUND) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "kwmmul.u\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "smmwb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); + else + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); + DONE; +}) + +(define_expand "smmwt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); + else + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); + DONE; +}) + +(define_insn "smulhisi3_highpart_1" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt\t%0, %1, %2", + "smmwb\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb\t%0, %1, %2", + "smmwt\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "smulhisi3_highpart_2" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r, r"))) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt\t%0, %1, %2", + "smmwb\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb\t%0, %1, %2", + "smmwt\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "smmwb_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" 
+{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); + else + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); + DONE; +}) + +(define_expand "smmwt_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); + else + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); + DONE; +}) + +(define_insn "smmw_round_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (unspec:DI + [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] + UNSPEC_ROUND) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt.u\t%0, %1, %2", + "smmwb.u\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb.u\t%0, %1, %2", + "smmwt.u\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "kmmawb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + else + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + DONE; +}) + +(define_expand "kmmawt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + else + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + DONE; +}) + +(define_insn "kmmaw_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_plus:SI + (match_operand:SI 4 "register_operand" " 0, 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) + (const_int 16)))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmmawt\t%0, %1, %2", + "kmmawb\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmmawb\t%0, %1, %2", + "kmmawt\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "kmmawb_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), 
operands[1])); + else + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "kmmawt_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + else + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + DONE; +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + + +(define_insn "kmmaw_round_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_plus:SI + (match_operand:SI 4 "register_operand" " 0, 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI + [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] + UNSPEC_ROUND) + (const_int 16)))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmmawt.u\t%0, %1, %2", + "kmmawb.u\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmmawb.u\t%0, %1, %2", + "kmmawt.u\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smalbb" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (1))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "smalbt" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (0))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (1))); + DONE; +}) + +(define_expand "smaltt" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (0))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (1))); + DONE; +}) + +(define_insn "smaddhidi" + [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") + (plus:DI + (match_operand:DI 3 "register_operand" " 0, 0, 0, 0") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") 
+ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1", + "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2", + "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smaddhidi2" + [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) + (match_operand:DI 3 "register_operand" " 0, 0, 0, 0")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1", + "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2", + "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smalda1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_expand "smalds1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smalda1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smalds1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + 
(sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalds\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smalda1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smalds1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalds\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smaldrs3" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smaldrs3_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smaldrs\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smaldrs3_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smaldrs\t%0, %2, %3" + [(set_attr "type" "dmac") + 
(set_attr "length" "4")]) + +(define_expand "smalxda1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_expand "smalxds1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smalxd1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalxd\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + + +(define_insn "smalxd1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalxd\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smslda1" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))))) + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "smslda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smslxda1" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))))) + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + 
(match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "smslxda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; mada for synthetize smalda +(define_insn_and_split "mada1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], + operands[3], operands[4])); + emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], + operands[5], operands[6])); + emit_insn (gen_addsi3 (operands[0], result0, result1)); + DONE; +}) + +(define_insn_and_split "mada2" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], + operands[3], operands[4])); + emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], + operands[6], operands[5])); + emit_insn (gen_addsi3 (operands[0], result0, result1)); + DONE; +}) + +;; sms for synthetize smalds +(define_insn_and_split "sms1" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () + && (!reload_completed + || !nds32_need_split_sms_p (operands[3], operands[4], + operands[5], operands[6]))" + +{ + return nds32_output_sms (operands[3], operands[4], + operands[5], operands[6]); +} + "NDS32_EXT_DSP_P () + && !reload_completed + && nds32_need_split_sms_p (operands[3], operands[4], + operands[5], operands[6])" + [(const_int 1)] 
+{ + nds32_split_sms (operands[0], operands[1], operands[2], + operands[3], operands[4], + operands[5], operands[6]); + DONE; +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn_and_split "sms2" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () + && (!reload_completed + || !nds32_need_split_sms_p (operands[3], operands[4], + operands[6], operands[5]))" +{ + return nds32_output_sms (operands[3], operands[4], + operands[6], operands[5]); +} + "NDS32_EXT_DSP_P () + && !reload_completed + && nds32_need_split_sms_p (operands[3], operands[4], + operands[6], operands[5])" + [(const_int 1)] +{ + nds32_split_sms (operands[0], operands[1], operands[2], + operands[3], operands[4], + operands[6], operands[5]); + DONE; +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" + "kmda\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" + "kmxda\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmada" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "kmada\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmada2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + 
(sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmada\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmaxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmaxda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmads" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "kmads\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmadrs" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmadrs\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmaxds" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmaxds\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmsda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + 
(sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "kmsda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmsxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmsxda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; smax[8|16] and umax[8|16] +(define_insn "3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +;; smin[8|16] and umin[8|16] +(define_insn "3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "3_bb" + [(set (match_operand: 0 "register_operand" "=r") + (sumin_max: (vec_select: + (match_operand:VQIHI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select: + (match_operand:VQIHI 2 "register_operand" " r") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "3_tt" + [(set (match_operand: 0 "register_operand" "=r") + (sumin_max: (vec_select: + (match_operand:VQIHI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select: + (match_operand:VQIHI 2 "register_operand" " r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotr_1 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (mode, tmp, mode, 0)); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "v4qi3_22" + [(set (match_operand:QI 0 "register_operand" "=r") + (sumin_max:QI (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])) + (vec_select:QI + (match_operand:V4QI 2 "register_operand" " r") + (parallel [(const_int 2)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_v4qi3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotrv4qi_2 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "v4qi3_33" + [(set (match_operand:QI 0 "register_operand" "=r") + (sumin_max:QI (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])) + (vec_select:QI + (match_operand:V4QI 2 "register_operand" 
" r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_v4qi3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotrv4qi_3 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "v2hi3_bbtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (sumin_max:HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (sumin_max:HI (vec_select:HI + (match_dup:V2HI 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup:V2HI 2) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + emit_insn (gen_v2hi3 (operands[0], operands[1], operands[2])); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_expand "abs2" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv" +{ +}) + +(define_insn "kabs2" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "kabs\t%0, %1" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "mar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "mar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "mar64_3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "mar64_4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "msr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "msr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "msr64_2" + [(set 
(match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "msr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; kmar64, kmsr64, ukmar64 and ukmsr64 +(define_insn "kmar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "kmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_plus:DI + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "kmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmsr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "kmsr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "ukmar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "ukmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "ukmar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_plus:DI + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "ukmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "ukmsr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "ukmsr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "bpick1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")) + (and:SI + (match_operand:SI 2 "register_operand" " r") + (not:SI (match_dup 3)))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %1, %2, %3" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (not:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %1, %3, %2" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick3" + [(set (match_operand:SI 0 
"register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (not:SI (match_dup 1)))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %2, %3, %1" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick4" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (not:SI (match_dup 1)) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %2, %3, %1" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick5" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (not:SI (match_operand:SI 2 "register_operand" " r"))) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %1, %2" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick6" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (not:SI (match_operand:SI 1 "register_operand" " r")) + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (match_dup 1))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %2, %1" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick7" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (not:SI (match_operand:SI 2 "register_operand" " r"))) + (and:SI + (match_dup 2) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %1, %2" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick8" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (not:SI (match_operand:SI 1 "register_operand" " r")) + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (match_dup 1) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %2, %1" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "sraiu" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srai.u\t%0, %1, %2 + sra.u\t%0, %1, %2" + [(set_attr "type" "daluround") + (set_attr "length" "4")]) + +(define_insn "kssl" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_ashift:SI (match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] + "NDS32_EXT_DSP_P ()" + "@ + kslli\t%0, %1, %2 + ksll\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "kslraw_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r") + (neg:SI (match_dup 2)))] + UNSPEC_ROUND) + (ss_ashift:SI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslraw.u\t%0, %1, %2" + [(set_attr "type" "daluround") + (set_attr "length" "4")]) + +(define_insn_and_split "di3" + [(set (match_operand:DI 0 "register_operand" "") + (shift_rotate:DI (match_operand:DI 1 "register_operand" "") + 
(match_operand:SI 2 "nds32_rimm6u_operand" "")))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + if (REGNO (operands[0]) == REGNO (operands[1])) + { + rtx tmp = gen_reg_rtx (DImode); + nds32_split_di3 (tmp, operands[1], operands[2]); + emit_move_insn (operands[0], tmp); + } + else + nds32_split_di3 (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn "sclip32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))] + "NDS32_EXT_DSP_P ()" + "sclip32\t%0, %1, %2" + [(set_attr "type" "dclip") + (set_attr "length" "4")] +) + +(define_insn "uclip32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))] + "NDS32_EXT_DSP_P ()" + "uclip32\t%0, %1, %2" + [(set_attr "type" "dclip") + (set_attr "length" "4")] +) + +(define_insn "bitrev" + [(set (match_operand:SI 0 "register_operand" "=r, r") + (unspec:SI [(match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")] + UNSPEC_BITREV))] + "" + "@ + bitrev\t%0, %1, %2 + bitrevi\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")] +) + +;; wext, wexti +(define_insn "wext" + [(set (match_operand:SI 0 "register_operand" "=r, r") + (truncate:SI + (shiftrt:DI + (match_operand:DI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " r,Iu05"))))] + "NDS32_EXT_DSP_P ()" + "@ + wext\t%0, %1, %2 + wexti\t%0, %1, %2" + [(set_attr "type" "dwext") + (set_attr "length" "4")]) + +;; 32-bit add/sub instruction: raddw and rsubw. +(define_insn "rsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (ashiftrt:DI + (plus_minus:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rw\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +;; 32-bit add/sub instruction: uraddw and ursubw. 
+(define_insn "ursi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (plus_minus:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" " r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "urw\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) diff --git a/gcc/config/nds32/nds32-intrinsic.c b/gcc/config/nds32/nds32-intrinsic.c index b9bb2d9..c2ad927b 100644 --- a/gcc/config/nds32/nds32-intrinsic.c +++ b/gcc/config/nds32/nds32-intrinsic.c @@ -519,6 +519,7 @@ static struct builtin_description bdesc_noarg[] = { NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG) NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR) + NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV) NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP) NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS) NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int", @@ -558,6 +559,31 @@ static struct builtin_description bdesc_1arg[] = NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF) NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp, "set_current_sp", SET_CURRENT_SP) + NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16) + NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16) + NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8) + NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8) + NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810) + NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810) + NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820) + NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820) + NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830) + NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830) + NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831) + NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831) + NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810) + NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810) + NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820) + NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820) + NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830) + NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830) + NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831) + NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831) + NDS32_BUILTIN(unspec_kabs, "kabs", KABS) + NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16) + NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16) + NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8) + NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8) }; /* Intrinsics that take just one argument. and the argument is immediate. 
*/ @@ -593,6 +619,28 @@ static struct builtin_description bdesc_2arg[] = NDS32_BUILTIN(unspec_ffb, "ffb", FFB) NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM) NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM) + NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW) + NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH) + NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW) + NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH) + NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB) + NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB) + NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT) + NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT) + NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB) + NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB) + NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT) + NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT) + NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB) + NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB) + NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT) + NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT) + NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB) + NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB) + NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT) + NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT) + NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW) + NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U) NDS32_BUILTIN(rotrsi3, "rotr", ROTR) NDS32_BUILTIN(unspec_sva, "sva", SVA) NDS32_BUILTIN(unspec_svs, "svs", SVS) @@ -603,7 +651,202 @@ static struct builtin_description bdesc_2arg[] = NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW) NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W) NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW) - + NDS32_BUILTIN(addv2hi3, "add16", ADD16) + NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16) + NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16) + NDS32_BUILTIN(raddv2hi3, "radd16", RADD16) + NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16) + NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16) + NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16) + NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16) + NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16) + NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16) + NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16) + NDS32_BUILTIN(subv2hi3, "sub16", SUB16) + NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16) + NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16) + NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16) + NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16) + NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16) + NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16) + NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16) + NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16) + NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16) + NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16) + NDS32_BUILTIN(cras16_1, "cras16", CRAS16) + NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16) + NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16) + NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16) + NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16) + NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16) + NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16) + NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16) + NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16) + NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16) + NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16) + NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16) + NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16) + NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16) + NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16) + NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16) + NDS32_BUILTIN(urcrsa16_1, 
"urcrsa16", URCRSA16) + NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16) + NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16) + NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16) + NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16) + NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16) + NDS32_BUILTIN(addv4qi3, "add8", ADD8) + NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8) + NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8) + NDS32_BUILTIN(raddv4qi3, "radd8", RADD8) + NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8) + NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8) + NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8) + NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8) + NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8) + NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8) + NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8) + NDS32_BUILTIN(subv4qi3, "sub8", SUB8) + NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8) + NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8) + NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8) + NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8) + NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8) + NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8) + NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8) + NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8) + NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8) + NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8) + NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16) + NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16) + NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U) + NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U) + NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16) + NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16) + NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) + NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) + NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16) + NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16) + NDS32_BUILTIN(kslli16, "ksll16", KSLL16) + NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) + NDS32_BUILTIN(kslra16, "kslra16", KSLRA16) + NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16) + NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U) + NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U) + NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16) + NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16) + NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16) + NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16) + NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16) + NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16) + NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16) + NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16) + NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16) + NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16) + NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16) + NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8) + NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8) + NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8) + NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8) + NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8) + NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8) + NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8) + NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8) + NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8) + NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8) + NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8) + NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16) + NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16) + NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16) + NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16) + NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16) + NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16) + NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16) + NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16) + NDS32_BUILTIN(khm16, "khm16", KHM16) + 
NDS32_BUILTIN(khm16, "v_khm16", V_KHM16) + NDS32_BUILTIN(khmx16, "khmx16", KHMX16) + NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16) + NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8) + NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8) + NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8) + NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8) + NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8) + NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8) + NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8) + NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8) + NDS32_BUILTIN(raddsi3, "raddw", RADDW) + NDS32_BUILTIN(uraddsi3, "uraddw", URADDW) + NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW) + NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW) + NDS32_BUILTIN(sraiu, "sra_u", SRA_U) + NDS32_BUILTIN(kssl, "ksll", KSLL) + NDS32_BUILTIN(pkbb, "pkbb16", PKBB16) + NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16) + NDS32_BUILTIN(pkbt, "pkbt16", PKBT16) + NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16) + NDS32_BUILTIN(pktb, "pktb16", PKTB16) + NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16) + NDS32_BUILTIN(pktt, "pktt16", PKTT16) + NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16) + NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL) + NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U) + NDS32_BUILTIN(smmwb, "smmwb", SMMWB) + NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB) + NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U) + NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U) + NDS32_BUILTIN(smmwt, "smmwt", SMMWT) + NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT) + NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U) + NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U) + NDS32_BUILTIN(smbb, "smbb", SMBB) + NDS32_BUILTIN(smbb, "v_smbb", V_SMBB) + NDS32_BUILTIN(smbt, "smbt", SMBT) + NDS32_BUILTIN(smbt, "v_smbt", V_SMBT) + NDS32_BUILTIN(smtt, "smtt", SMTT) + NDS32_BUILTIN(smtt, "v_smtt", V_SMTT) + NDS32_BUILTIN(kmda, "kmda", KMDA) + NDS32_BUILTIN(kmda, "v_kmda", V_KMDA) + NDS32_BUILTIN(kmxda, "kmxda", KMXDA) + NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA) + NDS32_BUILTIN(smds, "smds", SMDS) + NDS32_BUILTIN(smds, "v_smds", V_SMDS) + NDS32_BUILTIN(smdrs, "smdrs", SMDRS) + NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS) + NDS32_BUILTIN(smxdsv, "smxds", SMXDS) + NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS) + NDS32_BUILTIN(smal1, "smal", SMAL) + NDS32_BUILTIN(smal1, "v_smal", V_SMAL) + NDS32_BUILTIN(bitrev, "bitrev", BITREV) + NDS32_BUILTIN(wext, "wext", WEXT) + NDS32_BUILTIN(adddi3, "sadd64", SADD64) + NDS32_BUILTIN(adddi3, "uadd64", UADD64) + NDS32_BUILTIN(radddi3, "radd64", RADD64) + NDS32_BUILTIN(uradddi3, "uradd64", URADD64) + NDS32_BUILTIN(kadddi3, "kadd64", KADD64) + NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64) + NDS32_BUILTIN(subdi3, "ssub64", SSUB64) + NDS32_BUILTIN(subdi3, "usub64", USUB64) + NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64) + NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64) + NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64) + NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64) + NDS32_BUILTIN(smul16, "smul16", SMUL16) + NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16) + NDS32_BUILTIN(smulx16, "smulx16", SMULX16) + NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16) + NDS32_BUILTIN(umul16, "umul16", UMUL16) + NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16) + NDS32_BUILTIN(umulx16, "umulx16", UMULX16) + NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16) + NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL) + NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U) + NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, + "put_unaligned_u16x2", UASTORE_U16) + NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, + "put_unaligned_s16x2", UASTORE_S16) + NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8) 
+ NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8) }; /* Two-argument intrinsics with an immediate second argument. */ @@ -617,6 +860,22 @@ static struct builtin_description bdesc_2argimm[] = NDS32_BUILTIN(unspec_clips, "clips", CLIPS) NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ) NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ) + NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16) + NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16) + NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) + NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) + NDS32_BUILTIN(kslli16, "ksll16", KSLL16) + NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) + NDS32_BUILTIN(sclip16, "sclip16", SCLIP16) + NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16) + NDS32_BUILTIN(uclip16, "uclip16", UCLIP16) + NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16) + NDS32_BUILTIN(sraiu, "sra_u", SRA_U) + NDS32_BUILTIN(kssl, "ksll", KSLL) + NDS32_BUILTIN(bitrev, "bitrev", BITREV) + NDS32_BUILTIN(wext, "wext", WEXT) + NDS32_BUILTIN(uclip32, "uclip32", UCLIP32) + NDS32_BUILTIN(sclip32, "sclip32", SCLIP32) }; /* Intrinsics that take three arguments. */ @@ -625,6 +884,67 @@ static struct builtin_description bdesc_3arg[] = NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA) NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE) NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP) + NDS32_BUILTIN(kmabb, "kmabb", KMABB) + NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB) + NDS32_BUILTIN(kmabt, "kmabt", KMABT) + NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT) + NDS32_BUILTIN(kmatt, "kmatt", KMATT) + NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT) + NDS32_BUILTIN(kmada, "kmada", KMADA) + NDS32_BUILTIN(kmada, "v_kmada", V_KMADA) + NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA) + NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA) + NDS32_BUILTIN(kmads, "kmads", KMADS) + NDS32_BUILTIN(kmads, "v_kmads", V_KMADS) + NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS) + NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS) + NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS) + NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS) + NDS32_BUILTIN(kmsda, "kmsda", KMSDA) + NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA) + NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA) + NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA) + NDS32_BUILTIN(bpick1, "bpick", BPICK) + NDS32_BUILTIN(smar64_1, "smar64", SMAR64) + NDS32_BUILTIN(smsr64, "smsr64", SMSR64) + NDS32_BUILTIN(umar64_1, "umar64", UMAR64) + NDS32_BUILTIN(umsr64, "umsr64", UMSR64) + NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64) + NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64) + NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64) + NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64) + NDS32_BUILTIN(smalbb, "smalbb", SMALBB) + NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB) + NDS32_BUILTIN(smalbt, "smalbt", SMALBT) + NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT) + NDS32_BUILTIN(smaltt, "smaltt", SMALTT) + NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT) + NDS32_BUILTIN(smalda1, "smalda", SMALDA) + NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA) + NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA) + NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA) + NDS32_BUILTIN(smalds1, "smalds", SMALDS) + NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS) + NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS) + NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS) + NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS) + NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS) + NDS32_BUILTIN(smslda1, "smslda", SMSLDA) + NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA) + NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA) + NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA) + NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB) + NDS32_BUILTIN(kmmawb, 
"v_kmmawb", V_KMMAWB) + NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U) + NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U) + NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT) + NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT) + NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U) + NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U) + NDS32_BUILTIN(kmmac, "kmmac", KMMAC) + NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U) + NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB) + NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U) }; /* Three-argument intrinsics with an immediate third argument. */ @@ -634,6 +954,7 @@ static struct builtin_description bdesc_3argimm[] = NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW) NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W) NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW) + NDS32_BUILTIN(insb, "insb", INSB) }; /* Intrinsics that load a value. */ @@ -676,6 +997,11 @@ nds32_expand_builtin_impl (tree exp, unsigned i; struct builtin_description *d; + if (!NDS32_EXT_DSP_P () + && fcode > NDS32_BUILTIN_DSP_BEGIN + && fcode < NDS32_BUILTIN_DSP_END) + error ("don't support DSP extension instructions"); + switch (fcode) { /* FPU Register Transfer. */ @@ -812,6 +1138,9 @@ nds32_expand_builtin_impl (tree exp, case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL: emit_insn (gen_cctl_l1d_wball_one_lvl()); return target; + case NDS32_BUILTIN_CLROV: + emit_insn (gen_unspec_volatile_clrov ()); + return target; case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT: emit_insn (gen_unspec_standby_no_wake_grant ()); return target; @@ -947,10 +1276,18 @@ nds32_init_builtins_impl (void) NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) /* Looking for return type and argument can be found in tree.h file. */ + tree ptr_char_type_node = build_pointer_type (char_type_node); tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node); tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node); + tree ptr_short_type_node = build_pointer_type (short_integer_type_node); tree ptr_uint_type_node = build_pointer_type (unsigned_type_node); tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node); + tree v4qi_type_node = build_vector_type (intQI_type_node, 4); + tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4); + tree v2hi_type_node = build_vector_type (intHI_type_node, 2); + tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2); + tree v2si_type_node = build_vector_type (intSI_type_node, 2); + tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2); /* Cache. 
*/ ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC); @@ -1050,6 +1387,31 @@ nds32_init_builtins_impl (void) ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM); ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM); + /* SATURATION */ + ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW); + ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW); + ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH); + ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH); + ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB); + ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB); + ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT); + ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT); + ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB); + ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB); + ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT); + ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT); + ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB); + ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB); + ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT); + ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT); + ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB); + ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB); + ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT); + ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT); + ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW); + ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U); + ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV); + ADD_NDS32_BUILTIN0 ("clrov", void, CLROV); /* ROTR */ ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR); @@ -1109,4 +1471,384 @@ nds32_init_builtins_impl (void) ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED); ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED); + /* DSP Extension: SIMD 16bit Add and Subtract. 
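+     The entries below register one builtin per SIMD halfword add/subtract
+     instruction, in both a GPR-typed and a V2HI-typed ("v_"-prefixed) form.
+     As a rough, plain-C sketch of the per-lane semantics (illustrative only;
+     the authoritative definitions are in the Andes DSP ISA manual): the
+     plain forms add or subtract each 16-bit lane with wrap-around, the
+     r/ur forms are signed/unsigned halving operations, and the k/uk forms
+     saturate.  The *_lane helpers here are just names for this sketch:
+
+       short radd16_lane (short a, short b)
+       {
+         return (short) (((int) a + (int) b) >> 1);
+       }
+
+       short kadd16_lane (short a, short b)
+       {
+         int t = (int) a + (int) b;
+         return (short) (t > 32767 ? 32767 : t < -32768 ? -32768 : t);
+       }
+
+     The cras16/crsa16 families are the usual cross forms, pairing the high
+     lane of one operand with the low lane of the other: for cras16 the high
+     lane gets the add and the low lane the subtract, and vice versa for
+     crsa16.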
*/ + ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16); + ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16); + ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16); + ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16); + ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16); + ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16); + ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16); + ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16); + ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16); + ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16); + ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16); + ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16); + ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16); + ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16); + ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16); + ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16); + ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16); + ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16); + ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16); + ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16); + ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16); + ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16); + ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16); + ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16); + ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16); + ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16); + ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16); + ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16); + ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16); + ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16); + ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16); + ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16); + ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16); + ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16); + ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16); + ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16); + ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16); + ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16); + ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16); + ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16); + ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16); + ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16); + ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16); + ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16); + + /* DSP Extension: SIMD 8bit Add and Subtract. 
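+     These mirror the halfword forms above, but operate on the four byte
+     lanes of a 32-bit word.  A minimal per-lane sketch of the unsigned
+     saturating case (illustrative only):
+
+       unsigned char ukadd8_lane (unsigned char a, unsigned char b)
+       {
+         unsigned int t = (unsigned int) a + (unsigned int) b;
+         return (unsigned char) (t > 255 ? 255 : t);
+       }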
*/ + ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8); + ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8); + ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8); + ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8); + ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8); + ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8); + ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8); + ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8); + ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8); + ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8); + ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8); + ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8); + ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8); + ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8); + ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8); + ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8); + ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8); + ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8); + ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8); + ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8); + ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8); + ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8); + + /* DSP Extension: SIMD 16bit Shift. */ + ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16); + ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16); + ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U); + ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U); + ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16); + ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16); + ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U); + ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U); + ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16); + ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16); + ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16); + ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16); + ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16); + ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16); + ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U); + ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U); + + /* DSP Extension: 16bit Compare. */ + ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16); + ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16); + ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16); + ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16); + ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16); + ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16); + ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16); + ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16); + ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16); + ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16); + ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16); + + /* DSP Extension: 8bit Compare. 
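+     Like the 16-bit compares above, each of these produces a per-lane mask
+     rather than a single flag: a byte lane is set to all ones where the
+     relation holds and to zero where it does not, so the result can feed a
+     bpick-style bit select directly.  A per-lane sketch (illustrative only):
+
+       unsigned char scmplt8_lane (signed char a, signed char b)
+       {
+         return a < b ? 0xff : 0x00;
+       }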
*/ + ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8); + ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8); + ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8); + ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8); + ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8); + ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8); + ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8); + ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8); + ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8); + ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8); + ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8); + + /* DSP Extension: SIMD 16bit MISC. */ + ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16); + ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16); + ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16); + ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16); + ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16); + ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16); + ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16); + ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16); + ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16); + ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16); + ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16); + ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16); + ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16); + ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16); + ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16); + ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16); + ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16); + ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16); + ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16); + ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16); + ADD_NDS32_BUILTIN2 ("smulx16", + long_long_unsigned, unsigned, unsigned, SMULX16); + ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16); + ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16); + ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16); + ADD_NDS32_BUILTIN2 ("umulx16", + long_long_unsigned, unsigned, unsigned, UMULX16); + ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16); + + /* DSP Extension: SIMD 8bit MISC. */ + ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8); + ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8); + ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8); + ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8); + ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8); + ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8); + ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8); + ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8); + ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8); + ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8); + + /* DSP Extension: 8bit Unpacking. 
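+     The two digits in the builtin name select the source byte lanes:
+     sunpkd8<h><l> sign-extends byte lane <h> into the high halfword of the
+     result and byte lane <l> into the low halfword, and zunpkd8<h><l> is the
+     zero-extending form.  Only the 10, 20, 30 and 31 combinations exist as
+     single instructions (see nds32_output_unpkd8).  A sketch of sunpkd810,
+     assuming the little-endian lane numbering used elsewhere in this port
+     (illustrative only):
+
+       unsigned sunpkd810_model (unsigned w)
+       {
+         unsigned short hi = (unsigned short) (signed char) (w >> 8);
+         unsigned short lo = (unsigned short) (signed char) (w >> 0);
+         return ((unsigned) hi << 16) | lo;
+       }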
*/ + ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810); + ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810); + ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820); + ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820); + ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830); + ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830); + ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831); + ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831); + ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810); + ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810); + ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820); + ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820); + ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830); + ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830); + ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831); + ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831); + + /* DSP Extension: 32bit Add and Subtract. */ + ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW); + ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW); + ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW); + ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW); + + /* DSP Extension: 32bit Shift. */ + ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U); + ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL); + + /* DSP Extension: 16bit Packing. */ + ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16); + ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16); + ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16); + ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16); + ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16); + ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16); + ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16); + ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16); + + /* DSP Extension: Signed MSW 32x32 Multiply and ADD. */ + ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL); + ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U); + ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC); + ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U); + ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB); + ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U); + ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL); + ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U); + + /* DSP Extension: Most Significant Word 32x16 Multiply and ADD. 
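+     smmwb/smmwt multiply the full 32-bit first operand by the bottom/top
+     halfword of the second operand and keep only the most significant 32
+     bits of the 48-bit product; the _u variants round that result, and the
+     kmmawb/kmmawt families accumulate it with saturation.  A rough model of
+     the non-rounding case (illustrative only, not the authoritative ISA
+     definition):
+
+       int smmwb_model (int a, unsigned b)
+       {
+         long long prod = (long long) a * (short) (b & 0xffff);
+         return (int) (prod >> 16);
+       }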
*/ + ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB); + ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB); + ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U); + ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U); + ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT); + ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT); + ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U); + ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U); + ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB); + ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB); + ADD_NDS32_BUILTIN3 ("kmmawb_u", + integer, integer, integer, unsigned, KMMAWB_U); + ADD_NDS32_BUILTIN3 ("v_kmmawb_u", + integer, integer, integer, v2hi, V_KMMAWB_U); + ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT); + ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT); + ADD_NDS32_BUILTIN3 ("kmmawt_u", + integer, integer, integer, unsigned, KMMAWT_U); + ADD_NDS32_BUILTIN3 ("v_kmmawt_u", + integer, integer, integer, v2hi, V_KMMAWT_U); + + /* DSP Extension: Signed 16bit Multiply with ADD/Subtract. */ + ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB); + ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB); + ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT); + ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT); + ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT); + ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT); + ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA); + ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA); + ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA); + ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA); + ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS); + ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS); + ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS); + ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS); + ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS); + ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS); + ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB); + ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB); + ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT); + ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT); + ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT); + ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT); + ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA); + ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA); + ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA); + ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA); + ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS); + ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS); + ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS); + ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS); + ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS); + ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS); + ADD_NDS32_BUILTIN3 ("kmsda", 
integer, integer, unsigned, unsigned, KMSDA); + ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA); + ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA); + ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA); + + /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract. */ + ADD_NDS32_BUILTIN2 ("smal", long_long_integer, + long_long_integer, unsigned, SMAL); + ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer, + long_long_integer, v2hi, V_SMAL); + + /* DSP Extension: 32bit MISC. */ + ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV); + ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT); + ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK); + ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB); + + /* DSP Extension: 64bit Add and Subtract. */ + ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer, + long_long_integer, long_long_integer, SADD64); + ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UADD64); + ADD_NDS32_BUILTIN2 ("radd64", long_long_integer, + long_long_integer, long_long_integer, RADD64); + ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, URADD64); + ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer, + long_long_integer, long_long_integer, KADD64); + ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UKADD64); + ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer, + long_long_integer, long_long_integer, SSUB64); + ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, USUB64); + ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer, + long_long_integer, long_long_integer, RSUB64); + ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, URSUB64); + ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer, + long_long_integer, long_long_integer, KSUB64); + ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UKSUB64); + + /* DSP Extension: 32bit Multiply with 64bit Add/Subtract. */ + ADD_NDS32_BUILTIN3 ("smar64", long_long_integer, + long_long_integer, integer, integer, SMAR64); + ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer, + long_long_integer, integer, integer, SMSR64); + ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UMAR64); + ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UMSR64); + ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer, + long_long_integer, integer, integer, KMAR64); + ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer, + long_long_integer, integer, integer, KMSR64); + ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UKMAR64); + ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UKMSR64); + + /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract. 
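+     Each of these maintains a 64-bit accumulator in a register pair and
+     adds (smal*) or subtracts (smsl*) signed 16x16 lane products to it; the
+     b/t letters select the bottom or top halfword of each input and the x
+     variants cross the lanes.  A rough model of smalbb (illustrative only):
+
+       long long smalbb_model (long long acc, unsigned a, unsigned b)
+       {
+         return acc + (short) a * (short) b;
+       }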
*/ + ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer, + long_long_integer, unsigned, unsigned, SMALBB); + ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALBB); + ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer, + long_long_integer, unsigned, unsigned, SMALBT); + ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALBT); + ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer, + long_long_integer, unsigned, unsigned, SMALTT); + ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALTT); + ADD_NDS32_BUILTIN3 ("smalda", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDA); + ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDA); + ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer, + long_long_integer, unsigned, unsigned, SMALXDA); + ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALXDA); + ADD_NDS32_BUILTIN3 ("smalds", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDS); + ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDS); + ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDRS); + ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDRS); + ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer, + long_long_integer, unsigned, unsigned, SMALXDS); + ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALXDS); + ADD_NDS32_BUILTIN3 ("smslda", long_long_integer, + long_long_integer, unsigned, unsigned, SMSLDA); + ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMSLDA); + ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer, + long_long_integer, unsigned, unsigned, SMSLXDA); + ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMSLXDA); + + /* DSP Extension: augmented baseline. 
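+     These cover the scalar saturation helpers: uclip32 (x, imm) clamps a
+     signed 32-bit value to the unsigned range [0, 2^imm - 1], sclip32
+     clamps it to [-2^imm, 2^imm - 1], and kabs is an absolute value that
+     saturates instead of wrapping on the most negative input.  A rough
+     model of kabs (illustrative only):
+
+       int kabs_model (int x)
+       {
+         if (x == -2147483647 - 1)
+           return 2147483647;
+         return x < 0 ? -x : x;
+       }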
*/ + ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32); + ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32); + ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS); + + /* DSP Extension: vector type unaligned Load/Store */ + ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16); + ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16); + ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8); + ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8); + ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort, + u_v2hi, UASTORE_U16); + ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short, + v2hi, UASTORE_S16); + ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar, + u_v4qi, UASTORE_U8); + ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char, + v4qi, UASTORE_S8); } diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md index 24e7c0b..02f7285 100644 --- a/gcc/config/nds32/nds32-intrinsic.md +++ b/gcc/config/nds32/nds32-intrinsic.md @@ -1037,6 +1037,187 @@ (set_attr "length" "4")] ) +;; SATURATION + +(define_insn "unspec_kaddw" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "kaddw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubw" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "ksubw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kaddh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDH))] + "" + "kaddh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBH))] + "" + "ksubh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kaddh_dsp" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 15)] UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + "kaddh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubh_dsp" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 15)] UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + "ksubh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmbb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))] + "" + "kdmbb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmbt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))] + "" + "kdmbt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" 
"4")] +) + +(define_insn "unspec_kdmtb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))] + "" + "kdmtb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))] + "" + "kdmtt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmbb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))] + "" + "khmbb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmbt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))] + "" + "khmbt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmtb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))] + "" + "khmtb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))] + "" + "khmtt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kslraw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))] + "" + "kslraw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kslrawu" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))] + "" + "kslraw.u\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_rdov" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))] + "" + "rdov\t%0" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_clrov" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)] + "" + "clrov" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + ;; System (define_insn "unspec_sva" @@ -1495,4 +1676,15 @@ DONE; }) +;; abs alias kabs + +(define_insn "unspec_kabs" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))] + "" + "kabs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c index 720e85a..9696611 100644 --- a/gcc/config/nds32/nds32-md-auxiliary.c +++ b/gcc/config/nds32/nds32-md-auxiliary.c @@ -261,6 +261,118 @@ output_cond_branch_compare_zero (int code, const char *suffix, output_asm_insn (pattern, operands); } +static void 
+nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p) +{ + rtx src_high_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + src_high_part = nds32_di_high_part_subreg (src); + dst_low_part = nds32_di_low_part_subreg (dst); + + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + if (logic_shift_p) + { + emit_insn (gen_uwext (dst_low_part, src, + shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + else + { + emit_insn (gen_wext (dst_low_part, src, + shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + if (logic_shift_p) + { + emit_insn (gen_lshrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part, const0_rtx); + } + else + { + emit_insn (gen_ashrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + GEN_INT (31))); + } + } + } + else + { + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f))); + + if (logic_shift_p) + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + dst_high_part = 0 + */ + emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part_g32, const0_rtx); + } + else + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + # shift 31 for sign extend + dst_high_part = src_high_part >> 31 + */ + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part, + GEN_INT (31))); + } + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + /* ------------------------------------------------------------------------ */ /* Auxiliary function for expand RTL pattern. 
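+   Note on the DImode shift splitters in this file (nds32_split_shiftrtdi3
+   above and nds32_split_ashiftdi3/nds32_split_rotatertdi3 below): they avoid
+   a 64-bit shifter by combining two 32-bit operations.  Sketch for the
+   logical right shift of x = hi:lo by sa:
+
+     sa < 32 :  lo' = wext (x, sa)        extract bits sa .. sa+31 of x
+                hi' = hi >> sa
+     sa >= 32:  lo' = hi >> (sa & 0x1f)
+                hi' = 0                   (hi >> 31 for the arithmetic form)
+
+   For example, 0x1122334455667788 >> 40 yields lo' = 0x00112233 and hi' = 0,
+   i.e. hi = 0x11223344 shifted right by 8.  When the amount is only known at
+   run time, both forms are computed and cmovn selects one based on sa < 32.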
*/ @@ -1195,6 +1307,58 @@ nds32_emit_v3pop_fpr_callee_saved (int base) } } +enum nds32_expand_result_type +nds32_expand_extv (rtx *operands) +{ + gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3])); + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + rtx dst = operands[0]; + rtx src = operands[1]; + + if (MEM_P (src) + && width == 32 + && (bitpos % BITS_PER_UNIT) == 0 + && GET_MODE_BITSIZE (GET_MODE (dst)) == width) + { + rtx newmem = adjust_address (src, GET_MODE (dst), + bitpos / BITS_PER_UNIT); + + rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); + + emit_insn (gen_unaligned_loadsi (dst, base_addr)); + + return EXPAND_DONE; + } + return EXPAND_FAIL; +} + +enum nds32_expand_result_type +nds32_expand_insv (rtx *operands) +{ + gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2])); + HOST_WIDE_INT width = INTVAL (operands[1]); + HOST_WIDE_INT bitpos = INTVAL (operands[2]); + rtx dst = operands[0]; + rtx src = operands[3]; + + if (MEM_P (dst) + && width == 32 + && (bitpos % BITS_PER_UNIT) == 0 + && GET_MODE_BITSIZE (GET_MODE (src)) == width) + { + rtx newmem = adjust_address (dst, GET_MODE (src), + bitpos / BITS_PER_UNIT); + + rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); + + emit_insn (gen_unaligned_storesi (base_addr, src)); + + return EXPAND_DONE; + } + return EXPAND_FAIL; +} + /* ------------------------------------------------------------------------ */ /* Function to return memory format. */ @@ -2424,7 +2588,10 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) } else { - reg[0] = operands[0]; + if (VECTOR_MODE_P (mode)) + reg[0] = gen_reg_rtx (SImode); + else + reg[0] = operands[0]; } for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) @@ -2466,6 +2633,8 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) offset = offset + offset_adj; } } + if (VECTOR_MODE_P (mode)) + convert_move (operands[0], reg[0], false); } void @@ -2508,7 +2677,13 @@ nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode) } else { - reg[0] = operands[1]; + if (VECTOR_MODE_P (mode)) + { + reg[0] = gen_reg_rtx (SImode); + convert_move (reg[0], operands[1], false); + } + else + reg[0] = operands[1]; } for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) @@ -2765,6 +2940,36 @@ nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands) return ""; } +const char * +nds32_output_unpkd8 (rtx output, rtx input, + rtx high_idx_rtx, rtx low_idx_rtx, + bool signed_p) +{ + char pattern[100]; + rtx output_operands[2]; + HOST_WIDE_INT high_idx, low_idx; + high_idx = INTVAL (high_idx_rtx); + low_idx = INTVAL (low_idx_rtx); + + gcc_assert (high_idx >= 0 && high_idx <= 3); + gcc_assert (low_idx >= 0 && low_idx <= 3); + + /* We only have 10, 20, 30 and 31. */ + if ((low_idx != 0 || high_idx == 0) && + !(low_idx == 1 && high_idx == 3)) + return "#"; + + char sign_char = signed_p ? 's' : 'z'; + + sprintf (pattern, + "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1", + sign_char, high_idx, low_idx); + output_operands[0] = output; + output_operands[1] = input; + output_asm_insn (pattern, output_operands); + return ""; +} + /* Return true if SYMBOL_REF X binds locally. */ static bool @@ -2815,6 +3020,91 @@ nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call, return ""; } +bool +nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + /* smds or smdrs. 
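+     Both products pair matching halves of the two inputs and the two
+     products use different halves, i.e. the expression has the shape
+     out = in0[i] * in1[i] - in0[j] * in1[j] with i != j, which a single
+     smds or smdrs instruction can compute directly, so no split is needed.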
*/ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + return false; + + /* smxds. */ + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + return false; + + return true; +} + +const char * +nds32_output_sms (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + if (nds32_need_split_sms_p (in0_idx0, in1_idx0, + in0_idx1, in1_idx1)) + return "#"; + /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */ + + /* smds or smdrs. */ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + { + if (INTVAL (in0_idx0) == 0) + { + if (TARGET_BIG_ENDIAN) + return "smds\t%0, %1, %2"; + else + return "smdrs\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smdrs\t%0, %1, %2"; + else + return "smds\t%0, %1, %2"; + } + } + + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + { + if (INTVAL (in0_idx0) == 1) + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %2, %1"; + else + return "smxds\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %1, %2"; + else + return "smxds\t%0, %2, %1"; + } + } + + gcc_unreachable (); + return ""; +} + +void +nds32_split_sms (rtx out, rtx in0, rtx in1, + rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, in0, in1, + in0_idx0, in1_idx0)); + emit_insn (gen_mulhisi3v (result1, in0, in1, + in0_idx1, in1_idx1)); + emit_insn (gen_subsi3 (out, result0, result1)); +} + /* Spilt a doubleword instrucion to two single word instructions. */ void nds32_spilt_doubleword (rtx *operands, bool load_p) @@ -2924,6 +3214,192 @@ nds32_spilt_doubleword (rtx *operands, bool load_p) } } +void +nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx src_high_part, src_low_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + /* We need to handle shift more than 32 bit!!!! */ + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + rtx ext_start; + ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode); + + emit_insn (gen_wext (dst_high_part, src, ext_start)); + emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount)); + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + emit_insn (gen_ashlsi3 (dst_high_part, src_low_part, + new_shift_amout)); + + emit_move_insn (dst_low_part, GEN_INT (0)); + } + } + else + { + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + rtx ext_start; + ext_start = gen_reg_rtx (SImode); + + /* + if (shiftamount < 32) + dst_low_part = src_low_part << shiftamout + dst_high_part = wext (src, 32 - shiftamount) + # wext can't handle wext (src, 32) since it's only take rb[0:4] + # for extract. + dst_high_part = shiftamount == 0 ? 
src_high_part : dst_high_part + else + dst_low_part = 0 + dst_high_part = src_low_part << shiftamount & 0x1f + */ + + emit_insn (gen_subsi3 (ext_start, + gen_int_mode (32, SImode), + shiftamount)); + emit_insn (gen_wext (dst_high_part_l32, src, ext_start)); + + /* Handle for shiftamout == 0. */ + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount)); + + emit_move_insn (dst_low_part_g32, const0_rtx); + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part, + new_shift_amout)); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + +void +nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, false); +} + +void +nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, true); +} + +void +nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx select_reg, low5bit, low5bit_inv, minus32sa; + rtx dst_low_part_g32_tmph; + rtx dst_low_part_g32_tmpl; + rtx dst_high_part_l32_tmph; + rtx dst_high_part_l32_tmpl; + + rtx src_low_part, src_high_part; + rtx dst_high_part, dst_low_part; + + shiftamount = force_reg (SImode, shiftamount); + + emit_insn (gen_andsi3 (shiftamount, + shiftamount, + gen_int_mode (0x3f, SImode))); + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + low5bit = gen_reg_rtx (SImode); + low5bit_inv = gen_reg_rtx (SImode); + minus32sa = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + dst_low_part_g32_tmph = gen_reg_rtx (SImode); + dst_low_part_g32_tmpl = gen_reg_rtx (SImode); + + dst_high_part_l32_tmph = gen_reg_rtx (SImode); + dst_high_part_l32_tmpl = gen_reg_rtx (SImode); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + /* if shiftamount < 32 + dst_low_part = wext(src, shiftamount) + else + dst_low_part = ((src_high_part >> (shiftamount & 0x1f)) + | (src_low_part << (32 - (shiftamount & 0x1f)))) + */ + emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode))); + emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit)); + + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + + emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit)); + emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv)); + + emit_insn (gen_iorsi3 (dst_low_part_g32, + dst_low_part_g32_tmpl, + dst_low_part_g32_tmph)); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + + /* if shiftamount < 32 + dst_high_part = ((src_high_part >> shiftamount) + | (src_low_part << (32 - shiftamount))) + dst_high_part = shiftamount == 0 ? 
src_high_part : dst_high_part + else + dst_high_part = wext(src, shiftamount & 0x1f) + */ + + emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount)); + + emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount)); + emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa)); + + emit_insn (gen_iorsi3 (dst_high_part_l32, + dst_high_part_l32_tmpl, + dst_high_part_l32_tmph)); + + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_wext (dst_high_part_g32, src, low5bit)); + + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); +} /* Return true X is need use long call. */ bool nds32_long_call_p (rtx symbol) @@ -2976,3 +3452,22 @@ nds32_expand_constant (machine_mode mode, HOST_WIDE_INT val, emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp)); } } + +/* Auxiliary functions for manipulation DI mode. */ +rtx nds32_di_high_part_subreg(rtx reg) +{ + unsigned high_part_offset = subreg_highpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, high_part_offset); +} + +rtx nds32_di_low_part_subreg(rtx reg) +{ + unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, low_part_offset); +} diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c index 8dea130..d02aabf 100644 --- a/gcc/config/nds32/nds32-memory-manipulation.c +++ b/gcc/config/nds32/nds32-memory-manipulation.c @@ -449,29 +449,44 @@ nds32_gen_dup_4_byte_to_word_value (rtx value) } else { - /* ! prepare word - andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab - slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 - or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab - slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 - or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ - - rtx tmp1, tmp2, tmp3, tmp4, final_value; - tmp1 = expand_binop (SImode, and_optab, value, - gen_int_mode (0xff, SImode), - NULL_RTX, 0, OPTAB_WIDEN); - tmp2 = expand_binop (SImode, ashl_optab, tmp1, - gen_int_mode (8, SImode), - NULL_RTX, 0, OPTAB_WIDEN); - tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, - NULL_RTX, 0, OPTAB_WIDEN); - tmp4 = expand_binop (SImode, ashl_optab, tmp3, - gen_int_mode (16, SImode), - NULL_RTX, 0, OPTAB_WIDEN); - - final_value = expand_binop (SImode, ior_optab, tmp3, tmp4, - NULL_RTX, 0, OPTAB_WIDEN); - emit_move_insn (value4word, final_value); + if (NDS32_EXT_DSP_P ()) + { + /* ! prepare word + insb $tmp, $value, 1 ! $tmp <- 0x0000abab + pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ + rtx tmp = gen_reg_rtx (SImode); + + convert_move (tmp, value, true); + + emit_insn ( + gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); + + emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); + } + else + { + /* ! prepare word + andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab + slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 + or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab + slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 + or $val4word, $tmp3, $tmp4 ! 
$value4word <- 0xabababab */ + + rtx tmp1, tmp2, tmp3, tmp4; + tmp1 = expand_binop (SImode, and_optab, value, + gen_int_mode (0xff, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + tmp2 = expand_binop (SImode, ashl_optab, tmp1, + gen_int_mode (8, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, + NULL_RTX, 0, OPTAB_WIDEN); + tmp4 = expand_binop (SImode, ashl_optab, tmp3, + gen_int_mode (16, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_insn (gen_iorsi3 (value4word, tmp3, tmp4)); + } } return value4word; diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c index a670623..2363b96 100644 --- a/gcc/config/nds32/nds32-predicates.c +++ b/gcc/config/nds32/nds32-predicates.c @@ -518,4 +518,81 @@ nds32_const_double_range_ok_p (rtx op, machine_mode mode, return val >= lower && val < upper; } + +HOST_WIDE_INT +const_vector_to_hwint (rtx op) +{ + HOST_WIDE_INT hwint = 0; + HOST_WIDE_INT mask; + int i; + int shift_adv; + int shift = 0; + int nelem; + + switch (GET_MODE (op)) + { + case V2HImode: + mask = 0xffff; + shift_adv = 16; + nelem = 2; + break; + case V4QImode: + mask = 0xff; + shift_adv = 8; + nelem = 4; + break; + default: + gcc_unreachable (); + } + + if (TARGET_BIG_ENDIAN) + { + for (i = 0; i < nelem; ++i) + { + HOST_WIDE_INT val = XINT (XVECEXP (op, 0, nelem - i - 1), 0); + hwint |= (val & mask) << shift; + shift = shift + shift_adv; + } + } + else + { + for (i = 0; i < nelem; ++i) + { + HOST_WIDE_INT val = XINT (XVECEXP (op, 0, i), 0); + hwint |= (val & mask) << shift; + shift = shift + shift_adv; + } + } + + return hwint; +} + +bool +nds32_valid_CVp5_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16)); +} + +bool +nds32_valid_CVs5_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < (1 << 4)) && (ival >= -(1 << 4)); +} + +bool +nds32_valid_CVs2_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < (1 << 19)) && (ival >= -(1 << 19)); +} + +bool +nds32_valid_CVhi_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival != 0) && ((ival & 0xfff) == 0); +} + /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index e7b7d41..64500e3 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -73,6 +73,11 @@ extern unsigned int nds32_dbx_register_number (unsigned int); extern bool nds32_valid_smw_lwm_base_p (rtx); +/* Auxiliary functions for manipulation DI mode. */ + +extern rtx nds32_di_high_part_subreg(rtx); +extern rtx nds32_di_low_part_subreg(rtx); + /* Auxiliary functions for expanding rtl used in nds32-multiple.md. */ extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *); @@ -159,6 +164,10 @@ extern void nds32_expand_float_cstore (rtx *); extern enum nds32_expand_result_type nds32_expand_movcc (rtx *); extern void nds32_expand_float_movcc (rtx *); +/* Auxiliary functions for expand extv/insv instruction. */ + +extern enum nds32_expand_result_type nds32_expand_extv (rtx *); +extern enum nds32_expand_result_type nds32_expand_insv (rtx *); /* Auxiliary functions to identify long-call symbol. 
*/ extern bool nds32_long_call_p (rtx); @@ -193,6 +202,8 @@ extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn * rtx *); extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *); +extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool); + extern const char *nds32_output_call (rtx, rtx *, rtx, const char *, const char *, bool); @@ -203,9 +214,19 @@ extern const char *nds32_output_stack_push (rtx); extern const char *nds32_output_stack_pop (rtx); extern const char *nds32_output_return (void); + +/* Auxiliary functions to split/output sms pattern. */ +extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx); +extern const char *nds32_output_sms (rtx, rtx, rtx, rtx); +extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + /* Auxiliary functions to split double word RTX pattern. */ extern void nds32_spilt_doubleword (rtx *, bool); +extern void nds32_split_ashiftdi3 (rtx, rtx, rtx); +extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx); +extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx); +extern void nds32_split_rotatertdi3 (rtx, rtx, rtx); /* Auxiliary functions to split large constant RTX pattern. */ @@ -246,6 +267,14 @@ extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool); /* Auxiliary functions for pre-define marco. */ extern void nds32_cpu_cpp_builtins(struct cpp_reader *); +/* Auxiliary functions for const_vector's constraints. */ + +extern HOST_WIDE_INT const_vector_to_hwint (rtx); +extern bool nds32_valid_CVp5_p (rtx); +extern bool nds32_valid_CVs5_p (rtx); +extern bool nds32_valid_CVs2_p (rtx); +extern bool nds32_valid_CVhi_p (rtx); + extern bool nds32_split_double_word_load_store_p (rtx *,bool); namespace nds32 { diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c index b0151be..203cd11 100644 --- a/gcc/config/nds32/nds32-utils.c +++ b/gcc/config/nds32/nds32-utils.c @@ -413,6 +413,7 @@ extract_mac_non_acc_rtx (rtx_insn *insn) switch (get_attr_type (insn)) { case TYPE_MAC: + case TYPE_DMAC: if (REG_P (XEXP (exp, 0))) return XEXP (exp, 1); else diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index da8af4c..50eb709 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -1987,6 +1987,16 @@ nds32_function_arg_boundary (machine_mode mode, const_tree type) : PARM_BOUNDARY); } +bool +nds32_vector_mode_supported_p (machine_mode mode) +{ + if (mode == V4QImode + || mode == V2HImode) + return NDS32_EXT_DSP_P (); + + return false; +} + /* -- How Scalar Function Values Are Returned. */ static rtx @@ -2688,6 +2698,23 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) } } +static machine_mode +nds32_vectorize_preferred_simd_mode (scalar_mode mode) +{ + if (!NDS32_EXT_DSP_P ()) + return word_mode; + + switch (mode) + { + case E_QImode: + return V4QImode; + case E_HImode: + return V2HImode; + default: + return word_mode; + } +} + /* Condition Code Status. */ @@ -2978,6 +3005,18 @@ nds32_print_operand (FILE *stream, rtx x, int code) /* No need to handle following process, so return immediately. */ return; + + case 'v': + gcc_assert (CONST_INT_P (x) + && (INTVAL (x) == 0 + || INTVAL (x) == 8 + || INTVAL (x) == 16 + || INTVAL (x) == 24)); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + + /* No need to handle following process, so return immediately. */ + return; + case 'B': /* Use exact_log2() to search the 1-bit position. 
*/ gcc_assert (CONST_INT_P (x)); @@ -3168,6 +3207,10 @@ nds32_print_operand (FILE *stream, rtx x, int code) output_addr_const (stream, x); break; + case CONST_VECTOR: + fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x)); + break; + default: /* Generally, output_addr_const () is able to handle most cases. We want to see what CODE could appear, @@ -3260,6 +3303,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) reg_names[REGNO (XEXP (op0, 0))], sv); } + else if (GET_CODE (op0) == ASHIFT && REG_P (op1)) + { + /* [Ra + Rb << sv] + In normal, ASHIFT can be converted to MULT like above case. + But when the address rtx does not go through canonicalize_address + defined in fwprop, we'll need this case. */ + int sv = INTVAL (XEXP (op0, 1)); + gcc_assert (sv <= 3 && sv >=0); + + fprintf (stream, "[%s + %s << %d]", + reg_names[REGNO (op1)], + reg_names[REGNO (XEXP (op0, 0))], + sv); + } else { /* The control flow is not supposed to be here. */ @@ -3770,6 +3827,8 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile) builtin_define ("__NDS32_GP_DIRECT__"); if (TARGET_VH) builtin_define ("__NDS32_VH__"); + if (NDS32_EXT_DSP_P ()) + builtin_define ("__NDS32_EXT_DSP__"); if (TARGET_BIG_ENDIAN) builtin_define ("__big_endian__"); @@ -5010,6 +5069,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode, #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p + /* -- How Scalar Function Values Are Returned. */ #undef TARGET_FUNCTION_VALUE @@ -5087,6 +5149,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode, #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode + /* Anchored Addresses. */ diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h index 56dd7cf..229012a 100644 --- a/gcc/config/nds32/nds32.h +++ b/gcc/config/nds32/nds32.h @@ -140,6 +140,9 @@ enum nds32_16bit_address_type Check gcc/common/config/nds32/nds32-common.c for the optimizations that apply -malways-align. */ #define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN) + +#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP) + /* Get alignment according to mode or type information. When 'type' is nonnull, there is no need to look at 'mode'. 
*/ #define NDS32_MODE_TYPE_ALIGN(mode, type) \ @@ -439,7 +442,30 @@ enum nds32_builtins NDS32_BUILTIN_FFB, NDS32_BUILTIN_FFMISM, NDS32_BUILTIN_FLMISM, - + NDS32_BUILTIN_KADDW, + NDS32_BUILTIN_KSUBW, + NDS32_BUILTIN_KADDH, + NDS32_BUILTIN_KSUBH, + NDS32_BUILTIN_KDMBB, + NDS32_BUILTIN_V_KDMBB, + NDS32_BUILTIN_KDMBT, + NDS32_BUILTIN_V_KDMBT, + NDS32_BUILTIN_KDMTB, + NDS32_BUILTIN_V_KDMTB, + NDS32_BUILTIN_KDMTT, + NDS32_BUILTIN_V_KDMTT, + NDS32_BUILTIN_KHMBB, + NDS32_BUILTIN_V_KHMBB, + NDS32_BUILTIN_KHMBT, + NDS32_BUILTIN_V_KHMBT, + NDS32_BUILTIN_KHMTB, + NDS32_BUILTIN_V_KHMTB, + NDS32_BUILTIN_KHMTT, + NDS32_BUILTIN_V_KHMTT, + NDS32_BUILTIN_KSLRAW, + NDS32_BUILTIN_KSLRAW_U, + NDS32_BUILTIN_RDOV, + NDS32_BUILTIN_CLROV, NDS32_BUILTIN_ROTR, NDS32_BUILTIN_SVA, NDS32_BUILTIN_SVS, @@ -512,7 +538,295 @@ enum nds32_builtins NDS32_BUILTIN_SET_TRIG_LEVEL, NDS32_BUILTIN_SET_TRIG_EDGE, NDS32_BUILTIN_GET_TRIG_TYPE, - + NDS32_BUILTIN_DSP_BEGIN, + NDS32_BUILTIN_ADD16, + NDS32_BUILTIN_V_UADD16, + NDS32_BUILTIN_V_SADD16, + NDS32_BUILTIN_RADD16, + NDS32_BUILTIN_V_RADD16, + NDS32_BUILTIN_URADD16, + NDS32_BUILTIN_V_URADD16, + NDS32_BUILTIN_KADD16, + NDS32_BUILTIN_V_KADD16, + NDS32_BUILTIN_UKADD16, + NDS32_BUILTIN_V_UKADD16, + NDS32_BUILTIN_SUB16, + NDS32_BUILTIN_V_USUB16, + NDS32_BUILTIN_V_SSUB16, + NDS32_BUILTIN_RSUB16, + NDS32_BUILTIN_V_RSUB16, + NDS32_BUILTIN_URSUB16, + NDS32_BUILTIN_V_URSUB16, + NDS32_BUILTIN_KSUB16, + NDS32_BUILTIN_V_KSUB16, + NDS32_BUILTIN_UKSUB16, + NDS32_BUILTIN_V_UKSUB16, + NDS32_BUILTIN_CRAS16, + NDS32_BUILTIN_V_UCRAS16, + NDS32_BUILTIN_V_SCRAS16, + NDS32_BUILTIN_RCRAS16, + NDS32_BUILTIN_V_RCRAS16, + NDS32_BUILTIN_URCRAS16, + NDS32_BUILTIN_V_URCRAS16, + NDS32_BUILTIN_KCRAS16, + NDS32_BUILTIN_V_KCRAS16, + NDS32_BUILTIN_UKCRAS16, + NDS32_BUILTIN_V_UKCRAS16, + NDS32_BUILTIN_CRSA16, + NDS32_BUILTIN_V_UCRSA16, + NDS32_BUILTIN_V_SCRSA16, + NDS32_BUILTIN_RCRSA16, + NDS32_BUILTIN_V_RCRSA16, + NDS32_BUILTIN_URCRSA16, + NDS32_BUILTIN_V_URCRSA16, + NDS32_BUILTIN_KCRSA16, + NDS32_BUILTIN_V_KCRSA16, + NDS32_BUILTIN_UKCRSA16, + NDS32_BUILTIN_V_UKCRSA16, + NDS32_BUILTIN_ADD8, + NDS32_BUILTIN_V_UADD8, + NDS32_BUILTIN_V_SADD8, + NDS32_BUILTIN_RADD8, + NDS32_BUILTIN_V_RADD8, + NDS32_BUILTIN_URADD8, + NDS32_BUILTIN_V_URADD8, + NDS32_BUILTIN_KADD8, + NDS32_BUILTIN_V_KADD8, + NDS32_BUILTIN_UKADD8, + NDS32_BUILTIN_V_UKADD8, + NDS32_BUILTIN_SUB8, + NDS32_BUILTIN_V_USUB8, + NDS32_BUILTIN_V_SSUB8, + NDS32_BUILTIN_RSUB8, + NDS32_BUILTIN_V_RSUB8, + NDS32_BUILTIN_URSUB8, + NDS32_BUILTIN_V_URSUB8, + NDS32_BUILTIN_KSUB8, + NDS32_BUILTIN_V_KSUB8, + NDS32_BUILTIN_UKSUB8, + NDS32_BUILTIN_V_UKSUB8, + NDS32_BUILTIN_SRA16, + NDS32_BUILTIN_V_SRA16, + NDS32_BUILTIN_SRA16_U, + NDS32_BUILTIN_V_SRA16_U, + NDS32_BUILTIN_SRL16, + NDS32_BUILTIN_V_SRL16, + NDS32_BUILTIN_SRL16_U, + NDS32_BUILTIN_V_SRL16_U, + NDS32_BUILTIN_SLL16, + NDS32_BUILTIN_V_SLL16, + NDS32_BUILTIN_KSLL16, + NDS32_BUILTIN_V_KSLL16, + NDS32_BUILTIN_KSLRA16, + NDS32_BUILTIN_V_KSLRA16, + NDS32_BUILTIN_KSLRA16_U, + NDS32_BUILTIN_V_KSLRA16_U, + NDS32_BUILTIN_CMPEQ16, + NDS32_BUILTIN_V_SCMPEQ16, + NDS32_BUILTIN_V_UCMPEQ16, + NDS32_BUILTIN_SCMPLT16, + NDS32_BUILTIN_V_SCMPLT16, + NDS32_BUILTIN_SCMPLE16, + NDS32_BUILTIN_V_SCMPLE16, + NDS32_BUILTIN_UCMPLT16, + NDS32_BUILTIN_V_UCMPLT16, + NDS32_BUILTIN_UCMPLE16, + NDS32_BUILTIN_V_UCMPLE16, + NDS32_BUILTIN_CMPEQ8, + NDS32_BUILTIN_V_SCMPEQ8, + NDS32_BUILTIN_V_UCMPEQ8, + NDS32_BUILTIN_SCMPLT8, + NDS32_BUILTIN_V_SCMPLT8, + NDS32_BUILTIN_SCMPLE8, + NDS32_BUILTIN_V_SCMPLE8, + NDS32_BUILTIN_UCMPLT8, + NDS32_BUILTIN_V_UCMPLT8, + 
NDS32_BUILTIN_UCMPLE8, + NDS32_BUILTIN_V_UCMPLE8, + NDS32_BUILTIN_SMIN16, + NDS32_BUILTIN_V_SMIN16, + NDS32_BUILTIN_UMIN16, + NDS32_BUILTIN_V_UMIN16, + NDS32_BUILTIN_SMAX16, + NDS32_BUILTIN_V_SMAX16, + NDS32_BUILTIN_UMAX16, + NDS32_BUILTIN_V_UMAX16, + NDS32_BUILTIN_SCLIP16, + NDS32_BUILTIN_V_SCLIP16, + NDS32_BUILTIN_UCLIP16, + NDS32_BUILTIN_V_UCLIP16, + NDS32_BUILTIN_KHM16, + NDS32_BUILTIN_V_KHM16, + NDS32_BUILTIN_KHMX16, + NDS32_BUILTIN_V_KHMX16, + NDS32_BUILTIN_KABS16, + NDS32_BUILTIN_V_KABS16, + NDS32_BUILTIN_SMIN8, + NDS32_BUILTIN_V_SMIN8, + NDS32_BUILTIN_UMIN8, + NDS32_BUILTIN_V_UMIN8, + NDS32_BUILTIN_SMAX8, + NDS32_BUILTIN_V_SMAX8, + NDS32_BUILTIN_UMAX8, + NDS32_BUILTIN_V_UMAX8, + NDS32_BUILTIN_KABS8, + NDS32_BUILTIN_V_KABS8, + NDS32_BUILTIN_SUNPKD810, + NDS32_BUILTIN_V_SUNPKD810, + NDS32_BUILTIN_SUNPKD820, + NDS32_BUILTIN_V_SUNPKD820, + NDS32_BUILTIN_SUNPKD830, + NDS32_BUILTIN_V_SUNPKD830, + NDS32_BUILTIN_SUNPKD831, + NDS32_BUILTIN_V_SUNPKD831, + NDS32_BUILTIN_ZUNPKD810, + NDS32_BUILTIN_V_ZUNPKD810, + NDS32_BUILTIN_ZUNPKD820, + NDS32_BUILTIN_V_ZUNPKD820, + NDS32_BUILTIN_ZUNPKD830, + NDS32_BUILTIN_V_ZUNPKD830, + NDS32_BUILTIN_ZUNPKD831, + NDS32_BUILTIN_V_ZUNPKD831, + NDS32_BUILTIN_RADDW, + NDS32_BUILTIN_URADDW, + NDS32_BUILTIN_RSUBW, + NDS32_BUILTIN_URSUBW, + NDS32_BUILTIN_SRA_U, + NDS32_BUILTIN_KSLL, + NDS32_BUILTIN_PKBB16, + NDS32_BUILTIN_V_PKBB16, + NDS32_BUILTIN_PKBT16, + NDS32_BUILTIN_V_PKBT16, + NDS32_BUILTIN_PKTB16, + NDS32_BUILTIN_V_PKTB16, + NDS32_BUILTIN_PKTT16, + NDS32_BUILTIN_V_PKTT16, + NDS32_BUILTIN_SMMUL, + NDS32_BUILTIN_SMMUL_U, + NDS32_BUILTIN_KMMAC, + NDS32_BUILTIN_KMMAC_U, + NDS32_BUILTIN_KMMSB, + NDS32_BUILTIN_KMMSB_U, + NDS32_BUILTIN_KWMMUL, + NDS32_BUILTIN_KWMMUL_U, + NDS32_BUILTIN_SMMWB, + NDS32_BUILTIN_V_SMMWB, + NDS32_BUILTIN_SMMWB_U, + NDS32_BUILTIN_V_SMMWB_U, + NDS32_BUILTIN_SMMWT, + NDS32_BUILTIN_V_SMMWT, + NDS32_BUILTIN_SMMWT_U, + NDS32_BUILTIN_V_SMMWT_U, + NDS32_BUILTIN_KMMAWB, + NDS32_BUILTIN_V_KMMAWB, + NDS32_BUILTIN_KMMAWB_U, + NDS32_BUILTIN_V_KMMAWB_U, + NDS32_BUILTIN_KMMAWT, + NDS32_BUILTIN_V_KMMAWT, + NDS32_BUILTIN_KMMAWT_U, + NDS32_BUILTIN_V_KMMAWT_U, + NDS32_BUILTIN_SMBB, + NDS32_BUILTIN_V_SMBB, + NDS32_BUILTIN_SMBT, + NDS32_BUILTIN_V_SMBT, + NDS32_BUILTIN_SMTT, + NDS32_BUILTIN_V_SMTT, + NDS32_BUILTIN_KMDA, + NDS32_BUILTIN_V_KMDA, + NDS32_BUILTIN_KMXDA, + NDS32_BUILTIN_V_KMXDA, + NDS32_BUILTIN_SMDS, + NDS32_BUILTIN_V_SMDS, + NDS32_BUILTIN_SMDRS, + NDS32_BUILTIN_V_SMDRS, + NDS32_BUILTIN_SMXDS, + NDS32_BUILTIN_V_SMXDS, + NDS32_BUILTIN_KMABB, + NDS32_BUILTIN_V_KMABB, + NDS32_BUILTIN_KMABT, + NDS32_BUILTIN_V_KMABT, + NDS32_BUILTIN_KMATT, + NDS32_BUILTIN_V_KMATT, + NDS32_BUILTIN_KMADA, + NDS32_BUILTIN_V_KMADA, + NDS32_BUILTIN_KMAXDA, + NDS32_BUILTIN_V_KMAXDA, + NDS32_BUILTIN_KMADS, + NDS32_BUILTIN_V_KMADS, + NDS32_BUILTIN_KMADRS, + NDS32_BUILTIN_V_KMADRS, + NDS32_BUILTIN_KMAXDS, + NDS32_BUILTIN_V_KMAXDS, + NDS32_BUILTIN_KMSDA, + NDS32_BUILTIN_V_KMSDA, + NDS32_BUILTIN_KMSXDA, + NDS32_BUILTIN_V_KMSXDA, + NDS32_BUILTIN_SMAL, + NDS32_BUILTIN_V_SMAL, + NDS32_BUILTIN_BITREV, + NDS32_BUILTIN_WEXT, + NDS32_BUILTIN_BPICK, + NDS32_BUILTIN_INSB, + NDS32_BUILTIN_SADD64, + NDS32_BUILTIN_UADD64, + NDS32_BUILTIN_RADD64, + NDS32_BUILTIN_URADD64, + NDS32_BUILTIN_KADD64, + NDS32_BUILTIN_UKADD64, + NDS32_BUILTIN_SSUB64, + NDS32_BUILTIN_USUB64, + NDS32_BUILTIN_RSUB64, + NDS32_BUILTIN_URSUB64, + NDS32_BUILTIN_KSUB64, + NDS32_BUILTIN_UKSUB64, + NDS32_BUILTIN_SMAR64, + NDS32_BUILTIN_SMSR64, + NDS32_BUILTIN_UMAR64, + NDS32_BUILTIN_UMSR64, + NDS32_BUILTIN_KMAR64, + 
NDS32_BUILTIN_KMSR64, + NDS32_BUILTIN_UKMAR64, + NDS32_BUILTIN_UKMSR64, + NDS32_BUILTIN_SMALBB, + NDS32_BUILTIN_V_SMALBB, + NDS32_BUILTIN_SMALBT, + NDS32_BUILTIN_V_SMALBT, + NDS32_BUILTIN_SMALTT, + NDS32_BUILTIN_V_SMALTT, + NDS32_BUILTIN_SMALDA, + NDS32_BUILTIN_V_SMALDA, + NDS32_BUILTIN_SMALXDA, + NDS32_BUILTIN_V_SMALXDA, + NDS32_BUILTIN_SMALDS, + NDS32_BUILTIN_V_SMALDS, + NDS32_BUILTIN_SMALDRS, + NDS32_BUILTIN_V_SMALDRS, + NDS32_BUILTIN_SMALXDS, + NDS32_BUILTIN_V_SMALXDS, + NDS32_BUILTIN_SMUL16, + NDS32_BUILTIN_V_SMUL16, + NDS32_BUILTIN_SMULX16, + NDS32_BUILTIN_V_SMULX16, + NDS32_BUILTIN_UMUL16, + NDS32_BUILTIN_V_UMUL16, + NDS32_BUILTIN_UMULX16, + NDS32_BUILTIN_V_UMULX16, + NDS32_BUILTIN_SMSLDA, + NDS32_BUILTIN_V_SMSLDA, + NDS32_BUILTIN_SMSLXDA, + NDS32_BUILTIN_V_SMSLXDA, + NDS32_BUILTIN_UCLIP32, + NDS32_BUILTIN_SCLIP32, + NDS32_BUILTIN_KABS, + NDS32_BUILTIN_UALOAD_U16, + NDS32_BUILTIN_UALOAD_S16, + NDS32_BUILTIN_UALOAD_U8, + NDS32_BUILTIN_UALOAD_S8, + NDS32_BUILTIN_UASTORE_U16, + NDS32_BUILTIN_UASTORE_S16, + NDS32_BUILTIN_UASTORE_U8, + NDS32_BUILTIN_UASTORE_S8, + NDS32_BUILTIN_DSP_END, NDS32_BUILTIN_UNALIGNED_FEATURE, NDS32_BUILTIN_ENABLE_UNALIGNED, NDS32_BUILTIN_DISABLE_UNALIGNED, @@ -576,6 +890,13 @@ enum nds32_builtins #endif #define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2 + +#ifdef TARGET_DEFAULT_EXT_DSP +# define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}" +#else +# define NDS32_EXT_DSP_SPEC "" +#endif + /* ------------------------------------------------------------------------ */ /* Controlling the Compilation Driver. */ @@ -591,7 +912,7 @@ enum nds32_builtins {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } #define CC1_SPEC \ - "" + NDS32_EXT_DSP_SPEC #define ASM_SPEC \ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ @@ -603,7 +924,8 @@ enum nds32_builtins " %{mext-fpu-sp:-mfpu-sp-ext}" \ " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \ " %{mext-fpu-dp:-mfpu-dp-ext}" \ - " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" + " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" \ + " %{mext-dsp:-mdsp-ext}" /* If user issues -mrelax, we need to pass '--relax' to linker. */ #define LINK_SPEC \ diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index 3b8107e..7249520 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -68,12 +68,13 @@ ;; Insn type, it is used to default other attribute values. (define_attr "type" "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\ - falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore" + falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\ + dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext" (const_string "unknown")) ;; Insn sub-type (define_attr "subtype" - "simple,shift" + "simple,shift,saturation" (const_string "simple")) ;; Length, in bytes, default is 4-bytes. @@ -133,6 +134,7 @@ ;; ---------------------------------------------------------------------------- +(include "nds32-dspext.md") ;; Move instructions. 
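Note: NDS32_BUILTIN_DSP_BEGIN and NDS32_BUILTIN_DSP_END in the enum additions above are range sentinels rather than real builtins. A minimal sketch (not part of the patch) of how such sentinels can be consumed when a builtin is expanded is given below; TARGET_EXT_DSP and error () are GCC internals, the helper name and diagnostic wording are illustrative only, and the actual check in nds32-intrinsic.c may differ.

/* Sketch only: reject every DSP builtin at once when the DSP extension
   is not in effect, by testing the function code against the two
   sentinel values that bracket the DSP range.  */
static bool
nds32_dsp_builtin_available_p (unsigned int fcode)
{
  if (!TARGET_EXT_DSP
      && fcode > NDS32_BUILTIN_DSP_BEGIN
      && fcode < NDS32_BUILTIN_DSP_END)
    {
      error ("this builtin function requires the DSP extension");
      return false;
    }
  return true;
}

Keeping all DSP builtins between the two sentinels means that future entries only need to be added inside that range; no per-builtin availability check is required.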
@@ -351,13 +353,58 @@ ;; ---------------------------------------------------------------------------- +(define_expand "extv" + [(set (match_operand 0 "register_operand" "") + (sign_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ + enum nds32_expand_result_type result = nds32_expand_extv (operands); + switch (result) + { + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); + } +}) + +(define_expand "insv" + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) + (match_operand 3 "register_operand" ""))] + "" +{ + enum nds32_expand_result_type result = nds32_expand_insv (operands); + switch (result) + { + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); + } +}) ;; Arithmetic instructions. (define_insn "addsi3" [(set (match_operand:SI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") (plus:SI (match_operand:SI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") - (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,Iu06, Is15, r")))] + (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))] "" { switch (which_alternative) diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index dcf6d39..2c72a01 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -90,6 +90,10 @@ mcmov Target Report Mask(CMOV) Generate conditional move instructions. +mhw-abs +Target Report Mask(HW_ABS) +Generate hardware abs instructions. + mext-perf Target Report Mask(EXT_PERF) Generate performance extension instructions. @@ -102,6 +106,10 @@ mext-string Target Report Mask(EXT_STRING) Generate string extension instructions. +mext-dsp +Target Report Mask(EXT_DSP) +Generate DSP extension instructions. + mv3push Target Report Mask(V3PUSH) Generate v3 push25/pop25 instructions. @@ -321,6 +329,10 @@ mext-fpu-dp Target Report Mask(FPU_DOUBLE) Generate double-precision floating-point instructions. +mforce-no-ext-dsp +Target Undocumented Report Mask(FORCE_NO_EXT_DSP) +Force disable hardware loop, even use -mext-dsp. + malways-save-lp Target Var(flag_always_save_lp) Init(0) Always save $lp in the stack. diff --git a/gcc/config/nds32/nds32_intrinsic.h b/gcc/config/nds32/nds32_intrinsic.h index 7bb1177..24cb291 100644 --- a/gcc/config/nds32/nds32_intrinsic.h +++ b/gcc/config/nds32/nds32_intrinsic.h @@ -26,6 +26,13 @@ #ifndef _NDS32_INTRINSIC_H #define _NDS32_INTRINSIC_H +typedef signed char int8x4_t __attribute ((vector_size(4))); +typedef short int16x2_t __attribute ((vector_size(4))); +typedef int int32x2_t __attribute__((vector_size(8))); +typedef unsigned char uint8x4_t __attribute__ ((vector_size (4))); +typedef unsigned short uint16x2_t __attribute__ ((vector_size (4))); +typedef unsigned int uint32x2_t __attribute__((vector_size(8))); + /* General instrinsic register names. 
*/ enum nds32_intrinsic_registers { @@ -691,6 +698,55 @@ enum nds32_dpref #define __nds32__tlbop_flua() \ (__builtin_nds32_tlbop_flua()) +#define __nds32__kaddw(a, b) \ + (__builtin_nds32_kaddw ((a), (b))) +#define __nds32__kaddh(a, b) \ + (__builtin_nds32_kaddh ((a), (b))) +#define __nds32__ksubw(a, b) \ + (__builtin_nds32_ksubw ((a), (b))) +#define __nds32__ksubh(a, b) \ + (__builtin_nds32_ksubh ((a), (b))) +#define __nds32__kdmbb(a, b) \ + (__builtin_nds32_kdmbb ((a), (b))) +#define __nds32__v_kdmbb(a, b) \ + (__builtin_nds32_v_kdmbb ((a), (b))) +#define __nds32__kdmbt(a, b) \ + (__builtin_nds32_kdmbt ((a), (b))) +#define __nds32__v_kdmbt(a, b) \ + (__builtin_nds32_v_kdmbt ((a), (b))) +#define __nds32__kdmtb(a, b) \ + (__builtin_nds32_kdmtb ((a), (b))) +#define __nds32__v_kdmtb(a, b) \ + (__builtin_nds32_v_kdmtb ((a), (b))) +#define __nds32__kdmtt(a, b) \ + (__builtin_nds32_kdmtt ((a), (b))) +#define __nds32__v_kdmtt(a, b) \ + (__builtin_nds32_v_kdmtt ((a), (b))) +#define __nds32__khmbb(a, b) \ + (__builtin_nds32_khmbb ((a), (b))) +#define __nds32__v_khmbb(a, b) \ + (__builtin_nds32_v_khmbb ((a), (b))) +#define __nds32__khmbt(a, b) \ + (__builtin_nds32_khmbt ((a), (b))) +#define __nds32__v_khmbt(a, b) \ + (__builtin_nds32_v_khmbt ((a), (b))) +#define __nds32__khmtb(a, b) \ + (__builtin_nds32_khmtb ((a), (b))) +#define __nds32__v_khmtb(a, b) \ + (__builtin_nds32_v_khmtb ((a), (b))) +#define __nds32__khmtt(a, b) \ + (__builtin_nds32_khmtt ((a), (b))) +#define __nds32__v_khmtt(a, b) \ + (__builtin_nds32_v_khmtt ((a), (b))) +#define __nds32__kslraw(a, b) \ + (__builtin_nds32_kslraw ((a), (b))) +#define __nds32__kslraw_u(a, b) \ + (__builtin_nds32_kslraw_u ((a), (b))) + +#define __nds32__rdov() \ + (__builtin_nds32_rdov()) +#define __nds32__clrov() \ + (__builtin_nds32_clrov()) #define __nds32__gie_dis() \ (__builtin_nds32_gie_dis()) #define __nds32__gie_en() \ @@ -720,10 +776,622 @@ enum nds32_dpref #define __nds32__get_trig_type(a) \ (__builtin_nds32_get_trig_type ((a))) +#define __nds32__get_unaligned_hw(a) \ + (__builtin_nds32_unaligned_load_hw ((a))) +#define __nds32__get_unaligned_w(a) \ + (__builtin_nds32_unaligned_load_w ((a))) +#define __nds32__get_unaligned_dw(a) \ + (__builtin_nds32_unaligned_load_dw ((a))) +#define __nds32__put_unaligned_hw(a, data) \ + (__builtin_nds32_unaligned_store_hw ((a), (data))) +#define __nds32__put_unaligned_w(a, data) \ + (__builtin_nds32_unaligned_store_w ((a), (data))) +#define __nds32__put_unaligned_dw(a, data) \ + (__builtin_nds32_unaligned_store_dw ((a), (data))) + +#define __nds32__add16(a, b) \ + (__builtin_nds32_add16 ((a), (b))) +#define __nds32__v_uadd16(a, b) \ + (__builtin_nds32_v_uadd16 ((a), (b))) +#define __nds32__v_sadd16(a, b) \ + (__builtin_nds32_v_sadd16 ((a), (b))) +#define __nds32__radd16(a, b) \ + (__builtin_nds32_radd16 ((a), (b))) +#define __nds32__v_radd16(a, b) \ + (__builtin_nds32_v_radd16 ((a), (b))) +#define __nds32__uradd16(a, b) \ + (__builtin_nds32_uradd16 ((a), (b))) +#define __nds32__v_uradd16(a, b) \ + (__builtin_nds32_v_uradd16 ((a), (b))) +#define __nds32__kadd16(a, b) \ + (__builtin_nds32_kadd16 ((a), (b))) +#define __nds32__v_kadd16(a, b) \ + (__builtin_nds32_v_kadd16 ((a), (b))) +#define __nds32__ukadd16(a, b) \ + (__builtin_nds32_ukadd16 ((a), (b))) +#define __nds32__v_ukadd16(a, b) \ + (__builtin_nds32_v_ukadd16 ((a), (b))) +#define __nds32__sub16(a, b) \ + (__builtin_nds32_sub16 ((a), (b))) +#define __nds32__v_usub16(a, b) \ + (__builtin_nds32_v_usub16 ((a), (b))) +#define __nds32__v_ssub16(a, b) \ + 
(__builtin_nds32_v_ssub16 ((a), (b))) +#define __nds32__rsub16(a, b) \ + (__builtin_nds32_rsub16 ((a), (b))) +#define __nds32__v_rsub16(a, b) \ + (__builtin_nds32_v_rsub16 ((a), (b))) +#define __nds32__ursub16(a, b) \ + (__builtin_nds32_ursub16 ((a), (b))) +#define __nds32__v_ursub16(a, b) \ + (__builtin_nds32_v_ursub16 ((a), (b))) +#define __nds32__ksub16(a, b) \ + (__builtin_nds32_ksub16 ((a), (b))) +#define __nds32__v_ksub16(a, b) \ + (__builtin_nds32_v_ksub16 ((a), (b))) +#define __nds32__uksub16(a, b) \ + (__builtin_nds32_uksub16 ((a), (b))) +#define __nds32__v_uksub16(a, b) \ + (__builtin_nds32_v_uksub16 ((a), (b))) +#define __nds32__cras16(a, b) \ + (__builtin_nds32_cras16 ((a), (b))) +#define __nds32__v_ucras16(a, b) \ + (__builtin_nds32_v_ucras16 ((a), (b))) +#define __nds32__v_scras16(a, b) \ + (__builtin_nds32_v_scras16 ((a), (b))) +#define __nds32__rcras16(a, b) \ + (__builtin_nds32_rcras16 ((a), (b))) +#define __nds32__v_rcras16(a, b) \ + (__builtin_nds32_v_rcras16 ((a), (b))) +#define __nds32__urcras16(a, b) \ + (__builtin_nds32_urcras16 ((a), (b))) +#define __nds32__v_urcras16(a, b) \ + (__builtin_nds32_v_urcras16 ((a), (b))) +#define __nds32__kcras16(a, b) \ + (__builtin_nds32_kcras16 ((a), (b))) +#define __nds32__v_kcras16(a, b) \ + (__builtin_nds32_v_kcras16 ((a), (b))) +#define __nds32__ukcras16(a, b) \ + (__builtin_nds32_ukcras16 ((a), (b))) +#define __nds32__v_ukcras16(a, b) \ + (__builtin_nds32_v_ukcras16 ((a), (b))) +#define __nds32__crsa16(a, b) \ + (__builtin_nds32_crsa16 ((a), (b))) +#define __nds32__v_ucrsa16(a, b) \ + (__builtin_nds32_v_ucrsa16 ((a), (b))) +#define __nds32__v_scrsa16(a, b) \ + (__builtin_nds32_v_scrsa16 ((a), (b))) +#define __nds32__rcrsa16(a, b) \ + (__builtin_nds32_rcrsa16 ((a), (b))) +#define __nds32__v_rcrsa16(a, b) \ + (__builtin_nds32_v_rcrsa16 ((a), (b))) +#define __nds32__urcrsa16(a, b) \ + (__builtin_nds32_urcrsa16 ((a), (b))) +#define __nds32__v_urcrsa16(a, b) \ + (__builtin_nds32_v_urcrsa16 ((a), (b))) +#define __nds32__kcrsa16(a, b) \ + (__builtin_nds32_kcrsa16 ((a), (b))) +#define __nds32__v_kcrsa16(a, b) \ + (__builtin_nds32_v_kcrsa16 ((a), (b))) +#define __nds32__ukcrsa16(a, b) \ + (__builtin_nds32_ukcrsa16 ((a), (b))) +#define __nds32__v_ukcrsa16(a, b) \ + (__builtin_nds32_v_ukcrsa16 ((a), (b))) + +#define __nds32__add8(a, b) \ + (__builtin_nds32_add8 ((a), (b))) +#define __nds32__v_uadd8(a, b) \ + (__builtin_nds32_v_uadd8 ((a), (b))) +#define __nds32__v_sadd8(a, b) \ + (__builtin_nds32_v_sadd8 ((a), (b))) +#define __nds32__radd8(a, b) \ + (__builtin_nds32_radd8 ((a), (b))) +#define __nds32__v_radd8(a, b) \ + (__builtin_nds32_v_radd8 ((a), (b))) +#define __nds32__uradd8(a, b) \ + (__builtin_nds32_uradd8 ((a), (b))) +#define __nds32__v_uradd8(a, b) \ + (__builtin_nds32_v_uradd8 ((a), (b))) +#define __nds32__kadd8(a, b) \ + (__builtin_nds32_kadd8 ((a), (b))) +#define __nds32__v_kadd8(a, b) \ + (__builtin_nds32_v_kadd8 ((a), (b))) +#define __nds32__ukadd8(a, b) \ + (__builtin_nds32_ukadd8 ((a), (b))) +#define __nds32__v_ukadd8(a, b) \ + (__builtin_nds32_v_ukadd8 ((a), (b))) +#define __nds32__sub8(a, b) \ + (__builtin_nds32_sub8 ((a), (b))) +#define __nds32__v_usub8(a, b) \ + (__builtin_nds32_v_usub8 ((a), (b))) +#define __nds32__v_ssub8(a, b) \ + (__builtin_nds32_v_ssub8 ((a), (b))) +#define __nds32__rsub8(a, b) \ + (__builtin_nds32_rsub8 ((a), (b))) +#define __nds32__v_rsub8(a, b) \ + (__builtin_nds32_v_rsub8 ((a), (b))) +#define __nds32__ursub8(a, b) \ + (__builtin_nds32_ursub8 ((a), (b))) +#define __nds32__v_ursub8(a, b) \ + 
(__builtin_nds32_v_ursub8 ((a), (b))) +#define __nds32__ksub8(a, b) \ + (__builtin_nds32_ksub8 ((a), (b))) +#define __nds32__v_ksub8(a, b) \ + (__builtin_nds32_v_ksub8 ((a), (b))) +#define __nds32__uksub8(a, b) \ + (__builtin_nds32_uksub8 ((a), (b))) +#define __nds32__v_uksub8(a, b) \ + (__builtin_nds32_v_uksub8 ((a), (b))) + +#define __nds32__sra16(a, b) \ + (__builtin_nds32_sra16 ((a), (b))) +#define __nds32__v_sra16(a, b) \ + (__builtin_nds32_v_sra16 ((a), (b))) +#define __nds32__sra16_u(a, b) \ + (__builtin_nds32_sra16_u ((a), (b))) +#define __nds32__v_sra16_u(a, b) \ + (__builtin_nds32_v_sra16_u ((a), (b))) +#define __nds32__srl16(a, b) \ + (__builtin_nds32_srl16 ((a), (b))) +#define __nds32__v_srl16(a, b) \ + (__builtin_nds32_v_srl16 ((a), (b))) +#define __nds32__srl16_u(a, b) \ + (__builtin_nds32_srl16_u ((a), (b))) +#define __nds32__v_srl16_u(a, b) \ + (__builtin_nds32_v_srl16_u ((a), (b))) +#define __nds32__sll16(a, b) \ + (__builtin_nds32_sll16 ((a), (b))) +#define __nds32__v_sll16(a, b) \ + (__builtin_nds32_v_sll16 ((a), (b))) +#define __nds32__ksll16(a, b) \ + (__builtin_nds32_ksll16 ((a), (b))) +#define __nds32__v_ksll16(a, b) \ + (__builtin_nds32_v_ksll16 ((a), (b))) +#define __nds32__kslra16(a, b) \ + (__builtin_nds32_kslra16 ((a), (b))) +#define __nds32__v_kslra16(a, b) \ + (__builtin_nds32_v_kslra16 ((a), (b))) +#define __nds32__kslra16_u(a, b) \ + (__builtin_nds32_kslra16_u ((a), (b))) +#define __nds32__v_kslra16_u(a, b) \ + (__builtin_nds32_v_kslra16_u ((a), (b))) + +#define __nds32__cmpeq16(a, b) \ + (__builtin_nds32_cmpeq16 ((a), (b))) +#define __nds32__v_scmpeq16(a, b) \ + (__builtin_nds32_v_scmpeq16 ((a), (b))) +#define __nds32__v_ucmpeq16(a, b) \ + (__builtin_nds32_v_ucmpeq16 ((a), (b))) +#define __nds32__scmplt16(a, b) \ + (__builtin_nds32_scmplt16 ((a), (b))) +#define __nds32__v_scmplt16(a, b) \ + (__builtin_nds32_v_scmplt16 ((a), (b))) +#define __nds32__scmple16(a, b) \ + (__builtin_nds32_scmple16 ((a), (b))) +#define __nds32__v_scmple16(a, b) \ + (__builtin_nds32_v_scmple16 ((a), (b))) +#define __nds32__ucmplt16(a, b) \ + (__builtin_nds32_ucmplt16 ((a), (b))) +#define __nds32__v_ucmplt16(a, b) \ + (__builtin_nds32_v_ucmplt16 ((a), (b))) +#define __nds32__ucmple16(a, b) \ + (__builtin_nds32_ucmple16 ((a), (b))) +#define __nds32__v_ucmple16(a, b) \ + (__builtin_nds32_v_ucmple16 ((a), (b))) + +#define __nds32__cmpeq8(a, b) \ + (__builtin_nds32_cmpeq8 ((a), (b))) +#define __nds32__v_scmpeq8(a, b) \ + (__builtin_nds32_v_scmpeq8 ((a), (b))) +#define __nds32__v_ucmpeq8(a, b) \ + (__builtin_nds32_v_ucmpeq8 ((a), (b))) +#define __nds32__scmplt8(a, b) \ + (__builtin_nds32_scmplt8 ((a), (b))) +#define __nds32__v_scmplt8(a, b) \ + (__builtin_nds32_v_scmplt8 ((a), (b))) +#define __nds32__scmple8(a, b) \ + (__builtin_nds32_scmple8 ((a), (b))) +#define __nds32__v_scmple8(a, b) \ + (__builtin_nds32_v_scmple8 ((a), (b))) +#define __nds32__ucmplt8(a, b) \ + (__builtin_nds32_ucmplt8 ((a), (b))) +#define __nds32__v_ucmplt8(a, b) \ + (__builtin_nds32_v_ucmplt8 ((a), (b))) +#define __nds32__ucmple8(a, b) \ + (__builtin_nds32_ucmple8 ((a), (b))) +#define __nds32__v_ucmple8(a, b) \ + (__builtin_nds32_v_ucmple8 ((a), (b))) + +#define __nds32__smin16(a, b) \ + (__builtin_nds32_smin16 ((a), (b))) +#define __nds32__v_smin16(a, b) \ + (__builtin_nds32_v_smin16 ((a), (b))) +#define __nds32__umin16(a, b) \ + (__builtin_nds32_umin16 ((a), (b))) +#define __nds32__v_umin16(a, b) \ + (__builtin_nds32_v_umin16 ((a), (b))) +#define __nds32__smax16(a, b) \ + (__builtin_nds32_smax16 ((a), (b))) 
+#define __nds32__v_smax16(a, b) \ + (__builtin_nds32_v_smax16 ((a), (b))) +#define __nds32__umax16(a, b) \ + (__builtin_nds32_umax16 ((a), (b))) +#define __nds32__v_umax16(a, b) \ + (__builtin_nds32_v_umax16 ((a), (b))) +#define __nds32__sclip16(a, b) \ + (__builtin_nds32_sclip16 ((a), (b))) +#define __nds32__v_sclip16(a, b) \ + (__builtin_nds32_v_sclip16 ((a), (b))) +#define __nds32__uclip16(a, b) \ + (__builtin_nds32_uclip16 ((a), (b))) +#define __nds32__v_uclip16(a, b) \ + (__builtin_nds32_v_uclip16 ((a), (b))) +#define __nds32__khm16(a, b) \ + (__builtin_nds32_khm16 ((a), (b))) +#define __nds32__v_khm16(a, b) \ + (__builtin_nds32_v_khm16 ((a), (b))) +#define __nds32__khmx16(a, b) \ + (__builtin_nds32_khmx16 ((a), (b))) +#define __nds32__v_khmx16(a, b) \ + (__builtin_nds32_v_khmx16 ((a), (b))) +#define __nds32__kabs16(a) \ + (__builtin_nds32_kabs16 ((a))) +#define __nds32__v_kabs16(a) \ + (__builtin_nds32_v_kabs16 ((a))) + +#define __nds32__smin8(a, b) \ + (__builtin_nds32_smin8 ((a), (b))) +#define __nds32__v_smin8(a, b) \ + (__builtin_nds32_v_smin8 ((a), (b))) +#define __nds32__umin8(a, b) \ + (__builtin_nds32_umin8 ((a), (b))) +#define __nds32__v_umin8(a, b) \ + (__builtin_nds32_v_umin8 ((a), (b))) +#define __nds32__smax8(a, b) \ + (__builtin_nds32_smax8 ((a), (b))) +#define __nds32__v_smax8(a, b) \ + (__builtin_nds32_v_smax8 ((a), (b))) +#define __nds32__umax8(a, b) \ + (__builtin_nds32_umax8 ((a), (b))) +#define __nds32__v_umax8(a, b) \ + (__builtin_nds32_v_umax8 ((a), (b))) +#define __nds32__kabs8(a) \ + (__builtin_nds32_kabs8 ((a))) +#define __nds32__v_kabs8(a) \ + (__builtin_nds32_v_kabs8 ((a))) + +#define __nds32__sunpkd810(a) \ + (__builtin_nds32_sunpkd810 ((a))) +#define __nds32__v_sunpkd810(a) \ + (__builtin_nds32_v_sunpkd810 ((a))) +#define __nds32__sunpkd820(a) \ + (__builtin_nds32_sunpkd820 ((a))) +#define __nds32__v_sunpkd820(a) \ + (__builtin_nds32_v_sunpkd820 ((a))) +#define __nds32__sunpkd830(a) \ + (__builtin_nds32_sunpkd830 ((a))) +#define __nds32__v_sunpkd830(a) \ + (__builtin_nds32_v_sunpkd830 ((a))) +#define __nds32__sunpkd831(a) \ + (__builtin_nds32_sunpkd831 ((a))) +#define __nds32__v_sunpkd831(a) \ + (__builtin_nds32_v_sunpkd831 ((a))) +#define __nds32__zunpkd810(a) \ + (__builtin_nds32_zunpkd810 ((a))) +#define __nds32__v_zunpkd810(a) \ + (__builtin_nds32_v_zunpkd810 ((a))) +#define __nds32__zunpkd820(a) \ + (__builtin_nds32_zunpkd820 ((a))) +#define __nds32__v_zunpkd820(a) \ + (__builtin_nds32_v_zunpkd820 ((a))) +#define __nds32__zunpkd830(a) \ + (__builtin_nds32_zunpkd830 ((a))) +#define __nds32__v_zunpkd830(a) \ + (__builtin_nds32_v_zunpkd830 ((a))) +#define __nds32__zunpkd831(a) \ + (__builtin_nds32_zunpkd831 ((a))) +#define __nds32__v_zunpkd831(a) \ + (__builtin_nds32_v_zunpkd831 ((a))) + +#define __nds32__raddw(a, b) \ + (__builtin_nds32_raddw ((a), (b))) +#define __nds32__uraddw(a, b) \ + (__builtin_nds32_uraddw ((a), (b))) +#define __nds32__rsubw(a, b) \ + (__builtin_nds32_rsubw ((a), (b))) +#define __nds32__ursubw(a, b) \ + (__builtin_nds32_ursubw ((a), (b))) + +#define __nds32__sra_u(a, b) \ + (__builtin_nds32_sra_u ((a), (b))) +#define __nds32__ksll(a, b) \ + (__builtin_nds32_ksll ((a), (b))) +#define __nds32__pkbb16(a, b) \ + (__builtin_nds32_pkbb16 ((a), (b))) +#define __nds32__v_pkbb16(a, b) \ + (__builtin_nds32_v_pkbb16 ((a), (b))) +#define __nds32__pkbt16(a, b) \ + (__builtin_nds32_pkbt16 ((a), (b))) +#define __nds32__v_pkbt16(a, b) \ + (__builtin_nds32_v_pkbt16 ((a), (b))) +#define __nds32__pktb16(a, b) \ + (__builtin_nds32_pktb16 ((a), 
(b))) +#define __nds32__v_pktb16(a, b) \ + (__builtin_nds32_v_pktb16 ((a), (b))) +#define __nds32__pktt16(a, b) \ + (__builtin_nds32_pktt16 ((a), (b))) +#define __nds32__v_pktt16(a, b) \ + (__builtin_nds32_v_pktt16 ((a), (b))) + +#define __nds32__smmul(a, b) \ + (__builtin_nds32_smmul ((a), (b))) +#define __nds32__smmul_u(a, b) \ + (__builtin_nds32_smmul_u ((a), (b))) +#define __nds32__kmmac(r, a, b) \ + (__builtin_nds32_kmmac ((r), (a), (b))) +#define __nds32__kmmac_u(r, a, b) \ + (__builtin_nds32_kmmac_u ((r), (a), (b))) +#define __nds32__kmmsb(r, a, b) \ + (__builtin_nds32_kmmsb ((r), (a), (b))) +#define __nds32__kmmsb_u(r, a, b) \ + (__builtin_nds32_kmmsb_u ((r), (a), (b))) +#define __nds32__kwmmul(a, b) \ + (__builtin_nds32_kwmmul ((a), (b))) +#define __nds32__kwmmul_u(a, b) \ + (__builtin_nds32_kwmmul_u ((a), (b))) + +#define __nds32__smmwb(a, b) \ + (__builtin_nds32_smmwb ((a), (b))) +#define __nds32__v_smmwb(a, b) \ + (__builtin_nds32_v_smmwb ((a), (b))) +#define __nds32__smmwb_u(a, b) \ + (__builtin_nds32_smmwb_u ((a), (b))) +#define __nds32__v_smmwb_u(a, b) \ + (__builtin_nds32_v_smmwb_u ((a), (b))) +#define __nds32__smmwt(a, b) \ + (__builtin_nds32_smmwt ((a), (b))) +#define __nds32__v_smmwt(a, b) \ + (__builtin_nds32_v_smmwt ((a), (b))) +#define __nds32__smmwt_u(a, b) \ + (__builtin_nds32_smmwt_u ((a), (b))) +#define __nds32__v_smmwt_u(a, b) \ + (__builtin_nds32_v_smmwt_u ((a), (b))) +#define __nds32__kmmawb(r, a, b) \ + (__builtin_nds32_kmmawb ((r), (a), (b))) +#define __nds32__v_kmmawb(r, a, b) \ + (__builtin_nds32_v_kmmawb ((r), (a), (b))) +#define __nds32__kmmawb_u(r, a, b) \ + (__builtin_nds32_kmmawb_u ((r), (a), (b))) +#define __nds32__v_kmmawb_u(r, a, b) \ + (__builtin_nds32_v_kmmawb_u ((r), (a), (b))) +#define __nds32__kmmawt(r, a, b) \ + (__builtin_nds32_kmmawt ((r), (a), (b))) +#define __nds32__v_kmmawt(r, a, b) \ + (__builtin_nds32_v_kmmawt ((r), (a), (b))) +#define __nds32__kmmawt_u(r, a, b) \ + (__builtin_nds32_kmmawt_u ((r), (a), (b))) +#define __nds32__v_kmmawt_u(r, a, b) \ + (__builtin_nds32_v_kmmawt_u ((r), (a), (b))) + +#define __nds32__smbb(a, b) \ + (__builtin_nds32_smbb ((a), (b))) +#define __nds32__v_smbb(a, b) \ + (__builtin_nds32_v_smbb ((a), (b))) +#define __nds32__smbt(a, b) \ + (__builtin_nds32_smbt ((a), (b))) +#define __nds32__v_smbt(a, b) \ + (__builtin_nds32_v_smbt ((a), (b))) +#define __nds32__smtt(a, b) \ + (__builtin_nds32_smtt ((a), (b))) +#define __nds32__v_smtt(a, b) \ + (__builtin_nds32_v_smtt ((a), (b))) +#define __nds32__kmda(a, b) \ + (__builtin_nds32_kmda ((a), (b))) +#define __nds32__v_kmda(a, b) \ + (__builtin_nds32_v_kmda ((a), (b))) +#define __nds32__kmxda(a, b) \ + (__builtin_nds32_kmxda ((a), (b))) +#define __nds32__v_kmxda(a, b) \ + (__builtin_nds32_v_kmxda ((a), (b))) +#define __nds32__smds(a, b) \ + (__builtin_nds32_smds ((a), (b))) +#define __nds32__v_smds(a, b) \ + (__builtin_nds32_v_smds ((a), (b))) +#define __nds32__smdrs(a, b) \ + (__builtin_nds32_smdrs ((a), (b))) +#define __nds32__v_smdrs(a, b) \ + (__builtin_nds32_v_smdrs ((a), (b))) +#define __nds32__smxds(a, b) \ + (__builtin_nds32_smxds ((a), (b))) +#define __nds32__v_smxds(a, b) \ + (__builtin_nds32_v_smxds ((a), (b))) +#define __nds32__kmabb(r, a, b) \ + (__builtin_nds32_kmabb ((r), (a), (b))) +#define __nds32__v_kmabb(r, a, b) \ + (__builtin_nds32_v_kmabb ((r), (a), (b))) +#define __nds32__kmabt(r, a, b) \ + (__builtin_nds32_kmabt ((r), (a), (b))) +#define __nds32__v_kmabt(r, a, b) \ + (__builtin_nds32_v_kmabt ((r), (a), (b))) +#define __nds32__kmatt(r, a, b) \ + 
(__builtin_nds32_kmatt ((r), (a), (b))) +#define __nds32__v_kmatt(r, a, b) \ + (__builtin_nds32_v_kmatt ((r), (a), (b))) +#define __nds32__kmada(r, a, b) \ + (__builtin_nds32_kmada ((r), (a), (b))) +#define __nds32__v_kmada(r, a, b) \ + (__builtin_nds32_v_kmada ((r), (a), (b))) +#define __nds32__kmaxda(r, a, b) \ + (__builtin_nds32_kmaxda ((r), (a), (b))) +#define __nds32__v_kmaxda(r, a, b) \ + (__builtin_nds32_v_kmaxda ((r), (a), (b))) +#define __nds32__kmads(r, a, b) \ + (__builtin_nds32_kmads ((r), (a), (b))) +#define __nds32__v_kmads(r, a, b) \ + (__builtin_nds32_v_kmads ((r), (a), (b))) +#define __nds32__kmadrs(r, a, b) \ + (__builtin_nds32_kmadrs ((r), (a), (b))) +#define __nds32__v_kmadrs(r, a, b) \ + (__builtin_nds32_v_kmadrs ((r), (a), (b))) +#define __nds32__kmaxds(r, a, b) \ + (__builtin_nds32_kmaxds ((r), (a), (b))) +#define __nds32__v_kmaxds(r, a, b) \ + (__builtin_nds32_v_kmaxds ((r), (a), (b))) +#define __nds32__kmsda(r, a, b) \ + (__builtin_nds32_kmsda ((r), (a), (b))) +#define __nds32__v_kmsda(r, a, b) \ + (__builtin_nds32_v_kmsda ((r), (a), (b))) +#define __nds32__kmsxda(r, a, b) \ + (__builtin_nds32_kmsxda ((r), (a), (b))) +#define __nds32__v_kmsxda(r, a, b) \ + (__builtin_nds32_v_kmsxda ((r), (a), (b))) + +#define __nds32__smal(a, b) \ + (__builtin_nds32_smal ((a), (b))) +#define __nds32__v_smal(a, b) \ + (__builtin_nds32_v_smal ((a), (b))) + +#define __nds32__bitrev(a, b) \ + (__builtin_nds32_bitrev ((a), (b))) +#define __nds32__wext(a, b) \ + (__builtin_nds32_wext ((a), (b))) +#define __nds32__bpick(r, a, b) \ + (__builtin_nds32_bpick ((r), (a), (b))) +#define __nds32__insb(r, a, b) \ + (__builtin_nds32_insb ((r), (a), (b))) + +#define __nds32__sadd64(a, b) \ + (__builtin_nds32_sadd64 ((a), (b))) +#define __nds32__uadd64(a, b) \ + (__builtin_nds32_uadd64 ((a), (b))) +#define __nds32__radd64(a, b) \ + (__builtin_nds32_radd64 ((a), (b))) +#define __nds32__uradd64(a, b) \ + (__builtin_nds32_uradd64 ((a), (b))) +#define __nds32__kadd64(a, b) \ + (__builtin_nds32_kadd64 ((a), (b))) +#define __nds32__ukadd64(a, b) \ + (__builtin_nds32_ukadd64 ((a), (b))) +#define __nds32__ssub64(a, b) \ + (__builtin_nds32_ssub64 ((a), (b))) +#define __nds32__usub64(a, b) \ + (__builtin_nds32_usub64 ((a), (b))) +#define __nds32__rsub64(a, b) \ + (__builtin_nds32_rsub64 ((a), (b))) +#define __nds32__ursub64(a, b) \ + (__builtin_nds32_ursub64 ((a), (b))) +#define __nds32__ksub64(a, b) \ + (__builtin_nds32_ksub64 ((a), (b))) +#define __nds32__uksub64(a, b) \ + (__builtin_nds32_uksub64 ((a), (b))) + +#define __nds32__smar64(r, a, b) \ + (__builtin_nds32_smar64 ((r), (a), (b))) +#define __nds32__smsr64(r, a, b) \ + (__builtin_nds32_smsr64 ((r), (a), (b))) +#define __nds32__umar64(r, a, b) \ + (__builtin_nds32_umar64 ((r), (a), (b))) +#define __nds32__umsr64(r, a, b) \ + (__builtin_nds32_umsr64 ((r), (a), (b))) +#define __nds32__kmar64(r, a, b) \ + (__builtin_nds32_kmar64 ((r), (a), (b))) +#define __nds32__kmsr64(r, a, b) \ + (__builtin_nds32_kmsr64 ((r), (a), (b))) +#define __nds32__ukmar64(r, a, b) \ + (__builtin_nds32_ukmar64 ((r), (a), (b))) +#define __nds32__ukmsr64(r, a, b) \ + (__builtin_nds32_ukmsr64 ((r), (a), (b))) + +#define __nds32__smalbb(r, a, b) \ + (__builtin_nds32_smalbb ((r), (a), (b))) +#define __nds32__v_smalbb(r, a, b) \ + (__builtin_nds32_v_smalbb ((r), (a), (b))) +#define __nds32__smalbt(r, a, b) \ + (__builtin_nds32_smalbt ((r), (a), (b))) +#define __nds32__v_smalbt(r, a, b) \ + (__builtin_nds32_v_smalbt ((r), (a), (b))) +#define __nds32__smaltt(r, a, b) \ + 
(__builtin_nds32_smaltt ((r), (a), (b))) +#define __nds32__v_smaltt(r, a, b) \ + (__builtin_nds32_v_smaltt ((r), (a), (b))) +#define __nds32__smalda(r, a, b) \ + (__builtin_nds32_smalda ((r), (a), (b))) +#define __nds32__v_smalda(r, a, b) \ + (__builtin_nds32_v_smalda ((r), (a), (b))) +#define __nds32__smalxda(r, a, b) \ + (__builtin_nds32_smalxda ((r), (a), (b))) +#define __nds32__v_smalxda(r, a, b) \ + (__builtin_nds32_v_smalxda ((r), (a), (b))) +#define __nds32__smalds(r, a, b) \ + (__builtin_nds32_smalds ((r), (a), (b))) +#define __nds32__v_smalds(r, a, b) \ + (__builtin_nds32_v_smalds ((r), (a), (b))) +#define __nds32__smaldrs(r, a, b) \ + (__builtin_nds32_smaldrs ((r), (a), (b))) +#define __nds32__v_smaldrs(r, a, b) \ + (__builtin_nds32_v_smaldrs ((r), (a), (b))) +#define __nds32__smalxds(r, a, b) \ + (__builtin_nds32_smalxds ((r), (a), (b))) +#define __nds32__v_smalxds(r, a, b) \ + (__builtin_nds32_v_smalxds ((r), (a), (b))) +#define __nds32__smslda(r, a, b) \ + (__builtin_nds32_smslda ((r), (a), (b))) +#define __nds32__v_smslda(r, a, b) \ + (__builtin_nds32_v_smslda ((r), (a), (b))) +#define __nds32__smslxda(r, a, b) \ + (__builtin_nds32_smslxda ((r), (a), (b))) +#define __nds32__v_smslxda(r, a, b) \ + (__builtin_nds32_v_smslxda ((r), (a), (b))) + +#define __nds32__smul16(a, b) \ + (__builtin_nds32_smul16 ((a), (b))) +#define __nds32__v_smul16(a, b) \ + (__builtin_nds32_v_smul16 ((a), (b))) +#define __nds32__smulx16(a, b) \ + (__builtin_nds32_smulx16 ((a), (b))) +#define __nds32__v_smulx16(a, b) \ + (__builtin_nds32_v_smulx16 ((a), (b))) +#define __nds32__umul16(a, b) \ + (__builtin_nds32_umul16 ((a), (b))) +#define __nds32__v_umul16(a, b) \ + (__builtin_nds32_v_umul16 ((a), (b))) +#define __nds32__umulx16(a, b) \ + (__builtin_nds32_umulx16 ((a), (b))) +#define __nds32__v_umulx16(a, b) \ + (__builtin_nds32_v_umulx16 ((a), (b))) + +#define __nds32__uclip32(a, imm) \ + (__builtin_nds32_uclip32 ((a), (imm))) +#define __nds32__sclip32(a, imm) \ + (__builtin_nds32_sclip32 ((a), (imm))) +#define __nds32__kabs(a) \ + (__builtin_nds32_kabs ((a))) + #define __nds32__unaligned_feature() \ (__builtin_nds32_unaligned_feature()) #define __nds32__enable_unaligned() \ (__builtin_nds32_enable_unaligned()) #define __nds32__disable_unaligned() \ (__builtin_nds32_disable_unaligned()) + +#define __nds32__get_unaligned_u16x2(a) \ + (__builtin_nds32_get_unaligned_u16x2 ((a))) +#define __nds32__get_unaligned_s16x2(a) \ + (__builtin_nds32_get_unaligned_s16x2 ((a))) +#define __nds32__get_unaligned_u8x4(a) \ + (__builtin_nds32_get_unaligned_u8x4 ((a))) +#define __nds32__get_unaligned_s8x4(a) \ + (__builtin_nds32_get_unaligned_s8x4 ((a))) + +#define __nds32__put_unaligned_u16x2(a, data) \ + (__builtin_nds32_put_unaligned_u16x2 ((a), (data))) +#define __nds32__put_unaligned_s16x2(a, data) \ + (__builtin_nds32_put_unaligned_s16x2 ((a), (data))) +#define __nds32__put_unaligned_u8x4(a, data) \ + (__builtin_nds32_put_unaligned_u8x4 ((a), (data))) +#define __nds32__put_unaligned_s8x4(a, data) \ + (__builtin_nds32_put_unaligned_s8x4 ((a), (data))) + +#define NDS32ATTR_SIGNATURE __attribute__((signature)) + #endif /* nds32_intrinsic.h */ diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md index 9eb8468..e5f7ba4 100644 --- a/gcc/config/nds32/predicates.md +++ b/gcc/config/nds32/predicates.md @@ -56,14 +56,51 @@ (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Is11 (op)")))) +(define_predicate "nds32_imm_0_1_operand" + (and (match_operand 0 "const_int_operand") + 
(ior (match_test "satisfies_constraint_Iv00 (op)") + (match_test "satisfies_constraint_Iv01 (op)")))) + +(define_predicate "nds32_imm_1_2_operand" + (and (match_operand 0 "const_int_operand") + (ior (match_test "satisfies_constraint_Iv01 (op)") + (match_test "satisfies_constraint_Iv02 (op)")))) + +(define_predicate "nds32_imm_1_2_4_8_operand" + (and (match_operand 0 "const_int_operand") + (ior (ior (match_test "satisfies_constraint_Iv01 (op)") + (match_test "satisfies_constraint_Iv02 (op)")) + (ior (match_test "satisfies_constraint_Iv04 (op)") + (match_test "satisfies_constraint_Iv08 (op)"))))) + +(define_predicate "nds32_imm2u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu02 (op)"))) + +(define_predicate "nds32_imm4u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu04 (op)"))) + (define_predicate "nds32_imm5u_operand" (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Iu05 (op)"))) +(define_predicate "nds32_imm6u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu06 (op)"))) + +(define_predicate "nds32_rimm4u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm4u_operand"))) + (define_predicate "nds32_rimm5u_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "nds32_imm5u_operand"))) +(define_predicate "nds32_rimm6u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm6u_operand"))) + (define_predicate "nds32_move_operand" (and (match_operand 0 "general_operand") (not (match_code "high,const,symbol_ref,label_ref"))) @@ -78,6 +115,20 @@ return true; }) +(define_predicate "nds32_vmove_operand" + (and (match_operand 0 "general_operand") + (not (match_code "high,const,symbol_ref,label_ref"))) +{ + /* If the constant op does NOT satisfy Is20 nor Ihig, + we can not perform move behavior by a single instruction. */ + if (GET_CODE (op) == CONST_VECTOR + && !satisfies_constraint_CVs2 (op) + && !satisfies_constraint_CVhi (op)) + return false; + + return true; +}) + (define_predicate "nds32_and_operand" (match_operand 0 "nds32_reg_constant_operand") { @@ -127,6 +178,15 @@ (ior (match_operand 0 "nds32_symbolic_operand") (match_operand 0 "nds32_general_register_operand"))) +(define_predicate "nds32_insv_operand" + (match_code "const_int") +{ + return INTVAL (op) == 0 + || INTVAL (op) == 8 + || INTVAL (op) == 16 + || INTVAL (op) == 24; +}) + (define_predicate "nds32_lmw_smw_base_operand" (and (match_code "mem") (match_test "nds32_valid_smw_lwm_base_p (op)"))) -- 2.7.4
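For reference, a minimal usage sketch of the intrinsic API added in nds32_intrinsic.h. It is not part of the patch; it assumes a compiler built with this patch and source compiled with -mext-dsp, and the function and variable names below are illustrative only.

#include <nds32_intrinsic.h>

/* Saturating Q15 dot product: __nds32__v_kmada multiplies the two
   16-bit lanes of its vector operands pairwise and adds both products
   to the 32-bit accumulator with saturation.  */
int
dot_q15 (const int16x2_t *a, const int16x2_t *b, int n)
{
  int i;
  int acc = 0;

  for (i = 0; i < n; i++)
    acc = __nds32__v_kmada (acc, a[i], b[i]);

  return acc;
}

/* 32-bit saturating addition via the KADDW instruction.  */
int
sat_add32 (int x, int y)
{
  return __nds32__kaddw (x, y);
}

The v_-prefixed intrinsics take the vector typedefs declared at the top of the header (int16x2_t, uint8x4_t, and so on), while the unprefixed forms operate on plain integer containers holding the same bits.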