[NDS32] Add DSP extension instructions.
authorMonk Chiang <sh.chiang04@gmail.com>
Sun, 13 May 2018 05:41:37 +0000 (05:41 +0000)
committerChung-Ju Wu <jasonwucj@gcc.gnu.org>
Sun, 13 May 2018 05:41:37 +0000 (05:41 +0000)
gcc/
* config.gcc (nds32be-*-*): Handle --with-ext-dsp.
* config/nds32/constants.md (unspec_element, unspec_volatile_element):
Add enum values for DSP extension instructions.
* config/nds32/constraints.md (Iu06, IU06, CVp5, CVs5, CVs2, CVhi):
New constraints.
* config/nds32/iterators.md (shifts, shiftrt, sat_plus, all_plus,
sat_minus, all_minus, plus_minus, extend, sumax, sumin, sumin_max):
New code iterators.
(su, zs, uk, opcode, add_rsub, add_sub): New code attributes.
* config/nds32/nds32-dspext.md: New file for DSP implementation.
* config/nds32/nds32-intrinsic.c: Implementation of DSP extension.
* config/nds32/nds32-intrinsic.md: Likewise.
* config/nds32/nds32_intrinsic.h: Likewise.
* config/nds32/nds32-md-auxiliary.c: Likewise.
* config/nds32/nds32-memory-manipulation.c: Consider DSP extension.
* config/nds32/nds32-predicates.c (const_vector_to_hwint): New.
(nds32_valid_CVp5_p, nds32_valid_CVs5_p): New.
(nds32_valid_CVs2_p, nds32_valid_CVhi_p): New.
* config/nds32/nds32-protos.h: New declarations for DSP extension.
* config/nds32/nds32-utils.c (extract_mac_non_acc_rtx): New case
TYPE_DMAC in switch statement.
* config/nds32/nds32.c: New checking and implementation for DSP
extension instructions.
* config/nds32/nds32.h: Likewise.
* config/nds32/nds32.md: Likewise.
* config/nds32/nds32.opt (mhw-abs, mext-dsp): New options.
* config/nds32/predicates.md: Implement new predicates for DSP
extension.

Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com>
Co-Authored-By: Kito Cheng <kito.cheng@gmail.com>
From-SVN: r260206

19 files changed:
gcc/ChangeLog
gcc/config.gcc
gcc/config/nds32/constants.md
gcc/config/nds32/constraints.md
gcc/config/nds32/iterators.md
gcc/config/nds32/nds32-dspext.md [new file with mode: 0644]
gcc/config/nds32/nds32-intrinsic.c
gcc/config/nds32/nds32-intrinsic.md
gcc/config/nds32/nds32-md-auxiliary.c
gcc/config/nds32/nds32-memory-manipulation.c
gcc/config/nds32/nds32-predicates.c
gcc/config/nds32/nds32-protos.h
gcc/config/nds32/nds32-utils.c
gcc/config/nds32/nds32.c
gcc/config/nds32/nds32.h
gcc/config/nds32/nds32.md
gcc/config/nds32/nds32.opt
gcc/config/nds32/nds32_intrinsic.h
gcc/config/nds32/predicates.md

index 92dc3f4..7b788ca 100644 (file)
@@ -1,3 +1,36 @@
+2018-05-13  Monk Chiang  <sh.chiang04@gmail.com>
+           Kito Cheng  <kito.cheng@gmail.com>
+           Chung-Ju Wu  <jasonwucj@gmail.com>
+
+       * config.gcc (nds32be-*-*): Handle --with-ext-dsp.
+       * config/nds32/constants.md (unspec_element, unspec_volatile_element):
+       Add enum values for DSP extension instructions.
+       * config/nds32/constraints.md (Iu06, IU06, CVp5, CVs5, CVs2, CVhi):
+       New constraints.
+       * config/nds32/iterators.md (shifts, shiftrt, sat_plus, all_plus,
+       sat_minus, all_minus, plus_minus, extend, sumax, sumin, sumin_max):
+       New code iterators.
+       (su, zs, uk, opcode, add_rsub, add_sub): New code attributes.
+       * config/nds32/nds32-dspext.md: New file for DSP implementation.
+       * config/nds32/nds32-intrinsic.c: Implementation of DSP extension.
+       * config/nds32/nds32-intrinsic.md: Likewise.
+       * config/nds32/nds32_intrinsic.h: Likewise.
+       * config/nds32/nds32-md-auxiliary.c: Likewise.
+       * config/nds32/nds32-memory-manipulation.c: Consider DSP extension.
+       * config/nds32/nds32-predicates.c (const_vector_to_hwint): New.
+       (nds32_valid_CVp5_p, nds32_valid_CVs5_p): New.
+       (nds32_valid_CVs2_p, nds32_valid_CVhi_p): New.
+       * config/nds32/nds32-protos.h: New declarations for DSP extension.
+       * config/nds32/nds32-utils.c (extract_mac_non_acc_rtx): New case
+       TYPE_DMAC in switch statement.
+       * config/nds32/nds32.c: New checking and implementation for DSP
+       extension instructions.
+       * config/nds32/nds32.h: Likewise.
+       * config/nds32/nds32.md: Likewise.
+       * config/nds32/nds32.opt (mhw-abs, mext-dsp): New options.
+       * config/nds32/predicates.md: Implement new predicates for DSP
+       extension.
+
 2018-05-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
        * config/rs6000/rs6000.md (mov<mode>_softfloat, FMOVE32):
index 9c4a849..fec118c 100644 (file)
@@ -2346,6 +2346,11 @@ nds32be-*-*)
        tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
        tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h"
        tmake_file="nds32/t-nds32 nds32/t-mlibs"
+
+       # Handle --with-ext-dsp
+       if test x${with_ext_dsp} = xyes; then
+               tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1"
+       fi
        ;;
 nios2-*-*)
        tm_file="elfos.h ${tm_file}"
index 37c2704..cba989f 100644 (file)
   UNSPEC_FFB
   UNSPEC_FFMISM
   UNSPEC_FLMISM
+  UNSPEC_KDMBB
+  UNSPEC_KDMBT
+  UNSPEC_KDMTB
+  UNSPEC_KDMTT
+  UNSPEC_KHMBB
+  UNSPEC_KHMBT
+  UNSPEC_KHMTB
+  UNSPEC_KHMTT
+  UNSPEC_KSLRAW
+  UNSPEC_KSLRAWU
   UNSPEC_SVA
   UNSPEC_SVS
   UNSPEC_WSBH
   UNSPEC_UASTORE_HW
   UNSPEC_UASTORE_W
   UNSPEC_UASTORE_DW
+  UNSPEC_ROUND
+  UNSPEC_VEC_COMPARE
+  UNSPEC_KHM
+  UNSPEC_KHMX
+  UNSPEC_CLIP_OV
+  UNSPEC_CLIPS_OV
+  UNSPEC_BITREV
+  UNSPEC_KABS
+  UNSPEC_LOOP_END
+  UNSPEC_TLS_DESC
+  UNSPEC_TLS_IE
+  UNSPEC_KADDH
+  UNSPEC_KSUBH
 ])
 
 ;; The unspec_volatile operation index.
   UNSPEC_VOLATILE_UNALIGNED_FEATURE
   UNSPEC_VOLATILE_ENABLE_UNALIGNED
   UNSPEC_VOLATILE_DISABLE_UNALIGNED
+  UNSPEC_VOLATILE_RDOV
+  UNSPEC_VOLATILE_CLROV
 ])
 
 ;; ------------------------------------------------------------------------
index 7af7769..500f1a4 100644 (file)
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, -31, 0)")))
 
+(define_constraint "Iu06"
+  "Unsigned immediate 6-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 6) && ival >= 0")))
+
 ;; Ip05 is special and dedicated for v3 movpi45 instruction.
 ;; movpi45 has imm5u field but the range is 16 ~ 47.
 (define_constraint "Ip05"
                    && ival >= (0 + 16)
                    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
 
-(define_constraint "Iu06"
+(define_constraint "IU06"
   "Unsigned immediate 6-bit value constraint for addri36.sp instruction"
   (and (match_code "const_int")
-       (match_test "ival < (1 << 6)
+       (match_test "ival < (1 << 8)
                    && ival >= 0
                    && (ival % 4 == 0)
                    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
        (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M)
                    && (IN_RANGE (exact_log2 (ival + 1), 1, 8))")))
 
+(define_constraint "CVp5"
+  "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVp5_p (op)")))
+
+(define_constraint "CVs5"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs5_p (op)")))
+
+(define_constraint "CVs2"
+  "Signed immediate 20-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs2_p (op)")))
+
+(define_constraint "CVhi"
+  "The immediate value that can be simply set high 20-bit"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVhi_p (op)")))
 
 (define_memory_constraint "U33"
   "Memory constraint for 333 format"
index c2062de..f4fb581 100644 (file)
 ;; shifts
 (define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert])
 
+(define_code_iterator shifts [ashift ashiftrt lshiftrt])
+
+(define_code_iterator shiftrt [ashiftrt lshiftrt])
+
+(define_code_iterator sat_plus [ss_plus us_plus])
+
+(define_code_iterator all_plus [plus ss_plus us_plus])
+
+(define_code_iterator sat_minus [ss_minus us_minus])
+
+(define_code_iterator all_minus [minus ss_minus us_minus])
+
+(define_code_iterator plus_minus [plus minus])
+
+(define_code_iterator extend [sign_extend zero_extend])
+
+(define_code_iterator sumax [smax umax])
+
+(define_code_iterator sumin [smin umin])
+
+(define_code_iterator sumin_max [smax umax smin umin])
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes.
 ;;----------------------------------------------------------------------------
 (define_code_attr shift
   [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")])
 
+(define_code_attr su
+  [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")])
+
+(define_code_attr zs
+  [(sign_extend "s") (zero_extend "z")])
+
+(define_code_attr uk
+  [(plus "") (ss_plus "k") (us_plus "uk")
+   (minus "") (ss_minus "k") (us_minus "uk")])
+
+(define_code_attr opcode
+  [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")])
+
+(define_code_attr add_rsub
+  [(plus "a") (minus "rs")])
+
+(define_code_attr add_sub
+  [(plus "a") (minus "s")])
 
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md
new file mode 100644 (file)
index 0000000..4151353
--- /dev/null
@@ -0,0 +1,5264 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+       (match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  /* If operands[1] is a large constant and cannot be performed
+     by a single instruction, we need to split it.  */
+  if (GET_CODE (operands[1]) == CONST_VECTOR
+      && !satisfies_constraint_CVs2 (operands[1])
+      && !satisfies_constraint_CVhi (operands[1]))
+    {
+      HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]);
+      rtx tmp_rtx;
+
+      tmp_rtx = can_create_pseudo_p ()
+               ? gen_reg_rtx (SImode)
+               : simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+
+      emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode));
+      convert_move (operands[0], tmp_rtx, false);
+      DONE;
+    }
+})
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$  l,$  l,$  l,$  d,  d, r,$   d,    r,    r,    r, *f, *f,  r, *f,  Q")
+       (match_operand:VQIHI 1 "nds32_vmove_operand"  " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f,  r, *f,  Q, *f"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], <MODE>mode)
+       || register_operand(operands[1], <MODE>mode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+    case 11:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 12:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 13:
+      return "movpi45\t%0, %1";
+    case 14:
+      return "movi55\t%0, %1";
+    case 15:
+      return "movi\t%0, %1";
+    case 16:
+      return "sethi\t%0, hi20(%1)";
+    case 17:
+      if (TARGET_FPU_SINGLE)
+       return "fcpyss\t%0, %1, %1";
+      else
+       return "#";
+    case 18:
+      return "fmtsr\t%1, %0";
+    case 19:
+      return "fmfsr\t%0, %1";
+    case 20:
+      return nds32_output_float_load (operands);
+    case 21:
+      return nds32_output_float_store (operands);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore")
+   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   2,   4,  2,  2,  4,  4,   4,    4,    4,    4,     4")
+   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1, v3m,  v1, v1, v1, v1, v1, fpu,  fpu,  fpu,  fpu,   fpu")])
+
+(define_expand "movv2si"
+  [(set (match_operand:V2SI 0 "general_operand" "")
+       (match_operand:V2SI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (V2SImode, operands[1]);
+})
+
+(define_insn "*movv2si"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r,  r, r, Da, m, f, Q, f, r, f")
+       (match_operand:V2SI 1 "general_operand"      " r, i, Da, m,  r, r, Q, f, f, f, r"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], V2SImode)
+       || register_operand(operands[1], V2SImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "movd44\t%0, %1";
+    case 1:
+      /* reg <- const_int, we ask gcc to split instruction.  */
+      return "#";
+    case 2:
+      /* The memory format is (mem (reg)),
+        we can generate 'lmw.bi' instruction.  */
+      return nds32_output_double (operands, true);
+    case 3:
+      /* We haven't 64-bit load instruction,
+        we split this pattern to two SImode pattern.  */
+      return "#";
+    case 4:
+      /* The memory format is (mem (reg)),
+        we can generate 'smw.bi' instruction.  */
+      return nds32_output_double (operands, false);
+    case 5:
+      /* We haven't 64-bit store instruction,
+        we split this pattern to two SImode pattern.  */
+      return "#";
+    case 6:
+      return nds32_output_float_load (operands);
+    case 7:
+      return nds32_output_float_store (operands);
+    case 8:
+      return "fcpysd\t%0, %1, %1";
+    case 9:
+      return "fmfdr\t%0, %1";
+    case 10:
+      return "fmtdr\t%1, %0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "!TARGET_16_BIT")
+                    (const_int 4)
+                    (const_int 2))
+       ;; Alternative 1
+       (const_int 16)
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 8)
+       ;; Alternative 4
+       (const_int 4)
+       ;; Alternative 5
+       (const_int 8)
+       ;; Alternative 6
+       (const_int 4)
+       ;; Alternative 7
+       (const_int 4)
+       ;; Alternative 8
+       (const_int 4)
+       ;; Alternative 9
+       (const_int 4)
+       ;; Alternative 10
+       (const_int 4)
+     ])
+   (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+       (match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx addr;
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (MEM_P (operands[0]))
+    {
+      addr = force_reg (Pmode, XEXP (operands[0], 0));
+      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
+    }
+  else
+    {
+      addr = force_reg (Pmode, XEXP (operands[1], 0));
+      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
+    }
+  DONE;
+})
+
+(define_expand "unaligned_load<mode>"
+  [(set (match_operand:VQIHI 0 "register_operand" "=r")
+       (unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_load (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1])));
+  DONE;
+})
+
+(define_insn "unaligned_load_w<mode>"
+  [(set (match_operand:VQIHI 0 "register_operand"                          "=  r")
+       (unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_lmw_single_word (operands);
+}
+  [(set_attr "type"   "load")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "unaligned_store<mode>"
+  [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r"))
+       (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_store (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_store_w<mode> (gen_rtx_MEM (<MODE>mode, operands[0]), operands[1]));
+  DONE;
+})
+
+(define_insn "unaligned_store_w<mode>"
+  [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand"      "=Umw")
+       (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "   r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_smw_single_word (operands);
+}
+  [(set_attr "type"   "store")
+   (set_attr "length"     "4")]
+)
+
+(define_insn "<uk>add<mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"                 "=r")
+       (all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+                       (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>add<bits> %0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "<uk>adddi3"
+  [(set (match_operand:DI 0 "register_operand"              "=r")
+       (all_plus:DI (match_operand:DI 1 "register_operand" " r")
+                    (match_operand:DI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>add64 %0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "raddv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
+       (truncate:V4QI
+         (ashiftrt:V4HI
+           (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+                      (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+
+(define_insn "uraddv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
+       (truncate:V4QI
+         (lshiftrt:V4HI
+           (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+                      (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "raddv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
+       (truncate:V2HI
+         (ashiftrt:V2SI
+           (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+                      (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "uraddv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
+       (truncate:V2HI
+         (lshiftrt:V2SI
+           (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+                      (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "radddi3"
+  [(set (match_operand:DI 0 "register_operand"            "=r")
+       (truncate:DI
+         (ashiftrt:TI
+           (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
+                    (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
+         (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+
+(define_insn "uradddi3"
+  [(set (match_operand:DI 0 "register_operand"            "=r")
+       (truncate:DI
+         (lshiftrt:TI
+           (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
+                    (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
+         (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "<uk>sub<mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"                  "=r")
+       (all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+                        (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>sub<bits> %0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "<uk>subdi3"
+  [(set (match_operand:DI 0 "register_operand"               "=r")
+       (all_minus:DI (match_operand:DI 1 "register_operand" " r")
+                     (match_operand:DI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>sub64 %0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "rsubv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
+       (truncate:V4QI
+         (ashiftrt:V4HI
+           (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+                       (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "ursubv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
+       (truncate:V4QI
+         (lshiftrt:V4HI
+           (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+                       (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "rsubv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
+       (truncate:V2HI
+         (ashiftrt:V2SI
+           (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+                       (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "ursubv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
+       (truncate:V2HI
+         (lshiftrt:V2SI
+           (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+                       (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "rsubdi3"
+  [(set (match_operand:DI 0 "register_operand"                   "=r")
+       (truncate:DI
+         (ashiftrt:TI
+           (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
+                     (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
+         (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")])
+
+
+(define_insn "ursubdi3"
+  [(set (match_operand:DI 0 "register_operand"                   "=r")
+       (truncate:DI
+         (lshiftrt:TI
+           (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
+                     (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
+         (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")])
+
+(define_expand "cras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "cras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (plus:HI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 1)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "cras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "cras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (plus:HI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "cras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "kcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "kcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (ss_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (ss_plus:HI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 1)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "kcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "kcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (ss_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (ss_plus:HI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "kcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "ukcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "ukcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (us_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (us_plus:HI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 1)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "ukcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "ukcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (us_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (us_plus:HI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "ukcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "crsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "crsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (plus:HI
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "crsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "crsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (plus:HI
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "crsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "kcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "kcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (ss_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (ss_plus:HI
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "kcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "kcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (ss_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (ss_plus:HI
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "kcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "ukcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "ukcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (us_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (us_plus:HI
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "ukcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "ukcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (us_minus:HI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand" " r")
+               (parallel [(const_int 0)]))
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (us_plus:HI
+             (vec_select:HI
+               (match_dup 1)
+               (parallel [(const_int 1)]))
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "ukcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "rcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "rcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (minus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 0)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (plus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 0)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "rcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (minus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 1)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (plus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 1)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "urcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "urcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (minus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 0)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (plus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 0)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "urcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "urcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (minus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 1)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (plus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 1)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "urcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "rcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "rcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (minus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 1)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (plus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 0)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "rcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (minus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 0)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (ashiftrt:SI
+               (plus:SI
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 1)])))
+                 (sign_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "urcrsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "urcrsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (minus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 1)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (plus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 0)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "urcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "urcrsa16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (minus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 1 "register_operand" " r")
+                     (parallel [(const_int 0)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand" " r")
+                     (parallel [(const_int 1)]))))
+               (const_int 1))))
+         (vec_duplicate:V2HI
+           (truncate:HI
+             (lshiftrt:SI
+               (plus:SI
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 1)
+                     (parallel [(const_int 1)])))
+                 (zero_extend:SI
+                   (vec_select:HI
+                     (match_dup 2)
+                     (parallel [(const_int 0)]))))
+               (const_int 1))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "urcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "<shift>v2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                  "")
+       (shifts:V2HI (match_operand:V2HI 1 "register_operand"     "")
+                    (match_operand:SI   2 "nds32_rimm4u_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (operands[2] == const0_rtx)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+})
+
+(define_insn "*ashlv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r, r")
+       (ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+                    (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   slli16\t%0, %1, %2
+   sll16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "kslli16"
+  [(set (match_operand:V2HI 0 "register_operand"                   "=   r, r")
+       (ss_ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+                       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli16\t%0, %1, %2
+   ksll16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "*ashrv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
+       (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+                      (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai16\t%0, %1, %2
+   sra16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "sra16_round"
+  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
+       (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+                                    (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
+                    UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai16.u\t%0, %1, %2
+   sra16.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround,daluround")
+   (set_attr "length" "         4,       4")])
+
+(define_insn "*lshrv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
+       (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+                      (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srli16\t%0, %1, %2
+   srl16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "srl16_round"
+  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
+       (unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+                                    (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
+                    UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srli16.u\t%0, %1, %2
+   srl16.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround,daluround")
+   (set_attr "length" "        4,        4")])
+
+(define_insn "kslra16"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
+       (if_then_else:V2HI
+         (lt:SI (match_operand:SI 2 "register_operand"           " r")
+                (const_int 0))
+         (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")
+                        (neg:SI (match_dup 2)))
+         (ashift:V2HI (match_dup 1)
+                      (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslra16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "kslra16_round"
+  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
+       (if_then_else:V2HI
+         (lt:SI (match_operand:SI 2 "register_operand"           " r")
+                (const_int 0))
+         (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")
+                                      (neg:SI (match_dup 2)))]
+                      UNSPEC_ROUND)
+         (ashift:V2HI (match_dup 1)
+                      (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslra16.u\t%0, %1, %2"
+  [(set_attr "type"    "daluround")
+   (set_attr "length"  "4")])
+
+(define_insn "cmpeq<bits>"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r")
+                          (match_operand:VQIHI 2 "register_operand" " r"))]
+                  UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "cmpeq<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "scmplt<bits>"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r")
+                          (match_operand:VQIHI 2 "register_operand" " r"))]
+                  UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "scmplt<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "scmple<bits>"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r")
+                          (match_operand:VQIHI 2 "register_operand" " r"))]
+                  UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "scmple<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "ucmplt<bits>"
+  [(set (match_operand:SI 0 "register_operand"                        "=r")
+       (unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r")
+                           (match_operand:VQIHI 2 "register_operand" " r"))]
+                  UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "ucmplt<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "ucmple<bits>"
+  [(set (match_operand:SI 0 "register_operand"                        "=r")
+       (unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r")
+                           (match_operand:VQIHI 2 "register_operand" " r"))]
+                  UNSPEC_VEC_COMPARE))]
+  "NDS32_EXT_DSP_P ()"
+  "ucmple<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "sclip16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
+                     (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
+                    UNSPEC_CLIPS))]
+  "NDS32_EXT_DSP_P ()"
+  "sclip16\t%0, %1, %2"
+  [(set_attr "type"    "dclip")
+   (set_attr "length"  "4")])
+
+(define_insn "uclip16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
+                     (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
+                    UNSPEC_CLIP))]
+  "NDS32_EXT_DSP_P ()"
+  "uclip16\t%0, %1, %2"
+  [(set_attr "type"    "dclip")
+   (set_attr "length"  "4")])
+
+(define_insn "khm16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
+                     (match_operand:V2HI 2 "register_operand" "  r")]
+                    UNSPEC_KHM))]
+  "NDS32_EXT_DSP_P ()"
+  "khm16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"  "4")])
+
+(define_insn "khmx16"
+  [(set (match_operand:V2HI 0 "register_operand"                "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
+                     (match_operand:V2HI 2 "register_operand" "  r")]
+                    UNSPEC_KHMX))]
+  "NDS32_EXT_DSP_P ()"
+  "khmx16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_setv4qi"
+  [(match_operand:V4QI 0 "register_operand" "")
+   (match_operand:QI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  HOST_WIDE_INT pos = INTVAL (operands[2]);
+  if (pos > 4)
+    gcc_unreachable ();
+  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;
+  emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1],
+                                      operands[0], GEN_INT (elem)));
+  DONE;
+})
+
+(define_expand "insb"
+  [(match_operand:V4QI 0 "register_operand" "")
+   (match_operand:V4QI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0)
+    gcc_unreachable ();
+
+  rtx src = gen_reg_rtx (QImode);
+
+  convert_move (src, operands[2], false);
+
+  HOST_WIDE_INT selector_index;
+  /* Big endian need reverse index. */
+  if (TARGET_BIG_ENDIAN)
+    selector_index = 4 - INTVAL (operands[3]) - 1;
+  else
+    selector_index = INTVAL (operands[3]);
+  rtx selector = gen_int_mode (1 << selector_index, SImode);
+  emit_insn (gen_vec_setv4qi_internal (operands[0], src,
+                                      operands[1], selector));
+  DONE;
+})
+
+(define_expand "insvsi"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "")
+                        (match_operand:SI 1 "const_int_operand" "")
+                        (match_operand:SI 2 "nds32_insv_operand" ""))
+       (match_operand:SI 3 "register_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[1]) != 8)
+    FAIL;
+}
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+
+(define_insn "insvsi_internal"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
+                        (const_int 8)
+                        (match_operand:SI 1 "nds32_insv_operand"  "i"))
+       (match_operand:SI 2                  "register_operand"    "r"))]
+  "NDS32_EXT_DSP_P ()"
+  "insb\t%0, %2, %v1"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "insvsiqi_internal"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
+                        (const_int 8)
+                        (match_operand:SI 1 "nds32_insv_operand"  "i"))
+       (zero_extend:SI (match_operand:QI 2 "register_operand"    "r")))]
+  "NDS32_EXT_DSP_P ()"
+  "insb\t%0, %2, %v1"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+;; Intermedium pattern for synthetize insvsiqi_internal
+;; v0 = ((v1 & 0xff) << 8)
+(define_insn_and_split "and0xff_s8"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+                          (const_int 8))
+               (const_int 65280)))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode)));
+  emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode)));
+  DONE;
+})
+
+;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) | 0xff0000)
+(define_insn_and_split "insbsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+                       (const_int -16711681))
+               (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                                  (const_int 16))
+                       (const_int 16711680))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_move_insn (tmp, operands[1]);
+  emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2]));
+  emit_move_insn (operands[0], tmp);
+  DONE;
+})
+
+;; v0 = (v1 & 0xff00ffff) | v2
+(define_insn_and_split "ior_and0xff00ffff_reg"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+                       (const_int -16711681))
+               (match_operand:SI 2 "register_operand" "r")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode)));
+  emit_insn (gen_iorsi3 (operands[0], tmp, operands[2]));
+  DONE;
+})
+
+(define_insn "vec_setv4qi_internal"
+  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
+       (vec_merge:V4QI
+         (vec_duplicate:V4QI
+           (match_operand:QI 1 "register_operand"        "    r,    r,    r,    r"))
+         (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
+         (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+       const char *pats[] = { "insb\t%0, %1, 3",
+                             "insb\t%0, %1, 2",
+                             "insb\t%0, %1, 1",
+                             "insb\t%0, %1, 0" };
+      return pats[which_alternative];
+    }
+  else
+    {
+       const char *pats[] = { "insb\t%0, %1, 0",
+                             "insb\t%0, %1, 1",
+                             "insb\t%0, %1, 2",
+                             "insb\t%0, %1, 3" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_setv4qi_internal_vec"
+  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
+       (vec_merge:V4QI
+         (vec_duplicate:V4QI
+           (vec_select:QI
+             (match_operand:V4QI 1 "register_operand"    "    r,    r,    r,    r")
+             (parallel [(const_int 0)])))
+         (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
+         (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   insb\t%0, %1, 0
+   insb\t%0, %1, 1
+   insb\t%0, %1, 2
+   insb\t%0, %1, 3"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergev4qi_and_cv0_1"
+  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
+       (vec_merge:V4QI
+         (vec_duplicate:V4QI
+           (vec_select:QI
+             (match_operand:V4QI 1 "register_operand" "  l,r")
+             (parallel [(const_int 0)])))
+         (const_vector:V4QI [
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)])
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergev4qi_and_cv0_2"
+  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
+       (vec_merge:V4QI
+         (const_vector:V4QI [
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)])
+         (vec_duplicate:V4QI
+           (vec_select:QI
+             (match_operand:V4QI 1 "register_operand" "  l,r")
+             (parallel [(const_int 0)])))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergeqi_and_cv0_1"
+  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
+       (vec_merge:V4QI
+         (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
+         (const_vector:V4QI [
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)])
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergeqi_and_cv0_2"
+  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
+       (vec_merge:V4QI
+         (const_vector:V4QI [
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)
+           (const_int 0)])
+         (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_expand "vec_setv2hi"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:HI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  HOST_WIDE_INT pos = INTVAL (operands[2]);
+  if (pos > 2)
+    gcc_unreachable ();
+  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;
+  emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1],
+                                      operands[0], GEN_INT (elem)));
+  DONE;
+})
+
+(define_insn "vec_setv2hi_internal"
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (match_operand:HI 1 "register_operand"    "    r,    r"))
+         (match_operand:V2HI 2 "register_operand"    "    r,    r")
+         (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pkbb16\t%0, %1, %2",
+                            "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pktb16\t%0, %2, %1",
+                            "pkbb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergev2hi_and_cv0_1"
+  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (vec_select:HI
+             (match_operand:V2HI 1 "register_operand" "  l,r")
+             (parallel [(const_int 0)])))
+         (const_vector:V2HI [
+           (const_int 0)
+           (const_int 0)])
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergev2hi_and_cv0_2"
+  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
+       (vec_merge:V2HI
+         (const_vector:V2HI [
+           (const_int 0)
+           (const_int 0)])
+         (vec_duplicate:V2HI
+           (vec_select:HI
+             (match_operand:V2HI 1 "register_operand" "  l,r")
+             (parallel [(const_int 0)])))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergehi_and_cv0_1"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
+         (const_vector:V2HI [
+           (const_int 0)
+           (const_int 0)])
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergehi_and_cv0_2"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
+       (vec_merge:V2HI
+         (const_vector:V2HI [
+           (const_int 0)
+           (const_int 0)])
+         (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_expand "pkbb"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (1), GEN_INT (1), GEN_INT (1)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (2), GEN_INT (0), GEN_INT (0)));
+    }
+  DONE;
+})
+
+(define_insn "pkbbsi_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+                       (const_int 65535))
+               (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                          (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                          (const_int 16))
+               (and:SI (match_operand:SI 1 "register_operand" "r")
+                       (const_int 65535))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+               (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                          (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_4"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+                          (const_int 16))
+               (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff)
+(define_insn "pktbsi_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+                       (const_int -65536))
+               (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+                       (const_int -65536))
+               (and:SI (match_operand:SI 2 "register_operand" "r")
+                       (const_int 65535))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_3"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+                        (const_int 16 )
+                        (const_int 0))
+       (match_operand:SI 1 "register_operand"                  " r"))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_4"
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+                        (const_int 16 )
+                        (const_int 0))
+       (zero_extend:SI (match_operand:HI 1 "register_operand"  " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkttsi"
+  [(set (match_operand:SI 0 "register_operand"                      "=r")
+       (ior:SI (and:SI (match_operand:SI 1 "register_operand"      " r")
+                       (const_int -65536))
+               (lshiftrt:SI (match_operand:SI 2 "register_operand" " r")
+                            (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktt16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "pkbt"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (1), GEN_INT (1), GEN_INT (0)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (2), GEN_INT (0), GEN_INT (1)));
+    }
+  DONE;
+})
+
+(define_expand "pktt"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (1), GEN_INT (0), GEN_INT (0)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (2), GEN_INT (1), GEN_INT (1)));
+    }
+  DONE;
+})
+
+(define_expand "pktb"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (1), GEN_INT (0), GEN_INT (1)));
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+                                 GEN_INT (2), GEN_INT (1), GEN_INT (0)));
+    }
+  DONE;
+})
+
+(define_insn "vec_mergerr"
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (match_operand:HI 1 "register_operand"    "    r,    r"))
+         (vec_duplicate:V2HI
+           (match_operand:HI 2 "register_operand"    "    r,    r"))
+         (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pkbb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+
+(define_insn "vec_merge"
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+       (vec_merge:V2HI
+         (match_operand:V2HI 1 "register_operand"    "    r,    r")
+         (match_operand:V2HI 2 "register_operand"    "    r,    r")
+         (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pktb16\t%0, %1, %2",
+                            "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pktb16\t%0, %2, %1",
+                            "pktb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergerv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (match_operand:HI 1 "register_operand"                   "    r,    r,    r,    r"))
+         (vec_duplicate:V2HI
+           (vec_select:HI
+             (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+             (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
+         (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv02, Iv02")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pktb16\t%0, %2, %1
+   pkbb16\t%0, %1, %2
+   pkbt16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergevr"
+  [(set (match_operand:V2HI 0 "register_operand"                      "=   r,    r,    r,    r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (vec_select:HI
+             (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+              (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
+         (vec_duplicate:V2HI
+           (match_operand:HI 2 "register_operand"                    "    r,    r,    r,    r"))
+         (match_operand:SI 3 "nds32_imm_1_2_operand"                 " Iv01, Iv01, Iv02, Iv02")))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pkbt16\t%0, %2, %1
+   pkbb16\t%0, %1, %2
+   pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergevv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r,    r,    r,    r,    r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (vec_select:HI
+             (match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
+             (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")])))
+         (vec_duplicate:V2HI
+           (vec_select:HI
+             (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
+             (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")])))
+         (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pktt16\t%0, %1, %2",
+                            "pktb16\t%0, %1, %2",
+                            "pkbb16\t%0, %1, %2",
+                            "pkbt16\t%0, %1, %2",
+                            "pktt16\t%0, %2, %1",
+                            "pkbt16\t%0, %2, %1",
+                            "pkbb16\t%0, %2, %1",
+                            "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pkbb16\t%0, %2, %1",
+                            "pktb16\t%0, %2, %1",
+                            "pktt16\t%0, %2, %1",
+                            "pkbt16\t%0, %2, %1",
+                            "pkbb16\t%0, %1, %2",
+                            "pkbt16\t%0, %1, %2",
+                            "pktt16\t%0, %1, %2",
+                            "pktb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_extractv4qi"
+  [(set (match_operand:QI 0 "register_operand" "")
+       (vec_select:QI
+         (match_operand:V4QI 1          "nonimmediate_operand" "")
+         (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  if (INTVAL (operands[2]) != 0
+      && INTVAL (operands[2]) != 1
+      && INTVAL (operands[2]) != 2
+      && INTVAL (operands[2]) != 3)
+    gcc_unreachable ();
+
+  if (INTVAL (operands[2]) != 0 && MEM_P (operands[0]))
+    FAIL;
+})
+
+(define_insn "vec_extractv4qi0"
+  [(set (match_operand:QI 0 "register_operand"         "=l,r,r")
+       (vec_select:QI
+         (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+         (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeb33\t%0, %1";
+    case 1:
+      return "zeb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi0_ze"
+  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
+       (zero_extend:SI
+         (vec_select:QI
+           (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+           (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeb33\t%0, %1";
+    case 1:
+      return "zeb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi0_se"
+  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
+       (sign_extend:SI
+         (vec_select:QI
+           (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+           (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seb33\t%0, %1";
+    case 1:
+      return "seb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi1"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+       (vec_select:QI
+         (match_operand:V4QI 1 "register_operand" " r")
+         (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_1 (tmp, operands[1]));
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi2"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+       (vec_select:QI
+         (match_operand:V4QI 1 "register_operand" " r")
+         (parallel [(const_int 2)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_2 (tmp, operands[1]));
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi3"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+       (vec_select:QI
+         (match_operand:V4QI 1 "register_operand" " r")
+         (parallel [(const_int 3)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_3 (tmp, operands[1]));
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi3_se"
+  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
+       (sign_extend:SI
+         (vec_select:QI
+           (match_operand:V4QI 1 "register_operand" "  0,r")
+           (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 24
+   srai\t%0, %1, 24"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv4qi3_ze"
+  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
+       (zero_extend:SI
+         (vec_select:QI
+           (match_operand:V4QI 1 "register_operand" "  0,r")
+           (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srli45\t%0, 24
+   srli\t%0, %1, 24"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn_and_split "vec_extractv4qihi0"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+       (sign_extend:HI
+         (vec_select:QI
+           (match_operand:V4QI 1 "register_operand" " r")
+           (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi0 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi1"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+       (sign_extend:HI
+         (vec_select:QI
+           (match_operand:V4QI 1 "register_operand" " r")
+           (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi1 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+       (sign_extend:HI
+         (vec_select:QI
+           (match_operand:V4QI 1 "register_operand" " r")
+           (parallel [(const_int 2)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi2 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi3"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+       (sign_extend:HI
+         (vec_select:QI
+           (match_operand:V4QI 1 "register_operand" " r")
+           (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi3 (tmp, operands[1]));
+  emit_insn (gen_extendqihi2 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_extractv2hi"
+  [(set (match_operand:HI 0 "register_operand" "")
+       (vec_select:HI
+         (match_operand:V2HI 1          "nonimmediate_operand" "")
+         (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[2]) != 0
+      && INTVAL (operands[2]) != 1)
+    gcc_unreachable ();
+
+  if (INTVAL (operands[2]) != 0 && MEM_P (operands[0]))
+    FAIL;
+})
+
+(define_insn "vec_extractv2hi0"
+  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
+       (vec_select:HI
+         (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+         (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load")
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "vec_extractv2hi0_ze"
+  [(set (match_operand:SI 0 "register_operand"         "=$l, r,$  l, *r")
+        (zero_extend:SI
+         (vec_select:HI
+           (match_operand:V2HI 1 "nonimmediate_operand" "  l, r, U33,  m")
+           (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeh33\t%0, %1";
+    case 1:
+      return "zeh\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, 2);
+    case 3:
+      return nds32_output_32bit_load (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+(define_insn "vec_extractv2hi0_se"
+  [(set (match_operand:SI 0 "register_operand"         "=$l, r, r")
+        (sign_extend:SI
+         (vec_select:HI
+           (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+           (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+(define_insn "vec_extractv2hi0_be"
+  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
+       (vec_select:HI
+         (match_operand:V2HI 1 "register_operand" "  0,r")
+         (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1"
+  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
+       (vec_select:HI
+         (match_operand:V2HI 1 "register_operand" "  0,r")
+         (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_se"
+  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
+       (sign_extend:SI
+         (vec_select:HI
+           (match_operand:V2HI 1 "register_operand" "  0,r")
+           (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_ze"
+  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
+       (zero_extend:SI
+         (vec_select:HI
+           (match_operand:V2HI 1 "register_operand" "  0,r")
+           (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srli45\t%0, 16
+   srli\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_be"
+  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
+       (vec_select:HI
+         (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+         (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load")
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "<su>mul16"
+  [(set (match_operand:V2SI 0 "register_operand"                         "=r")
+       (mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r"))
+                  (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mul16\t%0, %1, %2"
+  [(set_attr "type"   "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mulx16"
+  [(set (match_operand:V2SI 0 "register_operand"         "=r")
+       (vec_merge:V2SI
+         (vec_duplicate:V2SI
+           (mult:SI
+             (extend:SI
+               (vec_select:HI
+                 (match_operand:V2HI 1 "register_operand" " r")
+                 (parallel [(const_int 0)])))
+             (extend:SI
+               (vec_select:HI
+                 (match_operand:V2HI 2 "register_operand" " r")
+                 (parallel [(const_int 1)])))))
+         (vec_duplicate:V2SI
+           (mult:SI
+             (extend:SI
+               (vec_select:HI
+                 (match_dup 1)
+                 (parallel [(const_int 1)])))
+             (extend:SI
+               (vec_select:HI
+                 (match_dup 2)
+                 (parallel [(const_int 0)])))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mulx16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "rotrv2hi_1"
+  [(set (match_operand:V2HI 0 "register_operand"    "=r")
+       (vec_select:V2HI
+          (match_operand:V2HI 1 "register_operand" " r")
+          (parallel [(const_int 1) (const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv2hi_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"    "=r")
+       (vec_select:V2HI
+          (match_operand:V2HI 1 "register_operand" " r")
+          (parallel [(const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_1"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 8"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_1_be"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 8"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_2"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_2_be"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_3"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 24"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_3_be"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 24"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "v4qi_dup_10"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "pkbb\t%0, %1, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "v4qi_dup_32"
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+       (vec_select:V4QI
+          (match_operand:V4QI 1 "register_operand" " r")
+          (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "pktt\t%0, %1, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_unpacks_lo_v4qi"
+  [(match_operand:V2HI 0 "register_operand" "=r")
+   (match_operand:V4QI 1 "register_operand" " r")]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  emit_insn (gen_sunpkd810 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "sunpkd810"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd810_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd810_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd810_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 1)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd810_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 2)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 3)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd810_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 3)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 2)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd820"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd820_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd820_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 2)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd820_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 2)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd820_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 3)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd820_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 3)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 1)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd830"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd830_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd830_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 3)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd830_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 3)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd830_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 3)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd830_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 3)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd831"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd831_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd831_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 3)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 1)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd831_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 3)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd831_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 0)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 2)]))))
+         (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd831_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_operand:V4QI 1 "register_operand"             " r")
+               (parallel [(const_int 2)]))))
+         (vec_duplicate:V2HI
+           (extend:HI
+             (vec_select:QI
+               (match_dup 1)
+               (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "zunpkd810"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd810_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "zunpkd820"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd820_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "zunpkd830"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd830_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "zunpkd831"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_zunpkd831_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "smbb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+                             GEN_INT (1), GEN_INT (1)));
+  else
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+                             GEN_INT (0), GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smbt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+                             GEN_INT (1), GEN_INT (0)));
+  else
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+                             GEN_INT (0), GEN_INT (1)));
+  DONE;
+})
+
+(define_expand "smtt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+                             GEN_INT (0), GEN_INT (0)));
+  else
+    emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+                             GEN_INT (1), GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "mulhisi3v"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r,    r,    r,    r")
+       (mult:SI
+         (sign_extend:SI
+            (vec_select:HI
+              (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+              (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
+         (sign_extend:SI (vec_select:HI
+              (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
+              (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")])))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smtt\t%0, %1, %2",
+                            "smbt\t%0, %2, %1",
+                            "smbb\t%0, %1, %2",
+                            "smbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smbb\t%0, %1, %2",
+                            "smbt\t%0, %1, %2",
+                            "smtt\t%0, %1, %2",
+                            "smbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_expand "kmabb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+                                GEN_INT (1), GEN_INT (1),
+                                operands[1]));
+  else
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+                                GEN_INT (0), GEN_INT (0),
+                                operands[1]));
+  DONE;
+})
+
+(define_expand "kmabt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+                                GEN_INT (1), GEN_INT (0),
+                                operands[1]));
+  else
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+                                GEN_INT (0), GEN_INT (1),
+                                operands[1]));
+  DONE;
+})
+
+(define_expand "kmatt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+                                GEN_INT (0), GEN_INT (0),
+                                operands[1]));
+  else
+    emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+                                GEN_INT (1), GEN_INT (1),
+                                operands[1]));
+  DONE;
+})
+
+(define_insn "kma_internal"
+  [(set (match_operand:SI 0 "register_operand"                          "=    r,    r,    r,    r")
+       (ss_plus:SI
+         (mult:SI
+           (sign_extend:SI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+               (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
+           (sign_extend:SI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
+               (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")]))))
+         (match_operand:SI 5 "register_operand"                        "     0,    0,    0,    0")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmatt\t%0, %1, %2",
+                            "kmabt\t%0, %2, %1",
+                            "kmabb\t%0, %1, %2",
+                            "kmabt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmabb\t%0, %1, %2",
+                            "kmabt\t%0, %1, %2",
+                            "kmatt\t%0, %1, %2",
+                            "kmabt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "smds"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smds_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_smds_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "smds_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" " r")
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" " r")
+                             (parallel [(const_int 1)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smds_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" " r")
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" " r")
+                             (parallel [(const_int 0)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smdrs"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_smdrs_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "smdrs_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" " r")
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" " r")
+                             (parallel [(const_int 0)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smdrs_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" " r")
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" " r")
+                             (parallel [(const_int 1)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smxdsv"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+
+(define_expand "smxdsv_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" " r")
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" " r")
+                             (parallel [(const_int 0)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_expand "smxdsv_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" " r")
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" " r")
+                             (parallel [(const_int 1)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+(define_insn "smal1"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+       (plus:DI (match_operand:DI 1 "register_operand"    " r")
+         (sign_extend:DI
+           (mult:SI
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_operand:V2HI 2 "register_operand" " r")
+                 (parallel [(const_int 0)])))
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_dup 2)
+                 (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal2"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+       (plus:DI (match_operand:DI 1 "register_operand"  " r")
+         (mult:DI
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)])))
+           (sign_extend:DI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal3"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+       (plus:DI (match_operand:DI 1 "register_operand"    " r")
+         (sign_extend:DI
+           (mult:SI
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_operand:V2HI 2 "register_operand" " r")
+                 (parallel [(const_int 1)])))
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_dup 2)
+                 (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal4"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+       (plus:DI (match_operand:DI 1 "register_operand"  " r")
+         (mult:DI
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)])))
+           (sign_extend:DI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal5"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+       (plus:DI
+         (sign_extend:DI
+           (mult:SI
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_operand:V2HI 2 "register_operand" " r")
+                 (parallel [(const_int 0)])))
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_dup 2)
+                 (parallel [(const_int 1)])))))
+         (match_operand:DI 1 "register_operand"           " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal6"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+       (plus:DI
+         (mult:DI
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 0)])))
+           (sign_extend:DI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 1)]))))
+         (match_operand:DI 1 "register_operand"         " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal7"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+       (plus:DI
+         (sign_extend:DI
+           (mult:SI
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_operand:V2HI 2 "register_operand" " r")
+                 (parallel [(const_int 1)])))
+             (sign_extend:SI
+               (vec_select:HI
+                 (match_dup 2)
+                 (parallel [(const_int 0)])))))
+         (match_operand:DI 1 "register_operand"           " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smal8"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+       (plus:DI
+         (mult:DI
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand" " r")
+               (parallel [(const_int 1)])))
+           (sign_extend:DI
+             (vec_select:HI
+               (match_dup 2)
+               (parallel [(const_int 0)]))))
+         (match_operand:DI 1 "register_operand"         " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; We need this dummy pattern for smal
+(define_insn_and_split "extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+       (sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx high_part_dst, low_part_dst;
+
+  low_part_dst = nds32_di_low_part_subreg (operands[0]);
+  high_part_dst = nds32_di_high_part_subreg (operands[0]);
+
+  emit_move_insn (low_part_dst, operands[1]);
+  emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31)));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; We need this dummy pattern for usmar64/usmsr64
+(define_insn_and_split "zero_extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+       (zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx high_part_dst, low_part_dst;
+
+  low_part_dst = nds32_di_low_part_subreg (operands[0]);
+  high_part_dst = nds32_di_high_part_subreg (operands[0]);
+
+  emit_move_insn (low_part_dst, operands[1]);
+  emit_move_insn (high_part_dst, const0_rtx);
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn_and_split "extendhidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+       (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx high_part_dst, low_part_dst;
+
+  low_part_dst = nds32_di_low_part_subreg (operands[0]);
+  high_part_dst = nds32_di_high_part_subreg (operands[0]);
+
+
+  emit_insn (gen_extendhisi2 (low_part_dst, operands[1]));
+  emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31)));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "extendqihi2"
+  [(set (match_operand:HI 0 "register_operand"                 "=r")
+       (sign_extend:HI (match_operand:QI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "sunpkd820\t%0, %1"
+  [(set_attr "type"       "dpack")
+   (set_attr "length"     "4")])
+
+(define_insn "smulsi3_highpart"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (truncate:SI
+         (lshiftrt:DI
+           (mult:DI
+             (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+             (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+           (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "smmul\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "smmul_round"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (truncate:SI
+         (lshiftrt:DI
+           (unspec:DI [(mult:DI
+                         (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+                         (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))]
+                      UNSPEC_ROUND)
+           (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "smmul.u\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmac"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (ss_plus:SI (match_operand:SI 1 "register_operand"             " 0")
+         (truncate:SI
+           (lshiftrt:DI
+             (mult:DI
+               (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+               (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
+             (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmac\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmac_round"
+  [(set (match_operand:SI 0 "register_operand"                                     "=r")
+       (ss_plus:SI (match_operand:SI 1 "register_operand"                         " 0")
+         (truncate:SI
+           (lshiftrt:DI
+             (unspec:DI [(mult:DI
+                           (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+                           (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
+                        UNSPEC_ROUND)
+             (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmac.u\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmsb"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+       (ss_minus:SI (match_operand:SI 1 "register_operand"            " 0")
+         (truncate:SI
+           (lshiftrt:DI
+             (mult:DI
+               (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+               (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
+             (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmsb\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmmsb_round"
+  [(set (match_operand:SI 0 "register_operand"                                     "=r")
+       (ss_minus:SI (match_operand:SI 1 "register_operand"                        " 0")
+         (truncate:SI
+           (lshiftrt:DI
+             (unspec:DI [(mult:DI
+                           (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+                           (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
+                        UNSPEC_ROUND)
+             (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmsb.u\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kwmmul"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (truncate:SI
+         (lshiftrt:DI
+           (ss_mult:DI
+             (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
+             (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))
+           (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "kwmmul\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "kwmmul_round"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (truncate:SI
+         (lshiftrt:DI
+           (unspec:DI [
+             (ss_mult:DI
+               (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
+               (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))]
+             UNSPEC_ROUND)
+           (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "kwmmul.u\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_expand "smmwb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1)));
+  else
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smmwt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0)));
+  else
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "smulhisi3_highpart_1"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+       (truncate:SI
+         (lshiftrt:DI
+           (mult:DI
+             (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+             (sign_extend:DI
+               (vec_select:HI
+                 (match_operand:V2HI 2 "register_operand"               "    r,    r")
+                 (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+           (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %1, %2",
+                            "smmwb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %1, %2",
+                            "smmwt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "smulhisi3_highpart_2"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+       (truncate:SI
+         (lshiftrt:DI
+           (mult:DI
+             (sign_extend:DI
+               (vec_select:HI
+                 (match_operand:V2HI 1 "register_operand"               "    r,    r")
+                 (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))
+             (sign_extend:DI (match_operand:SI 2 "register_operand"     "    r,    r")))
+           (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %1, %2",
+                            "smmwb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %1, %2",
+                            "smmwt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_expand "smmwb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1)));
+  else
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smmwt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0)));
+  else
+    emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "smmw_round_internal"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+       (truncate:SI
+         (lshiftrt:DI
+           (unspec:DI
+             [(mult:DI
+                (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+                (sign_extend:DI
+                  (vec_select:HI
+                    (match_operand:V2HI 2 "register_operand"               "    r,    r")
+                    (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
+             UNSPEC_ROUND)
+           (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt.u\t%0, %1, %2",
+                            "smmwb.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb.u\t%0, %1, %2",
+                            "smmwt.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+(define_expand "kmmawb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  else
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  DONE;
+})
+
+(define_expand "kmmawt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  else
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  DONE;
+})
+
+(define_insn "kmmaw_internal"
+  [(set (match_operand:SI 0 "register_operand"                               "=   r,    r")
+       (ss_plus:SI
+         (match_operand:SI 4 "register_operand"                             "    0,    0")
+         (truncate:SI
+           (lshiftrt:DI
+             (mult:DI
+               (sign_extend:DI (match_operand:SI 1 "register_operand"       "    r,    r"))
+                 (sign_extend:DI
+                   (vec_select:HI
+                     (match_operand:V2HI 2 "register_operand"               "    r,    r")
+                     (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+             (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmmawt\t%0, %1, %2",
+                            "kmmawb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmmawb\t%0, %1, %2",
+                            "kmmawt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "kmmawb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  else
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_expand "kmmawt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  else
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+
+(define_insn "kmmaw_round_internal"
+  [(set (match_operand:SI 0 "register_operand"                                "=   r,    r")
+       (ss_plus:SI
+         (match_operand:SI 4 "register_operand"                              "    0,    0")
+         (truncate:SI
+           (lshiftrt:DI
+             (unspec:DI
+               [(mult:DI
+                  (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+                  (sign_extend:DI
+                    (vec_select:HI
+                      (match_operand:V2HI 2 "register_operand"               "    r,    r")
+                      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
+               UNSPEC_ROUND)
+             (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmmawt.u\t%0, %1, %2",
+                            "kmmawb.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmmawb.u\t%0, %1, %2",
+                            "kmmawt.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "smalbb"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+                             operands[3], operands[1],
+                             GEN_INT (1), GEN_INT (1)));
+  else
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+                             operands[3], operands[1],
+                             GEN_INT (0), GEN_INT (0)));
+  DONE;
+})
+
+(define_expand "smalbt"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+                             operands[3], operands[1],
+                             GEN_INT (1), GEN_INT (0)));
+  else
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+                             operands[3], operands[1],
+                             GEN_INT (0), GEN_INT (1)));
+  DONE;
+})
+
+(define_expand "smaltt"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+                             operands[3], operands[1],
+                             GEN_INT (0), GEN_INT (0)));
+  else
+    emit_insn (gen_smaddhidi (operands[0], operands[2],
+                             operands[3], operands[1],
+                             GEN_INT (1), GEN_INT (1)));
+  DONE;
+})
+
+(define_insn "smaddhidi"
+  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
+       (plus:DI
+         (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")
+         (mult:DI
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
+               (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+               (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smaltt\t%0, %1, %2",
+                            "smalbt\t%0, %2, %1",
+                            "smalbb\t%0, %1, %2",
+                            "smalbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smalbb\t%0, %1, %2",
+                            "smalbt\t%0, %1, %2",
+                            "smaltt\t%0, %1, %2",
+                            "smalbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smaddhidi2"
+  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
+       (plus:DI
+         (mult:DI
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
+               (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
+           (sign_extend:DI
+             (vec_select:HI
+               (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+               (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))
+         (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smaltt\t%0, %1, %2",
+                            "smalbt\t%0, %2, %1",
+                            "smalbb\t%0, %1, %2",
+                            "smalbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smalbb\t%0, %1, %2",
+                            "smalbt\t%0, %1, %2",
+                            "smaltt\t%0, %1, %2",
+                            "smalbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "smalda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_expand "smalds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_insn "smalda1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (plus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 1)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalda\t%0, %2, %3"
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smalds1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (minus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 1)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smalda1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (plus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 0)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smalds1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (minus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 0)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "smaldrs3"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_insn "smaldrs3_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (minus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 0)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smaldrs3_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (minus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 1)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_expand "smalxda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_expand "smalxds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+(define_insn "smalxd<add_sub>1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (plus_minus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 0)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalxd<add_sub>\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+
+(define_insn "smalxd<add_sub>1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"                           " 0")
+         (sign_extend:DI
+           (plus_minus:SI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 0)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 1)]))))
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 2)
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_dup 3)
+                                 (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalxd<add_sub>\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smslda1"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (minus:DI
+         (minus:DI
+           (match_operand:DI 1 "register_operand"                           " 0")
+           (sign_extend:DI
+             (mult:SI
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 2 "register_operand" " r")
+                                 (parallel [(const_int 1)])))
+               (sign_extend:SI (vec_select:HI
+                                 (match_operand:V2HI 3 "register_operand" " r")
+                                 (parallel [(const_int 1)]))))))
+         (sign_extend:DI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "smslxda1"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+       (minus:DI
+         (minus:DI
+           (match_operand:DI 1 "register_operand"                           " 0")
+             (sign_extend:DI
+               (mult:SI
+                 (sign_extend:SI (vec_select:HI
+                                   (match_operand:V2HI 2 "register_operand" " r")
+                                   (parallel [(const_int 1)])))
+                 (sign_extend:SI (vec_select:HI
+                                   (match_operand:V2HI 3 "register_operand" " r")
+                                   (parallel [(const_int 0)]))))))
+         (sign_extend:DI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; mada for synthetize smalda
+(define_insn_and_split "mada1"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+       (plus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" "r")
+                             (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" "r")
+                             (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx result0 = gen_reg_rtx (SImode);
+  rtx result1 = gen_reg_rtx (SImode);
+  emit_insn (gen_mulhisi3v (result0, operands[1], operands[2],
+                           operands[3], operands[4]));
+  emit_insn (gen_mulhisi3v (result1, operands[1], operands[2],
+                           operands[5], operands[6]));
+  emit_insn (gen_addsi3 (operands[0], result0, result1));
+  DONE;
+})
+
+(define_insn_and_split "mada2"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+       (plus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" "r")
+                             (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" "r")
+                             (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx result0 = gen_reg_rtx (SImode);
+  rtx result1 = gen_reg_rtx (SImode);
+  emit_insn (gen_mulhisi3v (result0, operands[1], operands[2],
+                           operands[3], operands[4]));
+  emit_insn (gen_mulhisi3v (result1, operands[1], operands[2],
+                           operands[6], operands[5]));
+  emit_insn (gen_addsi3 (operands[0], result0, result1));
+  DONE;
+})
+
+;; sms for synthetize smalds
+(define_insn_and_split "sms1"
+  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand"               "    r")
+                             (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand"               "    r")
+                             (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P ()
+   && (!reload_completed
+       || !nds32_need_split_sms_p (operands[3], operands[4],
+                                  operands[5], operands[6]))"
+
+{
+  return nds32_output_sms (operands[3], operands[4],
+                          operands[5], operands[6]);
+}
+  "NDS32_EXT_DSP_P ()
+   && !reload_completed
+   && nds32_need_split_sms_p (operands[3], operands[4],
+                             operands[5], operands[6])"
+  [(const_int 1)]
+{
+  nds32_split_sms (operands[0], operands[1], operands[2],
+                  operands[3], operands[4],
+                  operands[5], operands[6]);
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn_and_split "sms2"
+  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
+       (minus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand"               "    r")
+                             (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand"               "    r")
+                             (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P ()
+   && (!reload_completed
+       || !nds32_need_split_sms_p (operands[3], operands[4],
+                                  operands[6], operands[5]))"
+{
+  return nds32_output_sms (operands[3], operands[4],
+                          operands[6], operands[5]);
+}
+  "NDS32_EXT_DSP_P ()
+   && !reload_completed
+   && nds32_need_split_sms_p (operands[3], operands[4],
+                             operands[6], operands[5])"
+  [(const_int 1)]
+{
+  nds32_split_sms (operands[0], operands[1], operands[2],
+                  operands[3], operands[4],
+                  operands[6], operands[5]);
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmda"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+       (ss_plus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" "r")
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" "r")
+                             (parallel [(const_int 1)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmda\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmxda"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+       (ss_plus:SI
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 1 "register_operand" "r")
+                             (parallel [(const_int 1)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_operand:V2HI 2 "register_operand" "r")
+                             (parallel [(const_int 0)]))))
+         (mult:SI
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 1)
+                             (parallel [(const_int 0)])))
+           (sign_extend:SI (vec_select:HI
+                             (match_dup 2)
+                             (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmxda\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmada"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_plus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_plus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 1)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmada\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmada2"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_plus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_plus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 0)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmada\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmaxda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_plus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_plus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 0)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmads"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_plus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_minus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 1)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmads\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmadrs"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_plus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_minus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 0)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmadrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmaxds"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_plus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_minus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 0)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_minus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_minus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 1)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsxda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+       (ss_minus:SI
+         (match_operand:SI 1 "register_operand"                         " 0")
+         (ss_minus:SI
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 2 "register_operand" " r")
+                               (parallel [(const_int 1)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_operand:V2HI 3 "register_operand" " r")
+                               (parallel [(const_int 0)]))))
+           (mult:SI
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 2)
+                               (parallel [(const_int 0)])))
+             (sign_extend:SI (vec_select:HI
+                               (match_dup 3)
+                               (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smax[8|16] and umax[8|16]
+(define_insn "<opcode><mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"               "=r")
+       (sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+                    (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+;; smin[8|16] and umin[8|16]
+(define_insn "<opcode><mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"              "=r")
+       (sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+                    (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn "<opcode><mode>3_bb"
+  [(set (match_operand:<VELT> 0 "register_operand"                    "=r")
+       (sumin_max:<VELT> (vec_select:<VELT>
+                           (match_operand:VQIHI 1 "register_operand" " r")
+                           (parallel [(const_int 0)]))
+                         (vec_select:<VELT>
+                           (match_operand:VQIHI 2 "register_operand" " r")
+                           (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode><mode>3_tt"
+  [(set (match_operand:<VELT> 0 "register_operand"                    "=r")
+       (sumin_max:<VELT> (vec_select:<VELT>
+                           (match_operand:VQIHI 1 "register_operand" " r")
+                           (parallel [(const_int 1)]))
+                         (vec_select:<VELT>
+                           (match_operand:VQIHI 2 "register_operand" " r")
+                           (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+  emit_insn (gen_<opcode><mode>3 (tmp, operands[1], operands[2]));
+  emit_insn (gen_rotr<mode>_1 (tmp, tmp));
+  emit_move_insn (operands[0], simplify_gen_subreg (<VELT>mode, tmp, <MODE>mode, 0));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode>v4qi3_22"
+  [(set (match_operand:QI 0 "register_operand"                   "=r")
+       (sumin_max:QI (vec_select:QI
+                       (match_operand:V4QI 1 "register_operand" " r")
+                       (parallel [(const_int 2)]))
+                     (vec_select:QI
+                       (match_operand:V4QI 2 "register_operand" " r")
+                       (parallel [(const_int 2)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2]));
+  emit_insn (gen_rotrv4qi_2 (tmp, tmp));
+  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode>v4qi3_33"
+  [(set (match_operand:QI 0 "register_operand"                   "=r")
+       (sumin_max:QI (vec_select:QI
+                       (match_operand:V4QI 1 "register_operand" " r")
+                       (parallel [(const_int 3)]))
+                     (vec_select:QI
+                       (match_operand:V4QI 2 "register_operand" " r")
+                       (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2]));
+  emit_insn (gen_rotrv4qi_3 (tmp, tmp));
+  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode>v2hi3_bbtt"
+  [(set (match_operand:V2HI 0 "register_operand"                         "=r")
+       (vec_merge:V2HI
+         (vec_duplicate:V2HI
+           (sumin_max:HI (vec_select:HI
+                           (match_operand:V2HI 1 "register_operand" " r")
+                           (parallel [(const_int 1)]))
+                         (vec_select:HI
+                           (match_operand:V2HI 2 "register_operand" " r")
+                           (parallel [(const_int 1)]))))
+         (vec_duplicate:V2HI
+           (sumin_max:HI (vec_select:HI
+                           (match_dup:V2HI 1)
+                           (parallel [(const_int 0)]))
+                         (vec_select:HI
+                           (match_dup:V2HI 2)
+                           (parallel [(const_int 0)]))))
+         (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  emit_insn (gen_<opcode>v2hi3 (operands[0], operands[1], operands[2]));
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_expand "abs<mode>2"
+  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
+       (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv"
+{
+})
+
+(define_insn "kabs<mode>2"
+  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
+       (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "kabs<bits>\t%0, %1"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn "<su>mar64_1"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (mult:DI
+           (extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (extend:DI
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mar64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (plus:DI
+         (mult:DI
+           (extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (extend:DI
+             (match_operand:SI 3 "register_operand" " r")))
+         (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mar64_3"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (plus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (extend:DI
+           (mult:SI
+             (match_operand:SI 2 "register_operand" " r")
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mar64_4"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (plus:DI
+         (extend:DI
+         (mult:SI
+             (match_operand:SI 2 "register_operand" " r")
+             (match_operand:SI 3 "register_operand" " r")))
+         (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>msr64"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (minus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (mult:DI
+           (extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (extend:DI
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>msr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>msr64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (minus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (extend:DI
+           (mult:SI
+             (match_operand:SI 2 "register_operand" " r")
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>msr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; kmar64, kmsr64, ukmar64 and ukmsr64
+(define_insn "kmar64_1"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (ss_plus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (mult:DI
+           (sign_extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (sign_extend:DI
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmar64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (ss_plus:DI
+         (mult:DI
+           (sign_extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (sign_extend:DI
+             (match_operand:SI 3 "register_operand" " r")))
+         (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "kmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsr64"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (ss_minus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (mult:DI
+           (sign_extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (sign_extend:DI
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmar64_1"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (us_plus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (mult:DI
+           (zero_extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (zero_extend:DI
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmar64_2"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (us_plus:DI
+         (mult:DI
+           (zero_extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (zero_extend:DI
+             (match_operand:SI 3 "register_operand" " r")))
+         (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmsr64"
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+       (us_minus:DI
+         (match_operand:DI 1 "register_operand"     " 0")
+         (mult:DI
+           (zero_extend:DI
+             (match_operand:SI 2 "register_operand" " r"))
+           (zero_extend:DI
+             (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmsr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick1"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+         (ior:SI
+           (and:SI
+             (match_operand:SI 1 "register_operand" " r")
+             (match_operand:SI 3 "register_operand" " r"))
+           (and:SI
+             (match_operand:SI 2 "register_operand" " r")
+             (not:SI (match_dup 3)))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %1, %2, %3"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick2"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+         (ior:SI
+           (and:SI
+             (match_operand:SI 1 "register_operand" " r")
+             (match_operand:SI 2 "register_operand" " r"))
+           (and:SI
+             (not:SI (match_dup 2))
+             (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %1, %3, %2"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick3"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+         (ior:SI
+           (and:SI
+             (match_operand:SI 1 "register_operand" " r")
+             (match_operand:SI 2 "register_operand" " r"))
+           (and:SI
+             (match_operand:SI 3 "register_operand" " r")
+             (not:SI (match_dup 1)))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %2, %3, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick4"
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+         (ior:SI
+           (and:SI
+             (match_operand:SI 1 "register_operand" " r")
+             (match_operand:SI 2 "register_operand" " r"))
+           (and:SI
+             (not:SI (match_dup 1))
+             (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %2, %3, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick5"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+         (ior:SI
+           (and:SI
+             (match_operand:SI 1 "register_operand"         " r")
+             (not:SI (match_operand:SI 2 "register_operand" " r")))
+           (and:SI
+             (match_operand:SI 3 "register_operand"         " r")
+             (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick6"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+         (ior:SI
+           (and:SI
+             (not:SI (match_operand:SI 1 "register_operand" " r"))
+             (match_operand:SI 2 "register_operand"         " r"))
+           (and:SI
+             (match_operand:SI 3 "register_operand" " r")
+             (match_dup 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick7"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+         (ior:SI
+           (and:SI
+             (match_operand:SI 1 "register_operand"         " r")
+             (not:SI (match_operand:SI 2 "register_operand" " r")))
+           (and:SI
+             (match_dup 2)
+             (match_operand:SI 3 "register_operand"         " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick8"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+         (ior:SI
+           (and:SI
+             (not:SI (match_operand:SI 1 "register_operand" " r"))
+             (match_operand:SI 2 "register_operand"         " r"))
+           (and:SI
+             (match_dup 1)
+             (match_operand:SI 3 "register_operand"         " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1"
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "sraiu"
+  [(set (match_operand:SI 0 "register_operand"                              "=   r, r")
+       (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand"     "    r, r")
+                                (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))]
+                   UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai.u\t%0, %1, %2
+   sra.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround")
+   (set_attr "length" "4")])
+
+(define_insn "kssl"
+  [(set (match_operand:SI 0 "register_operand"                   "=   r, r")
+       (ss_ashift:SI (match_operand:SI 1 "register_operand"     "    r, r")
+                     (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli\t%0, %1, %2
+   ksll\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn "kslraw_round"
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+       (if_then_else:SI
+         (lt:SI (match_operand:SI 2 "register_operand"        " r")
+                (const_int 0))
+         (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r")
+                                  (neg:SI (match_dup 2)))]
+                    UNSPEC_ROUND)
+         (ss_ashift:SI (match_dup 1)
+                       (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type"    "daluround")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "<shift>di3"
+  [(set (match_operand:DI 0 "register_operand" "")
+       (shift_rotate:DI (match_operand:DI 1 "register_operand" "")
+                        (match_operand:SI 2 "nds32_rimm6u_operand" "")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) == REGNO (operands[1]))
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      nds32_split_<code>di3 (tmp, operands[1], operands[2]);
+      emit_move_insn (operands[0], tmp);
+    }
+  else
+    nds32_split_<code>di3 (operands[0], operands[1], operands[2]);
+  DONE;
+})
+
+(define_insn "sclip32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "sclip32\t%0, %1, %2"
+  [(set_attr "type"   "dclip")
+   (set_attr "length" "4")]
+)
+
+(define_insn "uclip32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "uclip32\t%0, %1, %2"
+  [(set_attr "type"   "dclip")
+   (set_attr "length" "4")]
+)
+
+(define_insn "bitrev"
+  [(set (match_operand:SI 0 "register_operand"                 "=r,    r")
+       (unspec:SI [(match_operand:SI 1 "register_operand"     " r,    r")
+                   (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")]
+                  UNSPEC_BITREV))]
+  ""
+  "@
+   bitrev\t%0, %1, %2
+   bitrevi\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")]
+)
+
+;; wext, wexti
+(define_insn "<su>wext"
+  [(set (match_operand:SI 0 "register_operand"                "=r,   r")
+       (truncate:SI
+         (shiftrt:DI
+           (match_operand:DI 1 "register_operand"            " r,   r")
+           (match_operand:SI 2 "nds32_rimm5u_operand"        " r,Iu05"))))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   wext\t%0, %1, %2
+   wexti\t%0, %1, %2"
+  [(set_attr "type"     "dwext")
+   (set_attr "length"   "4")])
+
+;; 32-bit add/sub instruction: raddw and rsubw.
+(define_insn "r<opcode>si3"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (truncate:SI
+         (ashiftrt:DI
+           (plus_minus:DI
+             (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+             (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "r<opcode>w\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+;; 32-bit add/sub instruction: uraddw and ursubw.
+(define_insn "ur<opcode>si3"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+       (truncate:SI
+         (lshiftrt:DI
+           (plus_minus:DI
+             (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+             (zero_extend:DI (match_operand:SI 2 "register_operand" " r")))
+           (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ur<opcode>w\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
index b9bb2d9..c2ad927 100644 (file)
@@ -519,6 +519,7 @@ static struct builtin_description bdesc_noarg[] =
 {
   NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG)
   NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR)
+  NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV)
   NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP)
   NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS)
   NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int",
@@ -558,6 +559,31 @@ static struct builtin_description bdesc_1arg[] =
   NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF)
   NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp,
                          "set_current_sp", SET_CURRENT_SP)
+  NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16)
+  NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16)
+  NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8)
+  NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8)
+  NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810)
+  NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810)
+  NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820)
+  NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820)
+  NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830)
+  NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830)
+  NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831)
+  NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831)
+  NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810)
+  NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810)
+  NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820)
+  NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820)
+  NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830)
+  NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830)
+  NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831)
+  NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831)
+  NDS32_BUILTIN(unspec_kabs, "kabs", KABS)
+  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16)
+  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16)
+  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8)
+  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8)
 };
 
 /* Intrinsics that take just one argument. and the argument is immediate.  */
@@ -593,6 +619,28 @@ static struct builtin_description bdesc_2arg[] =
   NDS32_BUILTIN(unspec_ffb, "ffb", FFB)
   NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM)
   NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM)
+  NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW)
+  NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH)
+  NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW)
+  NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH)
+  NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB)
+  NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB)
+  NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT)
+  NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT)
+  NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB)
+  NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB)
+  NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT)
+  NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT)
+  NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB)
+  NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB)
+  NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT)
+  NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT)
+  NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB)
+  NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB)
+  NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT)
+  NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT)
+  NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW)
+  NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U)
   NDS32_BUILTIN(rotrsi3, "rotr", ROTR)
   NDS32_BUILTIN(unspec_sva, "sva", SVA)
   NDS32_BUILTIN(unspec_svs, "svs", SVS)
@@ -603,7 +651,202 @@ static struct builtin_description bdesc_2arg[] =
   NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW)
   NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W)
   NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW)
-
+  NDS32_BUILTIN(addv2hi3, "add16", ADD16)
+  NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16)
+  NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16)
+  NDS32_BUILTIN(raddv2hi3, "radd16", RADD16)
+  NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16)
+  NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16)
+  NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16)
+  NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16)
+  NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16)
+  NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16)
+  NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16)
+  NDS32_BUILTIN(subv2hi3, "sub16", SUB16)
+  NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16)
+  NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16)
+  NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16)
+  NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16)
+  NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16)
+  NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16)
+  NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16)
+  NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16)
+  NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16)
+  NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16)
+  NDS32_BUILTIN(cras16_1, "cras16", CRAS16)
+  NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16)
+  NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16)
+  NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16)
+  NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16)
+  NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16)
+  NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16)
+  NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16)
+  NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16)
+  NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16)
+  NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16)
+  NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16)
+  NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16)
+  NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16)
+  NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16)
+  NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16)
+  NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16)
+  NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16)
+  NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16)
+  NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16)
+  NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16)
+  NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16)
+  NDS32_BUILTIN(addv4qi3, "add8", ADD8)
+  NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8)
+  NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8)
+  NDS32_BUILTIN(raddv4qi3, "radd8", RADD8)
+  NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8)
+  NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8)
+  NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8)
+  NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8)
+  NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8)
+  NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8)
+  NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8)
+  NDS32_BUILTIN(subv4qi3, "sub8", SUB8)
+  NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8)
+  NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8)
+  NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8)
+  NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8)
+  NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8)
+  NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8)
+  NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8)
+  NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8)
+  NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8)
+  NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8)
+  NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16)
+  NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16)
+  NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U)
+  NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U)
+  NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16)
+  NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16)
+  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
+  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
+  NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16)
+  NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16)
+  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
+  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
+  NDS32_BUILTIN(kslra16, "kslra16", KSLRA16)
+  NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16)
+  NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U)
+  NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U)
+  NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16)
+  NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16)
+  NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16)
+  NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16)
+  NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16)
+  NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16)
+  NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16)
+  NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16)
+  NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16)
+  NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16)
+  NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16)
+  NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8)
+  NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8)
+  NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8)
+  NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8)
+  NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8)
+  NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8)
+  NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8)
+  NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8)
+  NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8)
+  NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8)
+  NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8)
+  NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16)
+  NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16)
+  NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16)
+  NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16)
+  NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16)
+  NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16)
+  NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16)
+  NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16)
+  NDS32_BUILTIN(khm16, "khm16", KHM16)
+  NDS32_BUILTIN(khm16, "v_khm16", V_KHM16)
+  NDS32_BUILTIN(khmx16, "khmx16", KHMX16)
+  NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16)
+  NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8)
+  NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8)
+  NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8)
+  NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8)
+  NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8)
+  NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8)
+  NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8)
+  NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8)
+  NDS32_BUILTIN(raddsi3, "raddw", RADDW)
+  NDS32_BUILTIN(uraddsi3, "uraddw", URADDW)
+  NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW)
+  NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW)
+  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
+  NDS32_BUILTIN(kssl, "ksll", KSLL)
+  NDS32_BUILTIN(pkbb, "pkbb16", PKBB16)
+  NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16)
+  NDS32_BUILTIN(pkbt, "pkbt16", PKBT16)
+  NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16)
+  NDS32_BUILTIN(pktb, "pktb16", PKTB16)
+  NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16)
+  NDS32_BUILTIN(pktt, "pktt16", PKTT16)
+  NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16)
+  NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL)
+  NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U)
+  NDS32_BUILTIN(smmwb, "smmwb", SMMWB)
+  NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB)
+  NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U)
+  NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U)
+  NDS32_BUILTIN(smmwt, "smmwt", SMMWT)
+  NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT)
+  NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U)
+  NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U)
+  NDS32_BUILTIN(smbb, "smbb", SMBB)
+  NDS32_BUILTIN(smbb, "v_smbb", V_SMBB)
+  NDS32_BUILTIN(smbt, "smbt", SMBT)
+  NDS32_BUILTIN(smbt, "v_smbt", V_SMBT)
+  NDS32_BUILTIN(smtt, "smtt", SMTT)
+  NDS32_BUILTIN(smtt, "v_smtt", V_SMTT)
+  NDS32_BUILTIN(kmda, "kmda", KMDA)
+  NDS32_BUILTIN(kmda, "v_kmda", V_KMDA)
+  NDS32_BUILTIN(kmxda, "kmxda", KMXDA)
+  NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA)
+  NDS32_BUILTIN(smds, "smds", SMDS)
+  NDS32_BUILTIN(smds, "v_smds", V_SMDS)
+  NDS32_BUILTIN(smdrs, "smdrs", SMDRS)
+  NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS)
+  NDS32_BUILTIN(smxdsv, "smxds", SMXDS)
+  NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS)
+  NDS32_BUILTIN(smal1, "smal", SMAL)
+  NDS32_BUILTIN(smal1, "v_smal", V_SMAL)
+  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
+  NDS32_BUILTIN(wext, "wext", WEXT)
+  NDS32_BUILTIN(adddi3, "sadd64", SADD64)
+  NDS32_BUILTIN(adddi3, "uadd64", UADD64)
+  NDS32_BUILTIN(radddi3, "radd64", RADD64)
+  NDS32_BUILTIN(uradddi3, "uradd64", URADD64)
+  NDS32_BUILTIN(kadddi3, "kadd64", KADD64)
+  NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64)
+  NDS32_BUILTIN(subdi3, "ssub64", SSUB64)
+  NDS32_BUILTIN(subdi3, "usub64", USUB64)
+  NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64)
+  NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64)
+  NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64)
+  NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64)
+  NDS32_BUILTIN(smul16, "smul16", SMUL16)
+  NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16)
+  NDS32_BUILTIN(smulx16, "smulx16", SMULX16)
+  NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16)
+  NDS32_BUILTIN(umul16, "umul16", UMUL16)
+  NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16)
+  NDS32_BUILTIN(umulx16, "umulx16", UMULX16)
+  NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16)
+  NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL)
+  NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
+                         "put_unaligned_u16x2", UASTORE_U16)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
+                         "put_unaligned_s16x2", UASTORE_S16)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8)
 };
 
 /* Two-argument intrinsics with an immediate second argument.  */
@@ -617,6 +860,22 @@ static struct builtin_description bdesc_2argimm[] =
   NDS32_BUILTIN(unspec_clips, "clips", CLIPS)
   NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ)
   NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ)
+  NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16)
+  NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16)
+  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
+  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
+  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
+  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
+  NDS32_BUILTIN(sclip16, "sclip16", SCLIP16)
+  NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16)
+  NDS32_BUILTIN(uclip16, "uclip16", UCLIP16)
+  NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16)
+  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
+  NDS32_BUILTIN(kssl, "ksll", KSLL)
+  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
+  NDS32_BUILTIN(wext, "wext", WEXT)
+  NDS32_BUILTIN(uclip32, "uclip32", UCLIP32)
+  NDS32_BUILTIN(sclip32, "sclip32", SCLIP32)
 };
 
 /* Intrinsics that take three arguments.  */
@@ -625,6 +884,67 @@ static struct builtin_description bdesc_3arg[] =
   NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA)
   NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE)
   NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP)
+  NDS32_BUILTIN(kmabb, "kmabb", KMABB)
+  NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB)
+  NDS32_BUILTIN(kmabt, "kmabt", KMABT)
+  NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT)
+  NDS32_BUILTIN(kmatt, "kmatt", KMATT)
+  NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT)
+  NDS32_BUILTIN(kmada, "kmada", KMADA)
+  NDS32_BUILTIN(kmada, "v_kmada", V_KMADA)
+  NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA)
+  NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA)
+  NDS32_BUILTIN(kmads, "kmads", KMADS)
+  NDS32_BUILTIN(kmads, "v_kmads", V_KMADS)
+  NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS)
+  NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS)
+  NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS)
+  NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS)
+  NDS32_BUILTIN(kmsda, "kmsda", KMSDA)
+  NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA)
+  NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA)
+  NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA)
+  NDS32_BUILTIN(bpick1, "bpick", BPICK)
+  NDS32_BUILTIN(smar64_1, "smar64", SMAR64)
+  NDS32_BUILTIN(smsr64, "smsr64", SMSR64)
+  NDS32_BUILTIN(umar64_1, "umar64", UMAR64)
+  NDS32_BUILTIN(umsr64, "umsr64", UMSR64)
+  NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64)
+  NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64)
+  NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64)
+  NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64)
+  NDS32_BUILTIN(smalbb, "smalbb", SMALBB)
+  NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB)
+  NDS32_BUILTIN(smalbt, "smalbt", SMALBT)
+  NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT)
+  NDS32_BUILTIN(smaltt, "smaltt", SMALTT)
+  NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT)
+  NDS32_BUILTIN(smalda1, "smalda", SMALDA)
+  NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA)
+  NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA)
+  NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA)
+  NDS32_BUILTIN(smalds1, "smalds", SMALDS)
+  NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS)
+  NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS)
+  NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS)
+  NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS)
+  NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS)
+  NDS32_BUILTIN(smslda1, "smslda", SMSLDA)
+  NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA)
+  NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA)
+  NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA)
+  NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB)
+  NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB)
+  NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U)
+  NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U)
+  NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT)
+  NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT)
+  NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U)
+  NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U)
+  NDS32_BUILTIN(kmmac, "kmmac", KMMAC)
+  NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U)
+  NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB)
+  NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U)
 };
 
 /* Three-argument intrinsics with an immediate third argument.  */
@@ -634,6 +954,7 @@ static struct builtin_description bdesc_3argimm[] =
   NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW)
   NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W)
   NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW)
+  NDS32_BUILTIN(insb, "insb", INSB)
 };
 
 /* Intrinsics that load a value.  */
@@ -676,6 +997,11 @@ nds32_expand_builtin_impl (tree exp,
   unsigned i;
   struct builtin_description *d;
 
+  if (!NDS32_EXT_DSP_P ()
+      && fcode > NDS32_BUILTIN_DSP_BEGIN
+      && fcode < NDS32_BUILTIN_DSP_END)
+    error ("don't support DSP extension instructions");
+
   switch (fcode)
     {
     /* FPU Register Transfer.  */
@@ -812,6 +1138,9 @@ nds32_expand_builtin_impl (tree exp,
     case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL:
       emit_insn (gen_cctl_l1d_wball_one_lvl());
       return target;
+    case NDS32_BUILTIN_CLROV:
+      emit_insn (gen_unspec_volatile_clrov ());
+      return target;
     case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT:
       emit_insn (gen_unspec_standby_no_wake_grant ());
       return target;
@@ -947,10 +1276,18 @@ nds32_init_builtins_impl (void)
                        NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
 
   /* Looking for return type and argument can be found in tree.h file.  */
+  tree ptr_char_type_node = build_pointer_type (char_type_node);
   tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node);
   tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node);
+  tree ptr_short_type_node = build_pointer_type (short_integer_type_node);
   tree ptr_uint_type_node = build_pointer_type (unsigned_type_node);
   tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node);
+  tree v4qi_type_node = build_vector_type (intQI_type_node, 4);
+  tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4);
+  tree v2hi_type_node = build_vector_type (intHI_type_node, 2);
+  tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2);
+  tree v2si_type_node = build_vector_type (intSI_type_node, 2);
+  tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2);
 
   /* Cache.  */
   ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC);
@@ -1050,6 +1387,31 @@ nds32_init_builtins_impl (void)
   ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM);
   ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM);
 
+  /* SATURATION  */
+  ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW);
+  ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW);
+  ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH);
+  ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH);
+  ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB);
+  ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB);
+  ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT);
+  ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT);
+  ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB);
+  ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB);
+  ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT);
+  ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT);
+  ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB);
+  ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB);
+  ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT);
+  ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT);
+  ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB);
+  ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB);
+  ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT);
+  ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT);
+  ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW);
+  ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U);
+  ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV);
+  ADD_NDS32_BUILTIN0 ("clrov", void, CLROV);
 
   /* ROTR  */
   ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR);
@@ -1109,4 +1471,384 @@ nds32_init_builtins_impl (void)
   ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED);
   ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED);
 
+  /* DSP Extension: SIMD 16bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16);
+  ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16);
+  ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16);
+  ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16);
+  ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16);
+  ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16);
+  ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16);
+  ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16);
+  ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16);
+  ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16);
+  ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16);
+  ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16);
+  ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16);
+  ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16);
+  ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16);
+  ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16);
+  ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16);
+  ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16);
+  ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16);
+  ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16);
+  ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16);
+  ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16);
+  ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16);
+  ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16);
+  ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16);
+  ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16);
+  ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16);
+  ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16);
+  ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16);
+  ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16);
+  ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16);
+  ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16);
+  ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16);
+  ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16);
+
+  /* DSP Extension: SIMD 8bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8);
+  ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8);
+  ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8);
+  ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8);
+  ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8);
+  ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8);
+  ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8);
+  ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8);
+  ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8);
+  ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8);
+  ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8);
+  ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8);
+  ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8);
+  ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8);
+  ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8);
+  ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8);
+  ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8);
+  ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8);
+  ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8);
+  ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8);
+  ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8);
+  ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8);
+
+  /* DSP Extension: SIMD 16bit Shift.  */
+  ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16);
+  ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16);
+  ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U);
+  ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U);
+  ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16);
+  ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16);
+  ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U);
+  ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U);
+  ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16);
+  ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16);
+  ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16);
+  ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16);
+  ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16);
+  ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16);
+  ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U);
+  ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U);
+
+  /* DSP Extension: 16bit Compare.  */
+  ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16);
+  ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16);
+  ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16);
+  ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16);
+  ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16);
+  ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16);
+  ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16);
+  ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16);
+  ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16);
+  ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16);
+  ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16);
+
+  /* DSP Extension: 8bit Compare.  */
+  ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8);
+  ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8);
+  ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8);
+  ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8);
+  ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8);
+  ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8);
+  ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8);
+  ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8);
+  ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8);
+  ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8);
+  ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8);
+
+  /* DSP Extension: SIMD 16bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16);
+  ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16);
+  ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16);
+  ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16);
+  ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16);
+  ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16);
+  ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16);
+  ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16);
+  ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16);
+  ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16);
+  ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16);
+  ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16);
+  ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16);
+  ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16);
+  ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16);
+  ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16);
+  ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16);
+  ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16);
+  ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16);
+  ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16);
+  ADD_NDS32_BUILTIN2 ("smulx16",
+                     long_long_unsigned, unsigned, unsigned, SMULX16);
+  ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16);
+  ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16);
+  ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16);
+  ADD_NDS32_BUILTIN2 ("umulx16",
+                     long_long_unsigned, unsigned, unsigned, UMULX16);
+  ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16);
+
+  /* DSP Extension: SIMD 8bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8);
+  ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8);
+  ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8);
+  ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8);
+  ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8);
+  ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8);
+  ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8);
+  ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8);
+  ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8);
+  ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8);
+
+  /* DSP Extension: 8bit Unpacking.  */
+  ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810);
+  ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820);
+  ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830);
+  ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831);
+  ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810);
+  ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820);
+  ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830);
+  ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831);
+
+  /* DSP Extension: 32bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW);
+  ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW);
+  ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW);
+  ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW);
+
+  /* DSP Extension: 32bit Shift.  */
+  ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U);
+  ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL);
+
+  /* DSP Extension: 16bit Packing.  */
+  ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16);
+  ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16);
+  ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16);
+  ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16);
+  ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16);
+  ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16);
+  ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16);
+  ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16);
+
+  /* DSP Extension: Signed MSW 32x32 Multiply and ADD.  */
+  ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL);
+  ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U);
+  ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC);
+  ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U);
+  ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB);
+  ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U);
+  ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL);
+  ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U);
+
+  /* DSP Extension: Most Significant Word 32x16 Multiply and ADD.  */
+  ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB);
+  ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB);
+  ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U);
+  ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U);
+  ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT);
+  ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT);
+  ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U);
+  ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U);
+  ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB);
+  ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB);
+  ADD_NDS32_BUILTIN3 ("kmmawb_u",
+                     integer, integer, integer, unsigned, KMMAWB_U);
+  ADD_NDS32_BUILTIN3 ("v_kmmawb_u",
+                     integer, integer, integer, v2hi, V_KMMAWB_U);
+  ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT);
+  ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT);
+  ADD_NDS32_BUILTIN3 ("kmmawt_u",
+                     integer, integer, integer, unsigned, KMMAWT_U);
+  ADD_NDS32_BUILTIN3 ("v_kmmawt_u",
+                     integer, integer, integer, v2hi, V_KMMAWT_U);
+
+  /* DSP Extension: Signed 16bit Multiply with ADD/Subtract.  */
+  ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB);
+  ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB);
+  ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT);
+  ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT);
+  ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT);
+  ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT);
+  ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA);
+  ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA);
+  ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA);
+  ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA);
+  ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS);
+  ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS);
+  ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS);
+  ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS);
+  ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS);
+  ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS);
+  ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB);
+  ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB);
+  ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT);
+  ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT);
+  ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT);
+  ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT);
+  ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA);
+  ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA);
+  ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA);
+  ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA);
+  ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS);
+  ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS);
+  ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS);
+  ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS);
+  ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS);
+  ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS);
+  ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA);
+  ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA);
+  ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA);
+  ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA);
+
+  /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract.  */
+  ADD_NDS32_BUILTIN2 ("smal", long_long_integer,
+                     long_long_integer, unsigned, SMAL);
+  ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer,
+                     long_long_integer, v2hi, V_SMAL);
+
+  /* DSP Extension: 32bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV);
+  ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT);
+  ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK);
+  ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB);
+
+  /* DSP Extension: 64bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer,
+                     long_long_integer, long_long_integer, SADD64);
+  ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned,
+                     long_long_unsigned, long_long_unsigned, UADD64);
+  ADD_NDS32_BUILTIN2 ("radd64", long_long_integer,
+                     long_long_integer, long_long_integer, RADD64);
+  ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned,
+                     long_long_unsigned, long_long_unsigned, URADD64);
+  ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer,
+                     long_long_integer, long_long_integer, KADD64);
+  ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned,
+                     long_long_unsigned, long_long_unsigned, UKADD64);
+  ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer,
+                     long_long_integer, long_long_integer, SSUB64);
+  ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned,
+                     long_long_unsigned, long_long_unsigned, USUB64);
+  ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer,
+                     long_long_integer, long_long_integer, RSUB64);
+  ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned,
+                     long_long_unsigned, long_long_unsigned, URSUB64);
+  ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer,
+                     long_long_integer, long_long_integer, KSUB64);
+  ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned,
+                     long_long_unsigned, long_long_unsigned, UKSUB64);
+
+  /* DSP Extension: 32bit Multiply with 64bit Add/Subtract.  */
+  ADD_NDS32_BUILTIN3 ("smar64", long_long_integer,
+                     long_long_integer, integer, integer, SMAR64);
+  ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer,
+                     long_long_integer, integer, integer, SMSR64);
+  ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned,
+                     long_long_unsigned, unsigned, unsigned, UMAR64);
+  ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned,
+                     long_long_unsigned, unsigned, unsigned, UMSR64);
+  ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer,
+                     long_long_integer, integer, integer, KMAR64);
+  ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer,
+                     long_long_integer, integer, integer, KMSR64);
+  ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned,
+                     long_long_unsigned, unsigned, unsigned, UKMAR64);
+  ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned,
+                     long_long_unsigned, unsigned, unsigned, UKMSR64);
+
+  /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract.  */
+  ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALBB);
+  ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALBB);
+  ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALBT);
+  ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALBT);
+  ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALTT);
+  ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALTT);
+  ADD_NDS32_BUILTIN3 ("smalda", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALDA);
+  ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALDA);
+  ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALXDA);
+  ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALXDA);
+  ADD_NDS32_BUILTIN3 ("smalds", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALDS);
+  ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALDS);
+  ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALDRS);
+  ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALDRS);
+  ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMALXDS);
+  ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMALXDS);
+  ADD_NDS32_BUILTIN3 ("smslda", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMSLDA);
+  ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMSLDA);
+  ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer,
+                     long_long_integer, unsigned, unsigned, SMSLXDA);
+  ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer,
+                     long_long_integer, v2hi, v2hi, V_SMSLXDA);
+
+  /* DSP Extension: augmented baseline.  */
+  ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32);
+  ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32);
+  ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS);
+
+  /* DSP Extension: vector type unaligned Load/Store  */
+  ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort,
+                     u_v2hi, UASTORE_U16);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short,
+                     v2hi, UASTORE_S16);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar,
+                     u_v4qi, UASTORE_U8);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char,
+                     v4qi, UASTORE_S8);
 }
index 24e7c0b..02f7285 100644 (file)
    (set_attr "length" "4")]
 )
 
+;; SATURATION
+
+(define_insn "unspec_kaddw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ss_plus:SI (match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "kaddw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (ss_minus:SI (match_operand:SI 1 "register_operand" "r")
+                    (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "ksubw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kaddh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDH))]
+  ""
+  "kaddh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBH))]
+  ""
+  "ksubh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kaddh_dsp"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r")
+                            (match_operand:SI 2 "register_operand" "r"))
+                   (const_int 15)] UNSPEC_CLIPS))]
+  "NDS32_EXT_DSP_P ()"
+  "kaddh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubh_dsp"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r")
+                             (match_operand:SI 2 "register_operand" "r"))
+                   (const_int 15)] UNSPEC_CLIPS))]
+  "NDS32_EXT_DSP_P ()"
+  "ksubh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmbb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))]
+  ""
+  "kdmbb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmbt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))]
+  ""
+  "kdmbt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmtb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))]
+  ""
+  "kdmtb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmtt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))]
+  ""
+  "kdmtt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmbb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))]
+  ""
+  "khmbb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmbt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))]
+  ""
+  "khmbt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmtb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))]
+  ""
+  "khmtb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmtt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+       (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+                     (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))]
+  ""
+  "khmtt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kslraw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))]
+  ""
+  "kslraw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kslrawu"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                   (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))]
+  ""
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_rdov"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))]
+  ""
+  "rdov\t%0"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_clrov"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)]
+  ""
+  "clrov"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
 ;; System
 
 (define_insn "unspec_sva"
   DONE;
 })
 
+;; abs alias kabs
+
+(define_insn "unspec_kabs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))]
+  ""
+  "kabs\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
 ;; ------------------------------------------------------------------------
index 720e85a..9696611 100644 (file)
@@ -261,6 +261,118 @@ output_cond_branch_compare_zero (int code, const char *suffix,
   output_asm_insn (pattern, operands);
 }
 
+static void
+nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p)
+{
+  rtx src_high_part;
+  rtx dst_high_part, dst_low_part;
+
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  src_high_part = nds32_di_high_part_subreg (src);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+
+  if (CONST_INT_P (shiftamount))
+    {
+      if (INTVAL (shiftamount) < 32)
+       {
+         if (logic_shift_p)
+           {
+             emit_insn (gen_uwext (dst_low_part, src,
+                                                 shiftamount));
+             emit_insn (gen_lshrsi3 (dst_high_part, src_high_part,
+                                                    shiftamount));
+           }
+         else
+           {
+             emit_insn (gen_wext (dst_low_part, src,
+                                                shiftamount));
+             emit_insn (gen_ashrsi3 (dst_high_part, src_high_part,
+                                                    shiftamount));
+           }
+       }
+      else
+       {
+         rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode);
+
+         if (logic_shift_p)
+           {
+             emit_insn (gen_lshrsi3 (dst_low_part, src_high_part,
+                                                   new_shift_amout));
+             emit_move_insn (dst_high_part, const0_rtx);
+           }
+         else
+           {
+             emit_insn (gen_ashrsi3 (dst_low_part, src_high_part,
+                                                   new_shift_amout));
+             emit_insn (gen_ashrsi3 (dst_high_part, src_high_part,
+                                                    GEN_INT (31)));
+           }
+       }
+    }
+  else
+    {
+      rtx dst_low_part_l32, dst_high_part_l32;
+      rtx dst_low_part_g32, dst_high_part_g32;
+      rtx new_shift_amout, select_reg;
+      dst_low_part_l32 = gen_reg_rtx (SImode);
+      dst_high_part_l32 = gen_reg_rtx (SImode);
+      dst_low_part_g32 = gen_reg_rtx (SImode);
+      dst_high_part_g32 = gen_reg_rtx (SImode);
+      new_shift_amout = gen_reg_rtx (SImode);
+      select_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f)));
+
+      if (logic_shift_p)
+       {
+         /*
+            if (shiftamount < 32)
+              dst_low_part = wext (src, shiftamount)
+              dst_high_part = src_high_part >> shiftamount
+            else
+              dst_low_part = src_high_part >> (shiftamount & 0x1f)
+              dst_high_part = 0
+         */
+         emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount));
+         emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part,
+                                                    shiftamount));
+
+         emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
+         emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part,
+                                                   new_shift_amout));
+         emit_move_insn (dst_high_part_g32, const0_rtx);
+       }
+      else
+       {
+         /*
+            if (shiftamount < 32)
+              dst_low_part = wext (src, shiftamount)
+              dst_high_part = src_high_part >> shiftamount
+            else
+              dst_low_part = src_high_part >> (shiftamount & 0x1f)
+              # shift 31 for sign extend
+              dst_high_part = src_high_part >> 31
+         */
+         emit_insn (gen_wext (dst_low_part_l32, src, shiftamount));
+         emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part,
+                                                    shiftamount));
+
+         emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
+         emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part,
+                                                   new_shift_amout));
+         emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part,
+                                                    GEN_INT (31)));
+       }
+
+      emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
+
+      emit_insn (gen_cmovnsi (dst_low_part, select_reg,
+                             dst_low_part_l32, dst_low_part_g32));
+      emit_insn (gen_cmovnsi (dst_high_part, select_reg,
+                             dst_high_part_l32, dst_high_part_g32));
+  }
+}
+
 /* ------------------------------------------------------------------------ */
 
 /* Auxiliary function for expand RTL pattern.  */
@@ -1195,6 +1307,58 @@ nds32_emit_v3pop_fpr_callee_saved (int base)
     }
 }
 
+enum nds32_expand_result_type
+nds32_expand_extv (rtx *operands)
+{
+  gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3]));
+  HOST_WIDE_INT width = INTVAL (operands[2]);
+  HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+  rtx dst = operands[0];
+  rtx src = operands[1];
+
+  if (MEM_P (src)
+      && width == 32
+      && (bitpos % BITS_PER_UNIT)  == 0
+      && GET_MODE_BITSIZE (GET_MODE (dst)) == width)
+    {
+      rtx newmem = adjust_address (src, GET_MODE (dst),
+                                  bitpos / BITS_PER_UNIT);
+
+      rtx base_addr = force_reg (Pmode, XEXP (newmem, 0));
+
+      emit_insn (gen_unaligned_loadsi (dst, base_addr));
+
+      return EXPAND_DONE;
+    }
+  return EXPAND_FAIL;
+}
+
+enum nds32_expand_result_type
+nds32_expand_insv (rtx *operands)
+{
+  gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]));
+  HOST_WIDE_INT width = INTVAL (operands[1]);
+  HOST_WIDE_INT bitpos = INTVAL (operands[2]);
+  rtx dst = operands[0];
+  rtx src = operands[3];
+
+  if (MEM_P (dst)
+      && width == 32
+      && (bitpos % BITS_PER_UNIT)  == 0
+      && GET_MODE_BITSIZE (GET_MODE (src)) == width)
+    {
+      rtx newmem = adjust_address (dst, GET_MODE (src),
+                                     bitpos / BITS_PER_UNIT);
+
+      rtx base_addr = force_reg (Pmode, XEXP (newmem, 0));
+
+      emit_insn (gen_unaligned_storesi (base_addr, src));
+
+      return EXPAND_DONE;
+    }
+  return EXPAND_FAIL;
+}
+
 /* ------------------------------------------------------------------------ */
 
 /* Function to return memory format.  */
@@ -2424,7 +2588,10 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode)
     }
   else
     {
-      reg[0] = operands[0];
+      if (VECTOR_MODE_P (mode))
+       reg[0] = gen_reg_rtx (SImode);
+      else
+       reg[0] = operands[0];
     }
 
   for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--)
@@ -2466,6 +2633,8 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode)
          offset = offset + offset_adj;
        }
     }
+    if (VECTOR_MODE_P (mode))
+      convert_move (operands[0], reg[0], false);
 }
 
 void
@@ -2508,7 +2677,13 @@ nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode)
     }
   else
     {
-      reg[0] = operands[1];
+      if (VECTOR_MODE_P (mode))
+       {
+         reg[0] = gen_reg_rtx (SImode);
+         convert_move (reg[0], operands[1], false);
+       }
+      else
+       reg[0] = operands[1];
     }
 
   for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--)
@@ -2765,6 +2940,36 @@ nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands)
   return "";
 }
 
+const char *
+nds32_output_unpkd8 (rtx output, rtx input,
+                    rtx high_idx_rtx, rtx low_idx_rtx,
+                    bool signed_p)
+{
+  char pattern[100];
+  rtx output_operands[2];
+  HOST_WIDE_INT high_idx, low_idx;
+  high_idx = INTVAL (high_idx_rtx);
+  low_idx = INTVAL (low_idx_rtx);
+
+  gcc_assert (high_idx >= 0 && high_idx <= 3);
+  gcc_assert (low_idx >= 0 && low_idx <= 3);
+
+  /* We only have 10, 20, 30 and 31.  */
+  if ((low_idx != 0 || high_idx == 0) &&
+      !(low_idx == 1 && high_idx == 3))
+    return "#";
+
+  char sign_char = signed_p ? 's' : 'z';
+
+  sprintf (pattern,
+          "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1",
+          sign_char, high_idx, low_idx);
+  output_operands[0] = output;
+  output_operands[1] = input;
+  output_asm_insn (pattern, output_operands);
+  return "";
+}
+
 /* Return true if SYMBOL_REF X binds locally.  */
 
 static bool
@@ -2815,6 +3020,91 @@ nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call,
   return "";
 }
 
+bool
+nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0,
+                       rtx in0_idx1, rtx in1_idx1)
+{
+  /* smds or smdrs.  */
+  if (INTVAL (in0_idx0) == INTVAL (in1_idx0)
+      && INTVAL (in0_idx1) == INTVAL (in1_idx1)
+      && INTVAL (in0_idx0) != INTVAL (in0_idx1))
+    return false;
+
+  /* smxds.  */
+  if (INTVAL (in0_idx0) != INTVAL (in0_idx1)
+      && INTVAL (in1_idx0) != INTVAL (in1_idx1))
+    return false;
+
+  return true;
+}
+
+const char *
+nds32_output_sms (rtx in0_idx0, rtx in1_idx0,
+                 rtx in0_idx1, rtx in1_idx1)
+{
+  if (nds32_need_split_sms_p (in0_idx0, in1_idx0,
+                             in0_idx1, in1_idx1))
+    return "#";
+  /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */
+
+  /* smds or smdrs.  */
+  if (INTVAL (in0_idx0) == INTVAL (in1_idx0)
+      && INTVAL (in0_idx1) == INTVAL (in1_idx1)
+      && INTVAL (in0_idx0) != INTVAL (in0_idx1))
+    {
+      if (INTVAL (in0_idx0) == 0)
+       {
+         if (TARGET_BIG_ENDIAN)
+           return "smds\t%0, %1, %2";
+         else
+           return "smdrs\t%0, %1, %2";
+       }
+      else
+       {
+         if (TARGET_BIG_ENDIAN)
+           return "smdrs\t%0, %1, %2";
+         else
+           return "smds\t%0, %1, %2";
+       }
+    }
+
+  if (INTVAL (in0_idx0) != INTVAL (in0_idx1)
+      && INTVAL (in1_idx0) != INTVAL (in1_idx1))
+    {
+      if (INTVAL (in0_idx0) == 1)
+       {
+         if (TARGET_BIG_ENDIAN)
+           return "smxds\t%0, %2, %1";
+         else
+           return "smxds\t%0, %1, %2";
+       }
+      else
+       {
+         if (TARGET_BIG_ENDIAN)
+           return "smxds\t%0, %1, %2";
+         else
+           return "smxds\t%0, %2, %1";
+       }
+    }
+
+  gcc_unreachable ();
+  return "";
+}
+
+void
+nds32_split_sms (rtx out, rtx in0, rtx in1,
+                rtx in0_idx0, rtx in1_idx0,
+                rtx in0_idx1, rtx in1_idx1)
+{
+  rtx result0 = gen_reg_rtx (SImode);
+  rtx result1 = gen_reg_rtx (SImode);
+  emit_insn (gen_mulhisi3v (result0, in0, in1,
+                           in0_idx0, in1_idx0));
+  emit_insn (gen_mulhisi3v (result1, in0, in1,
+                           in0_idx1, in1_idx1));
+  emit_insn (gen_subsi3 (out, result0, result1));
+}
+
 /* Spilt a doubleword instrucion to two single word instructions.  */
 void
 nds32_spilt_doubleword (rtx *operands, bool load_p)
@@ -2924,6 +3214,192 @@ nds32_spilt_doubleword (rtx *operands, bool load_p)
     }
 }
 
+void
+nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  rtx src_high_part, src_low_part;
+  rtx dst_high_part, dst_low_part;
+
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+
+  src_high_part = nds32_di_high_part_subreg (src);
+  src_low_part = nds32_di_low_part_subreg (src);
+
+  /* We need to handle shift more than 32 bit!!!! */
+  if (CONST_INT_P (shiftamount))
+    {
+      if (INTVAL (shiftamount) < 32)
+       {
+         rtx ext_start;
+         ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode);
+
+         emit_insn (gen_wext (dst_high_part, src, ext_start));
+         emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount));
+       }
+      else
+       {
+         rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode);
+
+         emit_insn (gen_ashlsi3 (dst_high_part, src_low_part,
+                                                new_shift_amout));
+
+         emit_move_insn (dst_low_part, GEN_INT (0));
+       }
+    }
+  else
+    {
+      rtx dst_low_part_l32, dst_high_part_l32;
+      rtx dst_low_part_g32, dst_high_part_g32;
+      rtx new_shift_amout, select_reg;
+      dst_low_part_l32 = gen_reg_rtx (SImode);
+      dst_high_part_l32 = gen_reg_rtx (SImode);
+      dst_low_part_g32 = gen_reg_rtx (SImode);
+      dst_high_part_g32 = gen_reg_rtx (SImode);
+      new_shift_amout = gen_reg_rtx (SImode);
+      select_reg = gen_reg_rtx (SImode);
+
+      rtx ext_start;
+      ext_start = gen_reg_rtx (SImode);
+
+      /*
+        if (shiftamount < 32)
+          dst_low_part = src_low_part << shiftamout
+          dst_high_part = wext (src, 32 - shiftamount)
+          # wext can't handle wext (src, 32) since it's only take rb[0:4]
+          # for extract.
+          dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
+        else
+          dst_low_part = 0
+          dst_high_part = src_low_part << shiftamount & 0x1f
+      */
+
+      emit_insn (gen_subsi3 (ext_start,
+                            gen_int_mode (32, SImode),
+                            shiftamount));
+      emit_insn (gen_wext (dst_high_part_l32, src, ext_start));
+
+      /* Handle for shiftamout == 0.  */
+      emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount,
+                             src_high_part, dst_high_part_l32));
+
+      emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount));
+
+      emit_move_insn (dst_low_part_g32, const0_rtx);
+      emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
+      emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part,
+                                                new_shift_amout));
+
+      emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
+
+      emit_insn (gen_cmovnsi (dst_low_part, select_reg,
+                             dst_low_part_l32, dst_low_part_g32));
+      emit_insn (gen_cmovnsi (dst_high_part, select_reg,
+                             dst_high_part_l32, dst_high_part_g32));
+    }
+}
+
+void
+nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  nds32_split_shiftrtdi3 (dst, src, shiftamount, false);
+}
+
+void
+nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  nds32_split_shiftrtdi3 (dst, src, shiftamount, true);
+}
+
+void
+nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  rtx dst_low_part_l32, dst_high_part_l32;
+  rtx dst_low_part_g32, dst_high_part_g32;
+  rtx select_reg, low5bit, low5bit_inv, minus32sa;
+  rtx dst_low_part_g32_tmph;
+  rtx dst_low_part_g32_tmpl;
+  rtx dst_high_part_l32_tmph;
+  rtx dst_high_part_l32_tmpl;
+
+  rtx src_low_part, src_high_part;
+  rtx dst_high_part, dst_low_part;
+
+  shiftamount = force_reg (SImode, shiftamount);
+
+  emit_insn (gen_andsi3 (shiftamount,
+                        shiftamount,
+                        gen_int_mode (0x3f, SImode)));
+
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+
+  src_high_part = nds32_di_high_part_subreg (src);
+  src_low_part = nds32_di_low_part_subreg (src);
+
+  dst_low_part_l32 = gen_reg_rtx (SImode);
+  dst_high_part_l32 = gen_reg_rtx (SImode);
+  dst_low_part_g32 = gen_reg_rtx (SImode);
+  dst_high_part_g32 = gen_reg_rtx (SImode);
+  low5bit = gen_reg_rtx (SImode);
+  low5bit_inv = gen_reg_rtx (SImode);
+  minus32sa = gen_reg_rtx (SImode);
+  select_reg = gen_reg_rtx (SImode);
+
+  dst_low_part_g32_tmph = gen_reg_rtx (SImode);
+  dst_low_part_g32_tmpl = gen_reg_rtx (SImode);
+
+  dst_high_part_l32_tmph = gen_reg_rtx (SImode);
+  dst_high_part_l32_tmpl = gen_reg_rtx (SImode);
+
+  emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
+
+  /* if shiftamount < 32
+       dst_low_part = wext(src, shiftamount)
+     else
+       dst_low_part = ((src_high_part >> (shiftamount & 0x1f))
+                      | (src_low_part << (32 - (shiftamount & 0x1f))))
+  */
+  emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode)));
+  emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit));
+
+  emit_insn (gen_wext (dst_low_part_l32, src, shiftamount));
+
+  emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit));
+  emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv));
+
+  emit_insn (gen_iorsi3 (dst_low_part_g32,
+                        dst_low_part_g32_tmpl,
+                        dst_low_part_g32_tmph));
+
+  emit_insn (gen_cmovnsi (dst_low_part, select_reg,
+                         dst_low_part_l32, dst_low_part_g32));
+
+  /* if shiftamount < 32
+       dst_high_part = ((src_high_part >> shiftamount)
+                       | (src_low_part << (32 - shiftamount)))
+       dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
+     else
+       dst_high_part = wext(src, shiftamount & 0x1f)
+  */
+
+  emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount));
+
+  emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount));
+  emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa));
+
+  emit_insn (gen_iorsi3 (dst_high_part_l32,
+                        dst_high_part_l32_tmpl,
+                        dst_high_part_l32_tmph));
+
+  emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount,
+                         src_high_part, dst_high_part_l32));
+
+  emit_insn (gen_wext (dst_high_part_g32, src, low5bit));
+
+  emit_insn (gen_cmovnsi (dst_high_part, select_reg,
+                         dst_high_part_l32, dst_high_part_g32));
+}
 /* Return true X is need use long call.  */
 bool
 nds32_long_call_p (rtx symbol)
@@ -2976,3 +3452,22 @@ nds32_expand_constant (machine_mode mode, HOST_WIDE_INT val,
       emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp));
     }
 }
+
+/* Auxiliary functions for manipulation DI mode.  */
+rtx nds32_di_high_part_subreg(rtx reg)
+{
+  unsigned high_part_offset = subreg_highpart_offset (SImode, DImode);
+
+  return simplify_gen_subreg (
+          SImode, reg,
+          DImode, high_part_offset);
+}
+
+rtx nds32_di_low_part_subreg(rtx reg)
+{
+  unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode);
+
+  return simplify_gen_subreg (
+          SImode, reg,
+          DImode, low_part_offset);
+}
index 8dea130..d02aabf 100644 (file)
@@ -449,29 +449,44 @@ nds32_gen_dup_4_byte_to_word_value (rtx value)
     }
   else
     {
-      /* ! prepare word
-        andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
-        slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
-        or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
-        slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
-        or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */
-
-      rtx tmp1, tmp2, tmp3, tmp4, final_value;
-      tmp1 = expand_binop (SImode, and_optab, value,
-                          gen_int_mode (0xff, SImode),
-                          NULL_RTX, 0, OPTAB_WIDEN);
-      tmp2 = expand_binop (SImode, ashl_optab, tmp1,
-                          gen_int_mode (8, SImode),
-                          NULL_RTX, 0, OPTAB_WIDEN);
-      tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
-                          NULL_RTX, 0, OPTAB_WIDEN);
-      tmp4 = expand_binop (SImode, ashl_optab, tmp3,
-                          gen_int_mode (16, SImode),
-                          NULL_RTX, 0, OPTAB_WIDEN);
-
-      final_value = expand_binop (SImode, ior_optab, tmp3, tmp4,
-                                 NULL_RTX, 0, OPTAB_WIDEN);
-      emit_move_insn (value4word, final_value);
+      if (NDS32_EXT_DSP_P ())
+       {
+         /* ! prepare word
+            insb    $tmp, $value, 1         ! $tmp  <- 0x0000abab
+            pkbb16  $tmp6, $tmp2, $tmp2   ! $value4word  <- 0xabababab */
+         rtx tmp = gen_reg_rtx (SImode);
+
+         convert_move (tmp, value, true);
+
+         emit_insn (
+           gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));
+
+         emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
+       }
+      else
+       {
+         /* ! prepare word
+            andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
+            slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
+            or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
+            slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
+            or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */
+
+         rtx tmp1, tmp2, tmp3, tmp4;
+         tmp1 = expand_binop (SImode, and_optab, value,
+                              gen_int_mode (0xff, SImode),
+                              NULL_RTX, 0, OPTAB_WIDEN);
+         tmp2 = expand_binop (SImode, ashl_optab, tmp1,
+                              gen_int_mode (8, SImode),
+                              NULL_RTX, 0, OPTAB_WIDEN);
+         tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
+                              NULL_RTX, 0, OPTAB_WIDEN);
+         tmp4 = expand_binop (SImode, ashl_optab, tmp3,
+                              gen_int_mode (16, SImode),
+                              NULL_RTX, 0, OPTAB_WIDEN);
+
+         emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
+       }
     }
 
   return value4word;
index a670623..2363b96 100644 (file)
@@ -518,4 +518,81 @@ nds32_const_double_range_ok_p (rtx op, machine_mode mode,
 
   return val >= lower && val < upper;
 }
+
+HOST_WIDE_INT
+const_vector_to_hwint (rtx op)
+{
+  HOST_WIDE_INT hwint = 0;
+  HOST_WIDE_INT mask;
+  int i;
+  int shift_adv;
+  int shift = 0;
+  int nelem;
+
+  switch (GET_MODE (op))
+    {
+      case V2HImode:
+       mask = 0xffff;
+       shift_adv = 16;
+       nelem = 2;
+       break;
+      case V4QImode:
+       mask = 0xff;
+       shift_adv = 8;
+       nelem = 4;
+       break;
+      default:
+       gcc_unreachable ();
+    }
+
+  if (TARGET_BIG_ENDIAN)
+    {
+      for (i = 0; i < nelem; ++i)
+       {
+         HOST_WIDE_INT val = XINT (XVECEXP (op, 0, nelem - i - 1), 0);
+         hwint |= (val & mask) << shift;
+         shift = shift + shift_adv;
+       }
+    }
+  else
+    {
+      for (i = 0; i < nelem; ++i)
+       {
+         HOST_WIDE_INT val = XINT (XVECEXP (op, 0, i), 0);
+         hwint |= (val & mask) << shift;
+         shift = shift + shift_adv;
+       }
+    }
+
+  return hwint;
+}
+
+bool
+nds32_valid_CVp5_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16));
+}
+
+bool
+nds32_valid_CVs5_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival < (1 << 4)) && (ival >= -(1 << 4));
+}
+
+bool
+nds32_valid_CVs2_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival < (1 << 19)) && (ival >= -(1 << 19));
+}
+
+bool
+nds32_valid_CVhi_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival != 0) && ((ival & 0xfff) == 0);
+}
+
 /* ------------------------------------------------------------------------ */
index e7b7d41..64500e3 100644 (file)
@@ -73,6 +73,11 @@ extern unsigned int nds32_dbx_register_number (unsigned int);
 
 extern bool nds32_valid_smw_lwm_base_p (rtx);
 
+/* Auxiliary functions for manipulation DI mode.  */
+
+extern rtx nds32_di_high_part_subreg(rtx);
+extern rtx nds32_di_low_part_subreg(rtx);
+
 /* Auxiliary functions for expanding rtl used in nds32-multiple.md.  */
 
 extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *);
@@ -159,6 +164,10 @@ extern void nds32_expand_float_cstore (rtx *);
 extern enum nds32_expand_result_type nds32_expand_movcc (rtx *);
 extern void nds32_expand_float_movcc (rtx *);
 
+/* Auxiliary functions for expand extv/insv instruction.  */
+
+extern enum nds32_expand_result_type nds32_expand_extv (rtx *);
+extern enum nds32_expand_result_type nds32_expand_insv (rtx *);
 
 /* Auxiliary functions to identify long-call symbol.  */
 extern bool nds32_long_call_p (rtx);
@@ -193,6 +202,8 @@ extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *
                                                                      rtx *);
 extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *);
 
+extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool);
+
 extern const char *nds32_output_call (rtx, rtx *, rtx,
                                      const char *, const char *, bool);
 
@@ -203,9 +214,19 @@ extern const char *nds32_output_stack_push (rtx);
 extern const char *nds32_output_stack_pop (rtx);
 extern const char *nds32_output_return (void);
 
+
+/* Auxiliary functions to split/output sms pattern.  */
+extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx);
+extern const char *nds32_output_sms (rtx, rtx, rtx, rtx);
+extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+
 /* Auxiliary functions to split double word RTX pattern.  */
 
 extern void nds32_spilt_doubleword (rtx *, bool);
+extern void nds32_split_ashiftdi3 (rtx, rtx, rtx);
+extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx);
+extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx);
+extern void nds32_split_rotatertdi3 (rtx, rtx, rtx);
 
 /* Auxiliary functions to split large constant RTX pattern.  */
 
@@ -246,6 +267,14 @@ extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool);
 /* Auxiliary functions for pre-define marco.  */
 extern void nds32_cpu_cpp_builtins(struct cpp_reader *);
 
+/* Auxiliary functions for const_vector's constraints.  */
+
+extern HOST_WIDE_INT const_vector_to_hwint (rtx);
+extern bool nds32_valid_CVp5_p (rtx);
+extern bool nds32_valid_CVs5_p (rtx);
+extern bool nds32_valid_CVs2_p (rtx);
+extern bool nds32_valid_CVhi_p (rtx);
+
 extern bool nds32_split_double_word_load_store_p (rtx *,bool);
 
 namespace nds32 {
index b0151be..203cd11 100644 (file)
@@ -413,6 +413,7 @@ extract_mac_non_acc_rtx (rtx_insn *insn)
   switch (get_attr_type (insn))
     {
     case TYPE_MAC:
+    case TYPE_DMAC:
       if (REG_P (XEXP (exp, 0)))
        return XEXP (exp, 1);
       else
index da8af4c..50eb709 100644 (file)
@@ -1987,6 +1987,16 @@ nds32_function_arg_boundary (machine_mode mode, const_tree type)
          : PARM_BOUNDARY);
 }
 
+bool
+nds32_vector_mode_supported_p (machine_mode mode)
+{
+  if (mode == V4QImode
+      || mode == V2HImode)
+    return NDS32_EXT_DSP_P ();
+
+  return false;
+}
+
 /* -- How Scalar Function Values Are Returned.  */
 
 static rtx
@@ -2688,6 +2698,23 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
     }
 }
 
+static machine_mode
+nds32_vectorize_preferred_simd_mode (scalar_mode mode)
+{
+  if (!NDS32_EXT_DSP_P ())
+    return word_mode;
+
+  switch (mode)
+    {
+    case E_QImode:
+      return V4QImode;
+    case E_HImode:
+      return V2HImode;
+    default:
+      return word_mode;
+    }
+}
+
 \f
 /* Condition Code Status.  */
 
@@ -2978,6 +3005,18 @@ nds32_print_operand (FILE *stream, rtx x, int code)
 
       /* No need to handle following process, so return immediately.  */
       return;
+
+    case 'v':
+      gcc_assert (CONST_INT_P (x)
+                 && (INTVAL (x) == 0
+                     || INTVAL (x) == 8
+                     || INTVAL (x) == 16
+                     || INTVAL (x) == 24));
+      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
+
+      /* No need to handle following process, so return immediately.  */
+      return;
+
     case 'B':
       /* Use exact_log2() to search the 1-bit position.  */
       gcc_assert (CONST_INT_P (x));
@@ -3168,6 +3207,10 @@ nds32_print_operand (FILE *stream, rtx x, int code)
       output_addr_const (stream, x);
       break;
 
+    case CONST_VECTOR:
+      fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x));
+      break;
+
     default:
       /* Generally, output_addr_const () is able to handle most cases.
         We want to see what CODE could appear,
@@ -3260,6 +3303,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
                           reg_names[REGNO (XEXP (op0, 0))],
                           sv);
        }
+      else if (GET_CODE (op0) == ASHIFT && REG_P (op1))
+       {
+         /* [Ra + Rb << sv]
+            In normal, ASHIFT can be converted to MULT like above case.
+            But when the address rtx does not go through canonicalize_address
+            defined in fwprop, we'll need this case.  */
+         int sv = INTVAL (XEXP (op0, 1));
+         gcc_assert (sv <= 3 && sv >=0);
+
+         fprintf (stream, "[%s + %s << %d]",
+                  reg_names[REGNO (op1)],
+                  reg_names[REGNO (XEXP (op0, 0))],
+                  sv);
+       }
       else
        {
          /* The control flow is not supposed to be here.  */
@@ -3770,6 +3827,8 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile)
     builtin_define ("__NDS32_GP_DIRECT__");
   if (TARGET_VH)
     builtin_define ("__NDS32_VH__");
+  if (NDS32_EXT_DSP_P ())
+    builtin_define ("__NDS32_EXT_DSP__");
 
   if (TARGET_BIG_ENDIAN)
     builtin_define ("__big_endian__");
@@ -5010,6 +5069,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 #undef TARGET_FUNCTION_ARG_BOUNDARY
 #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary
 
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p
+
 /* -- How Scalar Function Values Are Returned.  */
 
 #undef TARGET_FUNCTION_VALUE
@@ -5087,6 +5149,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p
 
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode
+
 \f
 /* Anchored Addresses.  */
 
index 56dd7cf..229012a 100644 (file)
@@ -140,6 +140,9 @@ enum nds32_16bit_address_type
    Check gcc/common/config/nds32/nds32-common.c for the optimizations that
    apply -malways-align.  */
 #define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN)
+
+#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP)
+
 /* Get alignment according to mode or type information.
    When 'type' is nonnull, there is no need to look at 'mode'.  */
 #define NDS32_MODE_TYPE_ALIGN(mode, type) \
@@ -439,7 +442,30 @@ enum nds32_builtins
   NDS32_BUILTIN_FFB,
   NDS32_BUILTIN_FFMISM,
   NDS32_BUILTIN_FLMISM,
-
+  NDS32_BUILTIN_KADDW,
+  NDS32_BUILTIN_KSUBW,
+  NDS32_BUILTIN_KADDH,
+  NDS32_BUILTIN_KSUBH,
+  NDS32_BUILTIN_KDMBB,
+  NDS32_BUILTIN_V_KDMBB,
+  NDS32_BUILTIN_KDMBT,
+  NDS32_BUILTIN_V_KDMBT,
+  NDS32_BUILTIN_KDMTB,
+  NDS32_BUILTIN_V_KDMTB,
+  NDS32_BUILTIN_KDMTT,
+  NDS32_BUILTIN_V_KDMTT,
+  NDS32_BUILTIN_KHMBB,
+  NDS32_BUILTIN_V_KHMBB,
+  NDS32_BUILTIN_KHMBT,
+  NDS32_BUILTIN_V_KHMBT,
+  NDS32_BUILTIN_KHMTB,
+  NDS32_BUILTIN_V_KHMTB,
+  NDS32_BUILTIN_KHMTT,
+  NDS32_BUILTIN_V_KHMTT,
+  NDS32_BUILTIN_KSLRAW,
+  NDS32_BUILTIN_KSLRAW_U,
+  NDS32_BUILTIN_RDOV,
+  NDS32_BUILTIN_CLROV,
   NDS32_BUILTIN_ROTR,
   NDS32_BUILTIN_SVA,
   NDS32_BUILTIN_SVS,
@@ -512,7 +538,295 @@ enum nds32_builtins
   NDS32_BUILTIN_SET_TRIG_LEVEL,
   NDS32_BUILTIN_SET_TRIG_EDGE,
   NDS32_BUILTIN_GET_TRIG_TYPE,
-
+  NDS32_BUILTIN_DSP_BEGIN,
+  NDS32_BUILTIN_ADD16,
+  NDS32_BUILTIN_V_UADD16,
+  NDS32_BUILTIN_V_SADD16,
+  NDS32_BUILTIN_RADD16,
+  NDS32_BUILTIN_V_RADD16,
+  NDS32_BUILTIN_URADD16,
+  NDS32_BUILTIN_V_URADD16,
+  NDS32_BUILTIN_KADD16,
+  NDS32_BUILTIN_V_KADD16,
+  NDS32_BUILTIN_UKADD16,
+  NDS32_BUILTIN_V_UKADD16,
+  NDS32_BUILTIN_SUB16,
+  NDS32_BUILTIN_V_USUB16,
+  NDS32_BUILTIN_V_SSUB16,
+  NDS32_BUILTIN_RSUB16,
+  NDS32_BUILTIN_V_RSUB16,
+  NDS32_BUILTIN_URSUB16,
+  NDS32_BUILTIN_V_URSUB16,
+  NDS32_BUILTIN_KSUB16,
+  NDS32_BUILTIN_V_KSUB16,
+  NDS32_BUILTIN_UKSUB16,
+  NDS32_BUILTIN_V_UKSUB16,
+  NDS32_BUILTIN_CRAS16,
+  NDS32_BUILTIN_V_UCRAS16,
+  NDS32_BUILTIN_V_SCRAS16,
+  NDS32_BUILTIN_RCRAS16,
+  NDS32_BUILTIN_V_RCRAS16,
+  NDS32_BUILTIN_URCRAS16,
+  NDS32_BUILTIN_V_URCRAS16,
+  NDS32_BUILTIN_KCRAS16,
+  NDS32_BUILTIN_V_KCRAS16,
+  NDS32_BUILTIN_UKCRAS16,
+  NDS32_BUILTIN_V_UKCRAS16,
+  NDS32_BUILTIN_CRSA16,
+  NDS32_BUILTIN_V_UCRSA16,
+  NDS32_BUILTIN_V_SCRSA16,
+  NDS32_BUILTIN_RCRSA16,
+  NDS32_BUILTIN_V_RCRSA16,
+  NDS32_BUILTIN_URCRSA16,
+  NDS32_BUILTIN_V_URCRSA16,
+  NDS32_BUILTIN_KCRSA16,
+  NDS32_BUILTIN_V_KCRSA16,
+  NDS32_BUILTIN_UKCRSA16,
+  NDS32_BUILTIN_V_UKCRSA16,
+  NDS32_BUILTIN_ADD8,
+  NDS32_BUILTIN_V_UADD8,
+  NDS32_BUILTIN_V_SADD8,
+  NDS32_BUILTIN_RADD8,
+  NDS32_BUILTIN_V_RADD8,
+  NDS32_BUILTIN_URADD8,
+  NDS32_BUILTIN_V_URADD8,
+  NDS32_BUILTIN_KADD8,
+  NDS32_BUILTIN_V_KADD8,
+  NDS32_BUILTIN_UKADD8,
+  NDS32_BUILTIN_V_UKADD8,
+  NDS32_BUILTIN_SUB8,
+  NDS32_BUILTIN_V_USUB8,
+  NDS32_BUILTIN_V_SSUB8,
+  NDS32_BUILTIN_RSUB8,
+  NDS32_BUILTIN_V_RSUB8,
+  NDS32_BUILTIN_URSUB8,
+  NDS32_BUILTIN_V_URSUB8,
+  NDS32_BUILTIN_KSUB8,
+  NDS32_BUILTIN_V_KSUB8,
+  NDS32_BUILTIN_UKSUB8,
+  NDS32_BUILTIN_V_UKSUB8,
+  NDS32_BUILTIN_SRA16,
+  NDS32_BUILTIN_V_SRA16,
+  NDS32_BUILTIN_SRA16_U,
+  NDS32_BUILTIN_V_SRA16_U,
+  NDS32_BUILTIN_SRL16,
+  NDS32_BUILTIN_V_SRL16,
+  NDS32_BUILTIN_SRL16_U,
+  NDS32_BUILTIN_V_SRL16_U,
+  NDS32_BUILTIN_SLL16,
+  NDS32_BUILTIN_V_SLL16,
+  NDS32_BUILTIN_KSLL16,
+  NDS32_BUILTIN_V_KSLL16,
+  NDS32_BUILTIN_KSLRA16,
+  NDS32_BUILTIN_V_KSLRA16,
+  NDS32_BUILTIN_KSLRA16_U,
+  NDS32_BUILTIN_V_KSLRA16_U,
+  NDS32_BUILTIN_CMPEQ16,
+  NDS32_BUILTIN_V_SCMPEQ16,
+  NDS32_BUILTIN_V_UCMPEQ16,
+  NDS32_BUILTIN_SCMPLT16,
+  NDS32_BUILTIN_V_SCMPLT16,
+  NDS32_BUILTIN_SCMPLE16,
+  NDS32_BUILTIN_V_SCMPLE16,
+  NDS32_BUILTIN_UCMPLT16,
+  NDS32_BUILTIN_V_UCMPLT16,
+  NDS32_BUILTIN_UCMPLE16,
+  NDS32_BUILTIN_V_UCMPLE16,
+  NDS32_BUILTIN_CMPEQ8,
+  NDS32_BUILTIN_V_SCMPEQ8,
+  NDS32_BUILTIN_V_UCMPEQ8,
+  NDS32_BUILTIN_SCMPLT8,
+  NDS32_BUILTIN_V_SCMPLT8,
+  NDS32_BUILTIN_SCMPLE8,
+  NDS32_BUILTIN_V_SCMPLE8,
+  NDS32_BUILTIN_UCMPLT8,
+  NDS32_BUILTIN_V_UCMPLT8,
+  NDS32_BUILTIN_UCMPLE8,
+  NDS32_BUILTIN_V_UCMPLE8,
+  NDS32_BUILTIN_SMIN16,
+  NDS32_BUILTIN_V_SMIN16,
+  NDS32_BUILTIN_UMIN16,
+  NDS32_BUILTIN_V_UMIN16,
+  NDS32_BUILTIN_SMAX16,
+  NDS32_BUILTIN_V_SMAX16,
+  NDS32_BUILTIN_UMAX16,
+  NDS32_BUILTIN_V_UMAX16,
+  NDS32_BUILTIN_SCLIP16,
+  NDS32_BUILTIN_V_SCLIP16,
+  NDS32_BUILTIN_UCLIP16,
+  NDS32_BUILTIN_V_UCLIP16,
+  NDS32_BUILTIN_KHM16,
+  NDS32_BUILTIN_V_KHM16,
+  NDS32_BUILTIN_KHMX16,
+  NDS32_BUILTIN_V_KHMX16,
+  NDS32_BUILTIN_KABS16,
+  NDS32_BUILTIN_V_KABS16,
+  NDS32_BUILTIN_SMIN8,
+  NDS32_BUILTIN_V_SMIN8,
+  NDS32_BUILTIN_UMIN8,
+  NDS32_BUILTIN_V_UMIN8,
+  NDS32_BUILTIN_SMAX8,
+  NDS32_BUILTIN_V_SMAX8,
+  NDS32_BUILTIN_UMAX8,
+  NDS32_BUILTIN_V_UMAX8,
+  NDS32_BUILTIN_KABS8,
+  NDS32_BUILTIN_V_KABS8,
+  NDS32_BUILTIN_SUNPKD810,
+  NDS32_BUILTIN_V_SUNPKD810,
+  NDS32_BUILTIN_SUNPKD820,
+  NDS32_BUILTIN_V_SUNPKD820,
+  NDS32_BUILTIN_SUNPKD830,
+  NDS32_BUILTIN_V_SUNPKD830,
+  NDS32_BUILTIN_SUNPKD831,
+  NDS32_BUILTIN_V_SUNPKD831,
+  NDS32_BUILTIN_ZUNPKD810,
+  NDS32_BUILTIN_V_ZUNPKD810,
+  NDS32_BUILTIN_ZUNPKD820,
+  NDS32_BUILTIN_V_ZUNPKD820,
+  NDS32_BUILTIN_ZUNPKD830,
+  NDS32_BUILTIN_V_ZUNPKD830,
+  NDS32_BUILTIN_ZUNPKD831,
+  NDS32_BUILTIN_V_ZUNPKD831,
+  NDS32_BUILTIN_RADDW,
+  NDS32_BUILTIN_URADDW,
+  NDS32_BUILTIN_RSUBW,
+  NDS32_BUILTIN_URSUBW,
+  NDS32_BUILTIN_SRA_U,
+  NDS32_BUILTIN_KSLL,
+  NDS32_BUILTIN_PKBB16,
+  NDS32_BUILTIN_V_PKBB16,
+  NDS32_BUILTIN_PKBT16,
+  NDS32_BUILTIN_V_PKBT16,
+  NDS32_BUILTIN_PKTB16,
+  NDS32_BUILTIN_V_PKTB16,
+  NDS32_BUILTIN_PKTT16,
+  NDS32_BUILTIN_V_PKTT16,
+  NDS32_BUILTIN_SMMUL,
+  NDS32_BUILTIN_SMMUL_U,
+  NDS32_BUILTIN_KMMAC,
+  NDS32_BUILTIN_KMMAC_U,
+  NDS32_BUILTIN_KMMSB,
+  NDS32_BUILTIN_KMMSB_U,
+  NDS32_BUILTIN_KWMMUL,
+  NDS32_BUILTIN_KWMMUL_U,
+  NDS32_BUILTIN_SMMWB,
+  NDS32_BUILTIN_V_SMMWB,
+  NDS32_BUILTIN_SMMWB_U,
+  NDS32_BUILTIN_V_SMMWB_U,
+  NDS32_BUILTIN_SMMWT,
+  NDS32_BUILTIN_V_SMMWT,
+  NDS32_BUILTIN_SMMWT_U,
+  NDS32_BUILTIN_V_SMMWT_U,
+  NDS32_BUILTIN_KMMAWB,
+  NDS32_BUILTIN_V_KMMAWB,
+  NDS32_BUILTIN_KMMAWB_U,
+  NDS32_BUILTIN_V_KMMAWB_U,
+  NDS32_BUILTIN_KMMAWT,
+  NDS32_BUILTIN_V_KMMAWT,
+  NDS32_BUILTIN_KMMAWT_U,
+  NDS32_BUILTIN_V_KMMAWT_U,
+  NDS32_BUILTIN_SMBB,
+  NDS32_BUILTIN_V_SMBB,
+  NDS32_BUILTIN_SMBT,
+  NDS32_BUILTIN_V_SMBT,
+  NDS32_BUILTIN_SMTT,
+  NDS32_BUILTIN_V_SMTT,
+  NDS32_BUILTIN_KMDA,
+  NDS32_BUILTIN_V_KMDA,
+  NDS32_BUILTIN_KMXDA,
+  NDS32_BUILTIN_V_KMXDA,
+  NDS32_BUILTIN_SMDS,
+  NDS32_BUILTIN_V_SMDS,
+  NDS32_BUILTIN_SMDRS,
+  NDS32_BUILTIN_V_SMDRS,
+  NDS32_BUILTIN_SMXDS,
+  NDS32_BUILTIN_V_SMXDS,
+  NDS32_BUILTIN_KMABB,
+  NDS32_BUILTIN_V_KMABB,
+  NDS32_BUILTIN_KMABT,
+  NDS32_BUILTIN_V_KMABT,
+  NDS32_BUILTIN_KMATT,
+  NDS32_BUILTIN_V_KMATT,
+  NDS32_BUILTIN_KMADA,
+  NDS32_BUILTIN_V_KMADA,
+  NDS32_BUILTIN_KMAXDA,
+  NDS32_BUILTIN_V_KMAXDA,
+  NDS32_BUILTIN_KMADS,
+  NDS32_BUILTIN_V_KMADS,
+  NDS32_BUILTIN_KMADRS,
+  NDS32_BUILTIN_V_KMADRS,
+  NDS32_BUILTIN_KMAXDS,
+  NDS32_BUILTIN_V_KMAXDS,
+  NDS32_BUILTIN_KMSDA,
+  NDS32_BUILTIN_V_KMSDA,
+  NDS32_BUILTIN_KMSXDA,
+  NDS32_BUILTIN_V_KMSXDA,
+  NDS32_BUILTIN_SMAL,
+  NDS32_BUILTIN_V_SMAL,
+  NDS32_BUILTIN_BITREV,
+  NDS32_BUILTIN_WEXT,
+  NDS32_BUILTIN_BPICK,
+  NDS32_BUILTIN_INSB,
+  NDS32_BUILTIN_SADD64,
+  NDS32_BUILTIN_UADD64,
+  NDS32_BUILTIN_RADD64,
+  NDS32_BUILTIN_URADD64,
+  NDS32_BUILTIN_KADD64,
+  NDS32_BUILTIN_UKADD64,
+  NDS32_BUILTIN_SSUB64,
+  NDS32_BUILTIN_USUB64,
+  NDS32_BUILTIN_RSUB64,
+  NDS32_BUILTIN_URSUB64,
+  NDS32_BUILTIN_KSUB64,
+  NDS32_BUILTIN_UKSUB64,
+  NDS32_BUILTIN_SMAR64,
+  NDS32_BUILTIN_SMSR64,
+  NDS32_BUILTIN_UMAR64,
+  NDS32_BUILTIN_UMSR64,
+  NDS32_BUILTIN_KMAR64,
+  NDS32_BUILTIN_KMSR64,
+  NDS32_BUILTIN_UKMAR64,
+  NDS32_BUILTIN_UKMSR64,
+  NDS32_BUILTIN_SMALBB,
+  NDS32_BUILTIN_V_SMALBB,
+  NDS32_BUILTIN_SMALBT,
+  NDS32_BUILTIN_V_SMALBT,
+  NDS32_BUILTIN_SMALTT,
+  NDS32_BUILTIN_V_SMALTT,
+  NDS32_BUILTIN_SMALDA,
+  NDS32_BUILTIN_V_SMALDA,
+  NDS32_BUILTIN_SMALXDA,
+  NDS32_BUILTIN_V_SMALXDA,
+  NDS32_BUILTIN_SMALDS,
+  NDS32_BUILTIN_V_SMALDS,
+  NDS32_BUILTIN_SMALDRS,
+  NDS32_BUILTIN_V_SMALDRS,
+  NDS32_BUILTIN_SMALXDS,
+  NDS32_BUILTIN_V_SMALXDS,
+  NDS32_BUILTIN_SMUL16,
+  NDS32_BUILTIN_V_SMUL16,
+  NDS32_BUILTIN_SMULX16,
+  NDS32_BUILTIN_V_SMULX16,
+  NDS32_BUILTIN_UMUL16,
+  NDS32_BUILTIN_V_UMUL16,
+  NDS32_BUILTIN_UMULX16,
+  NDS32_BUILTIN_V_UMULX16,
+  NDS32_BUILTIN_SMSLDA,
+  NDS32_BUILTIN_V_SMSLDA,
+  NDS32_BUILTIN_SMSLXDA,
+  NDS32_BUILTIN_V_SMSLXDA,
+  NDS32_BUILTIN_UCLIP32,
+  NDS32_BUILTIN_SCLIP32,
+  NDS32_BUILTIN_KABS,
+  NDS32_BUILTIN_UALOAD_U16,
+  NDS32_BUILTIN_UALOAD_S16,
+  NDS32_BUILTIN_UALOAD_U8,
+  NDS32_BUILTIN_UALOAD_S8,
+  NDS32_BUILTIN_UASTORE_U16,
+  NDS32_BUILTIN_UASTORE_S16,
+  NDS32_BUILTIN_UASTORE_U8,
+  NDS32_BUILTIN_UASTORE_S8,
+  NDS32_BUILTIN_DSP_END,
   NDS32_BUILTIN_UNALIGNED_FEATURE,
   NDS32_BUILTIN_ENABLE_UNALIGNED,
   NDS32_BUILTIN_DISABLE_UNALIGNED,
@@ -576,6 +890,13 @@ enum nds32_builtins
 #endif
 
 #define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2
+
+#ifdef TARGET_DEFAULT_EXT_DSP
+#  define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}"
+#else
+#  define NDS32_EXT_DSP_SPEC ""
+#endif
+
 /* ------------------------------------------------------------------------ */
 \f
 /* Controlling the Compilation Driver.  */
@@ -591,7 +912,7 @@ enum nds32_builtins
   {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }
 
 #define CC1_SPEC \
-  ""
+  NDS32_EXT_DSP_SPEC
 
 #define ASM_SPEC \
   " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
@@ -603,7 +924,8 @@ enum nds32_builtins
   " %{mext-fpu-sp:-mfpu-sp-ext}" \
   " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \
   " %{mext-fpu-dp:-mfpu-dp-ext}" \
-  " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}"
+  " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" \
+  " %{mext-dsp:-mdsp-ext}"
 
 /* If user issues -mrelax, we need to pass '--relax' to linker.  */
 #define LINK_SPEC \
index 3b8107e..7249520 100644 (file)
 ;; Insn type, it is used to default other attribute values.
 (define_attr "type"
   "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\
-   falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore"
+   falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\
+   dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext"
   (const_string "unknown"))
 
 ;; Insn sub-type
 (define_attr "subtype"
-  "simple,shift"
+  "simple,shift,saturation"
   (const_string "simple"))
 
 ;; Length, in bytes, default is 4-bytes.
 
 ;; ----------------------------------------------------------------------------
 
+(include "nds32-dspext.md")
 
 ;; Move instructions.
 
 
 
 ;; ----------------------------------------------------------------------------
+(define_expand "extv"
+  [(set (match_operand 0 "register_operand" "")
+        (sign_extract (match_operand 1 "nonimmediate_operand" "")
+                      (match_operand 2 "const_int_operand" "")
+                      (match_operand 3 "const_int_operand" "")))]
+  ""
+{
+  enum nds32_expand_result_type result = nds32_expand_extv (operands);
+  switch (result)
+    {
+    case EXPAND_DONE:
+      DONE;
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:
+      break;
+    default:
+      gcc_unreachable ();
+    }
+})
+
+(define_expand "insv"
+  [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+                      (match_operand 1 "const_int_operand" "")
+                      (match_operand 2 "const_int_operand" ""))
+        (match_operand 3 "register_operand" ""))]
+  ""
+{
+  enum nds32_expand_result_type result = nds32_expand_insv (operands);
+  switch (result)
+    {
+    case EXPAND_DONE:
+      DONE;
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:
+      break;
+    default:
+      gcc_unreachable ();
+    }
+})
 
 ;; Arithmetic instructions.
 
 (define_insn "addsi3"
   [(set (match_operand:SI 0 "register_operand"               "=   d,   l,   d,   l, d, l,   k,   l,    r, r")
        (plus:SI (match_operand:SI 1 "register_operand"      "%   0,   l,   0,   l, 0, l,   0,   k,    r, r")
-                (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,Iu06, Is15, r")))]
+                (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))]
   ""
 {
   switch (which_alternative)
index dcf6d39..2c72a01 100644 (file)
@@ -90,6 +90,10 @@ mcmov
 Target Report Mask(CMOV)
 Generate conditional move instructions.
 
+mhw-abs
+Target Report Mask(HW_ABS)
+Generate hardware abs instructions.
+
 mext-perf
 Target Report Mask(EXT_PERF)
 Generate performance extension instructions.
@@ -102,6 +106,10 @@ mext-string
 Target Report Mask(EXT_STRING)
 Generate string extension instructions.
 
+mext-dsp
+Target Report Mask(EXT_DSP)
+Generate DSP extension instructions.
+
 mv3push
 Target Report Mask(V3PUSH)
 Generate v3 push25/pop25 instructions.
@@ -321,6 +329,10 @@ mext-fpu-dp
 Target Report Mask(FPU_DOUBLE)
 Generate double-precision floating-point instructions.
 
+mforce-no-ext-dsp
+Target Undocumented Report Mask(FORCE_NO_EXT_DSP)
+Force disable hardware loop, even use -mext-dsp.
+
 malways-save-lp
 Target Var(flag_always_save_lp) Init(0)
 Always save $lp in the stack.
index 7bb1177..24cb291 100644 (file)
 #ifndef _NDS32_INTRINSIC_H
 #define _NDS32_INTRINSIC_H
 
+typedef signed char int8x4_t __attribute ((vector_size(4)));
+typedef short int16x2_t __attribute ((vector_size(4)));
+typedef int int32x2_t __attribute__((vector_size(8)));
+typedef unsigned char uint8x4_t __attribute__ ((vector_size (4)));
+typedef unsigned short uint16x2_t __attribute__ ((vector_size (4)));
+typedef unsigned int uint32x2_t __attribute__((vector_size(8)));
+
 /* General instrinsic register names.  */
 enum nds32_intrinsic_registers
 {
@@ -691,6 +698,55 @@ enum nds32_dpref
 #define __nds32__tlbop_flua() \
 (__builtin_nds32_tlbop_flua())
 
+#define __nds32__kaddw(a, b) \
+  (__builtin_nds32_kaddw ((a), (b)))
+#define __nds32__kaddh(a, b) \
+  (__builtin_nds32_kaddh ((a), (b)))
+#define __nds32__ksubw(a, b) \
+  (__builtin_nds32_ksubw ((a), (b)))
+#define __nds32__ksubh(a, b) \
+  (__builtin_nds32_ksubh ((a), (b)))
+#define __nds32__kdmbb(a, b) \
+  (__builtin_nds32_kdmbb ((a), (b)))
+#define __nds32__v_kdmbb(a, b) \
+  (__builtin_nds32_v_kdmbb ((a), (b)))
+#define __nds32__kdmbt(a, b) \
+  (__builtin_nds32_kdmbt ((a), (b)))
+#define __nds32__v_kdmbt(a, b) \
+  (__builtin_nds32_v_kdmbt ((a), (b)))
+#define __nds32__kdmtb(a, b) \
+  (__builtin_nds32_kdmtb ((a), (b)))
+#define __nds32__v_kdmtb(a, b) \
+  (__builtin_nds32_v_kdmtb ((a), (b)))
+#define __nds32__kdmtt(a, b) \
+  (__builtin_nds32_kdmtt ((a), (b)))
+#define __nds32__v_kdmtt(a, b) \
+  (__builtin_nds32_v_kdmtt ((a), (b)))
+#define __nds32__khmbb(a, b) \
+  (__builtin_nds32_khmbb ((a), (b)))
+#define __nds32__v_khmbb(a, b) \
+  (__builtin_nds32_v_khmbb ((a), (b)))
+#define __nds32__khmbt(a, b) \
+  (__builtin_nds32_khmbt ((a), (b)))
+#define __nds32__v_khmbt(a, b) \
+  (__builtin_nds32_v_khmbt ((a), (b)))
+#define __nds32__khmtb(a, b) \
+  (__builtin_nds32_khmtb ((a), (b)))
+#define __nds32__v_khmtb(a, b) \
+  (__builtin_nds32_v_khmtb ((a), (b)))
+#define __nds32__khmtt(a, b) \
+  (__builtin_nds32_khmtt ((a), (b)))
+#define __nds32__v_khmtt(a, b) \
+  (__builtin_nds32_v_khmtt ((a), (b)))
+#define __nds32__kslraw(a, b) \
+  (__builtin_nds32_kslraw ((a), (b)))
+#define __nds32__kslraw_u(a, b) \
+  (__builtin_nds32_kslraw_u ((a), (b)))
+
+#define __nds32__rdov() \
+  (__builtin_nds32_rdov())
+#define __nds32__clrov() \
+  (__builtin_nds32_clrov())
 #define __nds32__gie_dis() \
   (__builtin_nds32_gie_dis())
 #define __nds32__gie_en() \
@@ -720,10 +776,622 @@ enum nds32_dpref
 #define __nds32__get_trig_type(a) \
   (__builtin_nds32_get_trig_type ((a)))
 
+#define __nds32__get_unaligned_hw(a) \
+  (__builtin_nds32_unaligned_load_hw ((a)))
+#define __nds32__get_unaligned_w(a) \
+  (__builtin_nds32_unaligned_load_w ((a)))
+#define __nds32__get_unaligned_dw(a) \
+  (__builtin_nds32_unaligned_load_dw ((a)))
+#define __nds32__put_unaligned_hw(a, data) \
+  (__builtin_nds32_unaligned_store_hw ((a), (data)))
+#define __nds32__put_unaligned_w(a, data) \
+  (__builtin_nds32_unaligned_store_w ((a), (data)))
+#define __nds32__put_unaligned_dw(a, data) \
+  (__builtin_nds32_unaligned_store_dw ((a), (data)))
+
+#define __nds32__add16(a, b) \
+  (__builtin_nds32_add16 ((a), (b)))
+#define __nds32__v_uadd16(a, b) \
+  (__builtin_nds32_v_uadd16 ((a), (b)))
+#define __nds32__v_sadd16(a, b) \
+  (__builtin_nds32_v_sadd16 ((a), (b)))
+#define __nds32__radd16(a, b) \
+  (__builtin_nds32_radd16 ((a), (b)))
+#define __nds32__v_radd16(a, b) \
+  (__builtin_nds32_v_radd16 ((a), (b)))
+#define __nds32__uradd16(a, b) \
+  (__builtin_nds32_uradd16 ((a), (b)))
+#define __nds32__v_uradd16(a, b) \
+  (__builtin_nds32_v_uradd16 ((a), (b)))
+#define __nds32__kadd16(a, b) \
+  (__builtin_nds32_kadd16 ((a), (b)))
+#define __nds32__v_kadd16(a, b) \
+  (__builtin_nds32_v_kadd16 ((a), (b)))
+#define __nds32__ukadd16(a, b) \
+  (__builtin_nds32_ukadd16 ((a), (b)))
+#define __nds32__v_ukadd16(a, b) \
+  (__builtin_nds32_v_ukadd16 ((a), (b)))
+#define __nds32__sub16(a, b) \
+  (__builtin_nds32_sub16 ((a), (b)))
+#define __nds32__v_usub16(a, b) \
+  (__builtin_nds32_v_usub16 ((a), (b)))
+#define __nds32__v_ssub16(a, b) \
+  (__builtin_nds32_v_ssub16 ((a), (b)))
+#define __nds32__rsub16(a, b) \
+  (__builtin_nds32_rsub16 ((a), (b)))
+#define __nds32__v_rsub16(a, b) \
+  (__builtin_nds32_v_rsub16 ((a), (b)))
+#define __nds32__ursub16(a, b) \
+  (__builtin_nds32_ursub16 ((a), (b)))
+#define __nds32__v_ursub16(a, b) \
+  (__builtin_nds32_v_ursub16 ((a), (b)))
+#define __nds32__ksub16(a, b) \
+  (__builtin_nds32_ksub16 ((a), (b)))
+#define __nds32__v_ksub16(a, b) \
+  (__builtin_nds32_v_ksub16 ((a), (b)))
+#define __nds32__uksub16(a, b) \
+  (__builtin_nds32_uksub16 ((a), (b)))
+#define __nds32__v_uksub16(a, b) \
+  (__builtin_nds32_v_uksub16 ((a), (b)))
+#define __nds32__cras16(a, b) \
+  (__builtin_nds32_cras16 ((a), (b)))
+#define __nds32__v_ucras16(a, b) \
+  (__builtin_nds32_v_ucras16 ((a), (b)))
+#define __nds32__v_scras16(a, b) \
+  (__builtin_nds32_v_scras16 ((a), (b)))
+#define __nds32__rcras16(a, b) \
+  (__builtin_nds32_rcras16 ((a), (b)))
+#define __nds32__v_rcras16(a, b) \
+  (__builtin_nds32_v_rcras16 ((a), (b)))
+#define __nds32__urcras16(a, b) \
+  (__builtin_nds32_urcras16 ((a), (b)))
+#define __nds32__v_urcras16(a, b) \
+  (__builtin_nds32_v_urcras16 ((a), (b)))
+#define __nds32__kcras16(a, b) \
+  (__builtin_nds32_kcras16 ((a), (b)))
+#define __nds32__v_kcras16(a, b) \
+  (__builtin_nds32_v_kcras16 ((a), (b)))
+#define __nds32__ukcras16(a, b) \
+  (__builtin_nds32_ukcras16 ((a), (b)))
+#define __nds32__v_ukcras16(a, b) \
+  (__builtin_nds32_v_ukcras16 ((a), (b)))
+#define __nds32__crsa16(a, b) \
+  (__builtin_nds32_crsa16 ((a), (b)))
+#define __nds32__v_ucrsa16(a, b) \
+  (__builtin_nds32_v_ucrsa16 ((a), (b)))
+#define __nds32__v_scrsa16(a, b) \
+  (__builtin_nds32_v_scrsa16 ((a), (b)))
+#define __nds32__rcrsa16(a, b) \
+  (__builtin_nds32_rcrsa16 ((a), (b)))
+#define __nds32__v_rcrsa16(a, b) \
+  (__builtin_nds32_v_rcrsa16 ((a), (b)))
+#define __nds32__urcrsa16(a, b) \
+  (__builtin_nds32_urcrsa16 ((a), (b)))
+#define __nds32__v_urcrsa16(a, b) \
+  (__builtin_nds32_v_urcrsa16 ((a), (b)))
+#define __nds32__kcrsa16(a, b) \
+  (__builtin_nds32_kcrsa16 ((a), (b)))
+#define __nds32__v_kcrsa16(a, b) \
+  (__builtin_nds32_v_kcrsa16 ((a), (b)))
+#define __nds32__ukcrsa16(a, b) \
+  (__builtin_nds32_ukcrsa16 ((a), (b)))
+#define __nds32__v_ukcrsa16(a, b) \
+  (__builtin_nds32_v_ukcrsa16 ((a), (b)))
+
+#define __nds32__add8(a, b) \
+  (__builtin_nds32_add8 ((a), (b)))
+#define __nds32__v_uadd8(a, b) \
+  (__builtin_nds32_v_uadd8 ((a), (b)))
+#define __nds32__v_sadd8(a, b) \
+  (__builtin_nds32_v_sadd8 ((a), (b)))
+#define __nds32__radd8(a, b) \
+  (__builtin_nds32_radd8 ((a), (b)))
+#define __nds32__v_radd8(a, b) \
+  (__builtin_nds32_v_radd8 ((a), (b)))
+#define __nds32__uradd8(a, b) \
+  (__builtin_nds32_uradd8 ((a), (b)))
+#define __nds32__v_uradd8(a, b) \
+  (__builtin_nds32_v_uradd8 ((a), (b)))
+#define __nds32__kadd8(a, b) \
+  (__builtin_nds32_kadd8 ((a), (b)))
+#define __nds32__v_kadd8(a, b) \
+  (__builtin_nds32_v_kadd8 ((a), (b)))
+#define __nds32__ukadd8(a, b) \
+  (__builtin_nds32_ukadd8 ((a), (b)))
+#define __nds32__v_ukadd8(a, b) \
+  (__builtin_nds32_v_ukadd8 ((a), (b)))
+#define __nds32__sub8(a, b) \
+  (__builtin_nds32_sub8 ((a), (b)))
+#define __nds32__v_usub8(a, b) \
+  (__builtin_nds32_v_usub8 ((a), (b)))
+#define __nds32__v_ssub8(a, b) \
+  (__builtin_nds32_v_ssub8 ((a), (b)))
+#define __nds32__rsub8(a, b) \
+  (__builtin_nds32_rsub8 ((a), (b)))
+#define __nds32__v_rsub8(a, b) \
+  (__builtin_nds32_v_rsub8 ((a), (b)))
+#define __nds32__ursub8(a, b) \
+  (__builtin_nds32_ursub8 ((a), (b)))
+#define __nds32__v_ursub8(a, b) \
+  (__builtin_nds32_v_ursub8 ((a), (b)))
+#define __nds32__ksub8(a, b) \
+  (__builtin_nds32_ksub8 ((a), (b)))
+#define __nds32__v_ksub8(a, b) \
+  (__builtin_nds32_v_ksub8 ((a), (b)))
+#define __nds32__uksub8(a, b) \
+  (__builtin_nds32_uksub8 ((a), (b)))
+#define __nds32__v_uksub8(a, b) \
+  (__builtin_nds32_v_uksub8 ((a), (b)))
+
+#define __nds32__sra16(a, b) \
+  (__builtin_nds32_sra16 ((a), (b)))
+#define __nds32__v_sra16(a, b) \
+  (__builtin_nds32_v_sra16 ((a), (b)))
+#define __nds32__sra16_u(a, b) \
+  (__builtin_nds32_sra16_u ((a), (b)))
+#define __nds32__v_sra16_u(a, b) \
+  (__builtin_nds32_v_sra16_u ((a), (b)))
+#define __nds32__srl16(a, b) \
+  (__builtin_nds32_srl16 ((a), (b)))
+#define __nds32__v_srl16(a, b) \
+  (__builtin_nds32_v_srl16 ((a), (b)))
+#define __nds32__srl16_u(a, b) \
+  (__builtin_nds32_srl16_u ((a), (b)))
+#define __nds32__v_srl16_u(a, b) \
+  (__builtin_nds32_v_srl16_u ((a), (b)))
+#define __nds32__sll16(a, b) \
+  (__builtin_nds32_sll16 ((a), (b)))
+#define __nds32__v_sll16(a, b) \
+  (__builtin_nds32_v_sll16 ((a), (b)))
+#define __nds32__ksll16(a, b) \
+  (__builtin_nds32_ksll16 ((a), (b)))
+#define __nds32__v_ksll16(a, b) \
+  (__builtin_nds32_v_ksll16 ((a), (b)))
+#define __nds32__kslra16(a, b) \
+  (__builtin_nds32_kslra16 ((a), (b)))
+#define __nds32__v_kslra16(a, b) \
+  (__builtin_nds32_v_kslra16 ((a), (b)))
+#define __nds32__kslra16_u(a, b) \
+  (__builtin_nds32_kslra16_u ((a), (b)))
+#define __nds32__v_kslra16_u(a, b) \
+  (__builtin_nds32_v_kslra16_u ((a), (b)))
+
+#define __nds32__cmpeq16(a, b) \
+  (__builtin_nds32_cmpeq16 ((a), (b)))
+#define __nds32__v_scmpeq16(a, b) \
+  (__builtin_nds32_v_scmpeq16 ((a), (b)))
+#define __nds32__v_ucmpeq16(a, b) \
+  (__builtin_nds32_v_ucmpeq16 ((a), (b)))
+#define __nds32__scmplt16(a, b) \
+  (__builtin_nds32_scmplt16 ((a), (b)))
+#define __nds32__v_scmplt16(a, b) \
+  (__builtin_nds32_v_scmplt16 ((a), (b)))
+#define __nds32__scmple16(a, b) \
+  (__builtin_nds32_scmple16 ((a), (b)))
+#define __nds32__v_scmple16(a, b) \
+  (__builtin_nds32_v_scmple16 ((a), (b)))
+#define __nds32__ucmplt16(a, b) \
+  (__builtin_nds32_ucmplt16 ((a), (b)))
+#define __nds32__v_ucmplt16(a, b) \
+  (__builtin_nds32_v_ucmplt16 ((a), (b)))
+#define __nds32__ucmple16(a, b) \
+  (__builtin_nds32_ucmple16 ((a), (b)))
+#define __nds32__v_ucmple16(a, b) \
+  (__builtin_nds32_v_ucmple16 ((a), (b)))
+
+#define __nds32__cmpeq8(a, b) \
+  (__builtin_nds32_cmpeq8 ((a), (b)))
+#define __nds32__v_scmpeq8(a, b) \
+  (__builtin_nds32_v_scmpeq8 ((a), (b)))
+#define __nds32__v_ucmpeq8(a, b) \
+  (__builtin_nds32_v_ucmpeq8 ((a), (b)))
+#define __nds32__scmplt8(a, b) \
+  (__builtin_nds32_scmplt8 ((a), (b)))
+#define __nds32__v_scmplt8(a, b) \
+  (__builtin_nds32_v_scmplt8 ((a), (b)))
+#define __nds32__scmple8(a, b) \
+  (__builtin_nds32_scmple8 ((a), (b)))
+#define __nds32__v_scmple8(a, b) \
+  (__builtin_nds32_v_scmple8 ((a), (b)))
+#define __nds32__ucmplt8(a, b) \
+  (__builtin_nds32_ucmplt8 ((a), (b)))
+#define __nds32__v_ucmplt8(a, b) \
+  (__builtin_nds32_v_ucmplt8 ((a), (b)))
+#define __nds32__ucmple8(a, b) \
+  (__builtin_nds32_ucmple8 ((a), (b)))
+#define __nds32__v_ucmple8(a, b) \
+  (__builtin_nds32_v_ucmple8 ((a), (b)))
+
+#define __nds32__smin16(a, b) \
+  (__builtin_nds32_smin16 ((a), (b)))
+#define __nds32__v_smin16(a, b) \
+  (__builtin_nds32_v_smin16 ((a), (b)))
+#define __nds32__umin16(a, b) \
+  (__builtin_nds32_umin16 ((a), (b)))
+#define __nds32__v_umin16(a, b) \
+  (__builtin_nds32_v_umin16 ((a), (b)))
+#define __nds32__smax16(a, b) \
+  (__builtin_nds32_smax16 ((a), (b)))
+#define __nds32__v_smax16(a, b) \
+  (__builtin_nds32_v_smax16 ((a), (b)))
+#define __nds32__umax16(a, b) \
+  (__builtin_nds32_umax16 ((a), (b)))
+#define __nds32__v_umax16(a, b) \
+  (__builtin_nds32_v_umax16 ((a), (b)))
+#define __nds32__sclip16(a, b) \
+  (__builtin_nds32_sclip16 ((a), (b)))
+#define __nds32__v_sclip16(a, b) \
+  (__builtin_nds32_v_sclip16 ((a), (b)))
+#define __nds32__uclip16(a, b) \
+  (__builtin_nds32_uclip16 ((a), (b)))
+#define __nds32__v_uclip16(a, b) \
+  (__builtin_nds32_v_uclip16 ((a), (b)))
+#define __nds32__khm16(a, b) \
+  (__builtin_nds32_khm16 ((a), (b)))
+#define __nds32__v_khm16(a, b) \
+  (__builtin_nds32_v_khm16 ((a), (b)))
+#define __nds32__khmx16(a, b) \
+  (__builtin_nds32_khmx16 ((a), (b)))
+#define __nds32__v_khmx16(a, b) \
+  (__builtin_nds32_v_khmx16 ((a), (b)))
+#define __nds32__kabs16(a) \
+  (__builtin_nds32_kabs16 ((a)))
+#define __nds32__v_kabs16(a) \
+  (__builtin_nds32_v_kabs16 ((a)))
+
+#define __nds32__smin8(a, b) \
+  (__builtin_nds32_smin8 ((a), (b)))
+#define __nds32__v_smin8(a, b) \
+  (__builtin_nds32_v_smin8 ((a), (b)))
+#define __nds32__umin8(a, b) \
+  (__builtin_nds32_umin8 ((a), (b)))
+#define __nds32__v_umin8(a, b) \
+  (__builtin_nds32_v_umin8 ((a), (b)))
+#define __nds32__smax8(a, b) \
+  (__builtin_nds32_smax8 ((a), (b)))
+#define __nds32__v_smax8(a, b) \
+  (__builtin_nds32_v_smax8 ((a), (b)))
+#define __nds32__umax8(a, b) \
+  (__builtin_nds32_umax8 ((a), (b)))
+#define __nds32__v_umax8(a, b) \
+  (__builtin_nds32_v_umax8 ((a), (b)))
+#define __nds32__kabs8(a) \
+  (__builtin_nds32_kabs8 ((a)))
+#define __nds32__v_kabs8(a) \
+  (__builtin_nds32_v_kabs8 ((a)))
+
+#define __nds32__sunpkd810(a) \
+  (__builtin_nds32_sunpkd810 ((a)))
+#define __nds32__v_sunpkd810(a) \
+  (__builtin_nds32_v_sunpkd810 ((a)))
+#define __nds32__sunpkd820(a) \
+  (__builtin_nds32_sunpkd820 ((a)))
+#define __nds32__v_sunpkd820(a) \
+  (__builtin_nds32_v_sunpkd820 ((a)))
+#define __nds32__sunpkd830(a) \
+  (__builtin_nds32_sunpkd830 ((a)))
+#define __nds32__v_sunpkd830(a) \
+  (__builtin_nds32_v_sunpkd830 ((a)))
+#define __nds32__sunpkd831(a) \
+  (__builtin_nds32_sunpkd831 ((a)))
+#define __nds32__v_sunpkd831(a) \
+  (__builtin_nds32_v_sunpkd831 ((a)))
+#define __nds32__zunpkd810(a) \
+  (__builtin_nds32_zunpkd810 ((a)))
+#define __nds32__v_zunpkd810(a) \
+  (__builtin_nds32_v_zunpkd810 ((a)))
+#define __nds32__zunpkd820(a) \
+  (__builtin_nds32_zunpkd820 ((a)))
+#define __nds32__v_zunpkd820(a) \
+  (__builtin_nds32_v_zunpkd820 ((a)))
+#define __nds32__zunpkd830(a) \
+  (__builtin_nds32_zunpkd830 ((a)))
+#define __nds32__v_zunpkd830(a) \
+  (__builtin_nds32_v_zunpkd830 ((a)))
+#define __nds32__zunpkd831(a) \
+  (__builtin_nds32_zunpkd831 ((a)))
+#define __nds32__v_zunpkd831(a) \
+  (__builtin_nds32_v_zunpkd831 ((a)))
+
+#define __nds32__raddw(a, b) \
+  (__builtin_nds32_raddw ((a), (b)))
+#define __nds32__uraddw(a, b) \
+  (__builtin_nds32_uraddw ((a), (b)))
+#define __nds32__rsubw(a, b) \
+  (__builtin_nds32_rsubw ((a), (b)))
+#define __nds32__ursubw(a, b) \
+  (__builtin_nds32_ursubw ((a), (b)))
+
+#define __nds32__sra_u(a, b) \
+  (__builtin_nds32_sra_u ((a), (b)))
+#define __nds32__ksll(a, b) \
+  (__builtin_nds32_ksll ((a), (b)))
+#define __nds32__pkbb16(a, b) \
+  (__builtin_nds32_pkbb16 ((a), (b)))
+#define __nds32__v_pkbb16(a, b) \
+  (__builtin_nds32_v_pkbb16 ((a), (b)))
+#define __nds32__pkbt16(a, b) \
+  (__builtin_nds32_pkbt16 ((a), (b)))
+#define __nds32__v_pkbt16(a, b) \
+  (__builtin_nds32_v_pkbt16 ((a), (b)))
+#define __nds32__pktb16(a, b) \
+  (__builtin_nds32_pktb16 ((a), (b)))
+#define __nds32__v_pktb16(a, b) \
+  (__builtin_nds32_v_pktb16 ((a), (b)))
+#define __nds32__pktt16(a, b) \
+  (__builtin_nds32_pktt16 ((a), (b)))
+#define __nds32__v_pktt16(a, b) \
+  (__builtin_nds32_v_pktt16 ((a), (b)))
+
+#define __nds32__smmul(a, b) \
+  (__builtin_nds32_smmul ((a), (b)))
+#define __nds32__smmul_u(a, b) \
+  (__builtin_nds32_smmul_u ((a), (b)))
+#define __nds32__kmmac(r, a, b) \
+  (__builtin_nds32_kmmac ((r), (a), (b)))
+#define __nds32__kmmac_u(r, a, b) \
+  (__builtin_nds32_kmmac_u ((r), (a), (b)))
+#define __nds32__kmmsb(r, a, b) \
+  (__builtin_nds32_kmmsb ((r), (a), (b)))
+#define __nds32__kmmsb_u(r, a, b) \
+  (__builtin_nds32_kmmsb_u ((r), (a), (b)))
+#define __nds32__kwmmul(a, b) \
+  (__builtin_nds32_kwmmul ((a), (b)))
+#define __nds32__kwmmul_u(a, b) \
+  (__builtin_nds32_kwmmul_u ((a), (b)))
+
+#define __nds32__smmwb(a, b) \
+  (__builtin_nds32_smmwb ((a), (b)))
+#define __nds32__v_smmwb(a, b) \
+  (__builtin_nds32_v_smmwb ((a), (b)))
+#define __nds32__smmwb_u(a, b) \
+  (__builtin_nds32_smmwb_u ((a), (b)))
+#define __nds32__v_smmwb_u(a, b) \
+  (__builtin_nds32_v_smmwb_u ((a), (b)))
+#define __nds32__smmwt(a, b) \
+  (__builtin_nds32_smmwt ((a), (b)))
+#define __nds32__v_smmwt(a, b) \
+  (__builtin_nds32_v_smmwt ((a), (b)))
+#define __nds32__smmwt_u(a, b) \
+  (__builtin_nds32_smmwt_u ((a), (b)))
+#define __nds32__v_smmwt_u(a, b) \
+  (__builtin_nds32_v_smmwt_u ((a), (b)))
+#define __nds32__kmmawb(r, a, b) \
+  (__builtin_nds32_kmmawb ((r), (a), (b)))
+#define __nds32__v_kmmawb(r, a, b) \
+  (__builtin_nds32_v_kmmawb ((r), (a), (b)))
+#define __nds32__kmmawb_u(r, a, b) \
+  (__builtin_nds32_kmmawb_u ((r), (a), (b)))
+#define __nds32__v_kmmawb_u(r, a, b) \
+  (__builtin_nds32_v_kmmawb_u ((r), (a), (b)))
+#define __nds32__kmmawt(r, a, b) \
+  (__builtin_nds32_kmmawt ((r), (a), (b)))
+#define __nds32__v_kmmawt(r, a, b) \
+  (__builtin_nds32_v_kmmawt ((r), (a), (b)))
+#define __nds32__kmmawt_u(r, a, b) \
+  (__builtin_nds32_kmmawt_u ((r), (a), (b)))
+#define __nds32__v_kmmawt_u(r, a, b) \
+  (__builtin_nds32_v_kmmawt_u ((r), (a), (b)))
+
+#define __nds32__smbb(a, b) \
+  (__builtin_nds32_smbb ((a), (b)))
+#define __nds32__v_smbb(a, b) \
+  (__builtin_nds32_v_smbb ((a), (b)))
+#define __nds32__smbt(a, b) \
+  (__builtin_nds32_smbt ((a), (b)))
+#define __nds32__v_smbt(a, b) \
+  (__builtin_nds32_v_smbt ((a), (b)))
+#define __nds32__smtt(a, b) \
+  (__builtin_nds32_smtt ((a), (b)))
+#define __nds32__v_smtt(a, b) \
+  (__builtin_nds32_v_smtt ((a), (b)))
+#define __nds32__kmda(a, b) \
+  (__builtin_nds32_kmda ((a), (b)))
+#define __nds32__v_kmda(a, b) \
+  (__builtin_nds32_v_kmda ((a), (b)))
+#define __nds32__kmxda(a, b) \
+  (__builtin_nds32_kmxda ((a), (b)))
+#define __nds32__v_kmxda(a, b) \
+  (__builtin_nds32_v_kmxda ((a), (b)))
+#define __nds32__smds(a, b) \
+  (__builtin_nds32_smds ((a), (b)))
+#define __nds32__v_smds(a, b) \
+  (__builtin_nds32_v_smds ((a), (b)))
+#define __nds32__smdrs(a, b) \
+  (__builtin_nds32_smdrs ((a), (b)))
+#define __nds32__v_smdrs(a, b) \
+  (__builtin_nds32_v_smdrs ((a), (b)))
+#define __nds32__smxds(a, b) \
+  (__builtin_nds32_smxds ((a), (b)))
+#define __nds32__v_smxds(a, b) \
+  (__builtin_nds32_v_smxds ((a), (b)))
+#define __nds32__kmabb(r, a, b) \
+  (__builtin_nds32_kmabb ((r), (a), (b)))
+#define __nds32__v_kmabb(r, a, b) \
+  (__builtin_nds32_v_kmabb ((r), (a), (b)))
+#define __nds32__kmabt(r, a, b) \
+  (__builtin_nds32_kmabt ((r), (a), (b)))
+#define __nds32__v_kmabt(r, a, b) \
+  (__builtin_nds32_v_kmabt ((r), (a), (b)))
+#define __nds32__kmatt(r, a, b) \
+  (__builtin_nds32_kmatt ((r), (a), (b)))
+#define __nds32__v_kmatt(r, a, b) \
+  (__builtin_nds32_v_kmatt ((r), (a), (b)))
+#define __nds32__kmada(r, a, b) \
+  (__builtin_nds32_kmada ((r), (a), (b)))
+#define __nds32__v_kmada(r, a, b) \
+  (__builtin_nds32_v_kmada ((r), (a), (b)))
+#define __nds32__kmaxda(r, a, b) \
+  (__builtin_nds32_kmaxda ((r), (a), (b)))
+#define __nds32__v_kmaxda(r, a, b) \
+  (__builtin_nds32_v_kmaxda ((r), (a), (b)))
+#define __nds32__kmads(r, a, b) \
+  (__builtin_nds32_kmads ((r), (a), (b)))
+#define __nds32__v_kmads(r, a, b) \
+  (__builtin_nds32_v_kmads ((r), (a), (b)))
+#define __nds32__kmadrs(r, a, b) \
+  (__builtin_nds32_kmadrs ((r), (a), (b)))
+#define __nds32__v_kmadrs(r, a, b) \
+  (__builtin_nds32_v_kmadrs ((r), (a), (b)))
+#define __nds32__kmaxds(r, a, b) \
+  (__builtin_nds32_kmaxds ((r), (a), (b)))
+#define __nds32__v_kmaxds(r, a, b) \
+  (__builtin_nds32_v_kmaxds ((r), (a), (b)))
+#define __nds32__kmsda(r, a, b) \
+  (__builtin_nds32_kmsda ((r), (a), (b)))
+#define __nds32__v_kmsda(r, a, b) \
+  (__builtin_nds32_v_kmsda ((r), (a), (b)))
+#define __nds32__kmsxda(r, a, b) \
+  (__builtin_nds32_kmsxda ((r), (a), (b)))
+#define __nds32__v_kmsxda(r, a, b) \
+  (__builtin_nds32_v_kmsxda ((r), (a), (b)))
+
+#define __nds32__smal(a, b) \
+  (__builtin_nds32_smal ((a), (b)))
+#define __nds32__v_smal(a, b) \
+  (__builtin_nds32_v_smal ((a), (b)))
+
+#define __nds32__bitrev(a, b) \
+  (__builtin_nds32_bitrev ((a), (b)))
+#define __nds32__wext(a, b) \
+  (__builtin_nds32_wext ((a), (b)))
+#define __nds32__bpick(r, a, b) \
+  (__builtin_nds32_bpick ((r), (a), (b)))
+#define __nds32__insb(r, a, b) \
+  (__builtin_nds32_insb ((r), (a), (b)))
+
+#define __nds32__sadd64(a, b) \
+  (__builtin_nds32_sadd64 ((a), (b)))
+#define __nds32__uadd64(a, b) \
+  (__builtin_nds32_uadd64 ((a), (b)))
+#define __nds32__radd64(a, b) \
+  (__builtin_nds32_radd64 ((a), (b)))
+#define __nds32__uradd64(a, b) \
+  (__builtin_nds32_uradd64 ((a), (b)))
+#define __nds32__kadd64(a, b) \
+  (__builtin_nds32_kadd64 ((a), (b)))
+#define __nds32__ukadd64(a, b) \
+  (__builtin_nds32_ukadd64 ((a), (b)))
+#define __nds32__ssub64(a, b) \
+  (__builtin_nds32_ssub64 ((a), (b)))
+#define __nds32__usub64(a, b) \
+  (__builtin_nds32_usub64 ((a), (b)))
+#define __nds32__rsub64(a, b) \
+  (__builtin_nds32_rsub64 ((a), (b)))
+#define __nds32__ursub64(a, b) \
+  (__builtin_nds32_ursub64 ((a), (b)))
+#define __nds32__ksub64(a, b) \
+  (__builtin_nds32_ksub64 ((a), (b)))
+#define __nds32__uksub64(a, b) \
+  (__builtin_nds32_uksub64 ((a), (b)))
+
+#define __nds32__smar64(r, a, b) \
+  (__builtin_nds32_smar64 ((r), (a), (b)))
+#define __nds32__smsr64(r, a, b) \
+  (__builtin_nds32_smsr64 ((r), (a), (b)))
+#define __nds32__umar64(r, a, b) \
+  (__builtin_nds32_umar64 ((r), (a), (b)))
+#define __nds32__umsr64(r, a, b) \
+  (__builtin_nds32_umsr64 ((r), (a), (b)))
+#define __nds32__kmar64(r, a, b) \
+  (__builtin_nds32_kmar64 ((r), (a), (b)))
+#define __nds32__kmsr64(r, a, b) \
+  (__builtin_nds32_kmsr64 ((r), (a), (b)))
+#define __nds32__ukmar64(r, a, b) \
+  (__builtin_nds32_ukmar64 ((r), (a), (b)))
+#define __nds32__ukmsr64(r, a, b) \
+  (__builtin_nds32_ukmsr64 ((r), (a), (b)))
+
+#define __nds32__smalbb(r, a, b) \
+  (__builtin_nds32_smalbb ((r), (a), (b)))
+#define __nds32__v_smalbb(r, a, b) \
+  (__builtin_nds32_v_smalbb ((r), (a), (b)))
+#define __nds32__smalbt(r, a, b) \
+  (__builtin_nds32_smalbt ((r), (a), (b)))
+#define __nds32__v_smalbt(r, a, b) \
+  (__builtin_nds32_v_smalbt ((r), (a), (b)))
+#define __nds32__smaltt(r, a, b) \
+  (__builtin_nds32_smaltt ((r), (a), (b)))
+#define __nds32__v_smaltt(r, a, b) \
+  (__builtin_nds32_v_smaltt ((r), (a), (b)))
+#define __nds32__smalda(r, a, b) \
+  (__builtin_nds32_smalda ((r), (a), (b)))
+#define __nds32__v_smalda(r, a, b) \
+  (__builtin_nds32_v_smalda ((r), (a), (b)))
+#define __nds32__smalxda(r, a, b) \
+  (__builtin_nds32_smalxda ((r), (a), (b)))
+#define __nds32__v_smalxda(r, a, b) \
+  (__builtin_nds32_v_smalxda ((r), (a), (b)))
+#define __nds32__smalds(r, a, b) \
+  (__builtin_nds32_smalds ((r), (a), (b)))
+#define __nds32__v_smalds(r, a, b) \
+  (__builtin_nds32_v_smalds ((r), (a), (b)))
+#define __nds32__smaldrs(r, a, b) \
+  (__builtin_nds32_smaldrs ((r), (a), (b)))
+#define __nds32__v_smaldrs(r, a, b) \
+  (__builtin_nds32_v_smaldrs ((r), (a), (b)))
+#define __nds32__smalxds(r, a, b) \
+  (__builtin_nds32_smalxds ((r), (a), (b)))
+#define __nds32__v_smalxds(r, a, b) \
+  (__builtin_nds32_v_smalxds ((r), (a), (b)))
+#define __nds32__smslda(r, a, b) \
+  (__builtin_nds32_smslda ((r), (a), (b)))
+#define __nds32__v_smslda(r, a, b) \
+  (__builtin_nds32_v_smslda ((r), (a), (b)))
+#define __nds32__smslxda(r, a, b) \
+  (__builtin_nds32_smslxda ((r), (a), (b)))
+#define __nds32__v_smslxda(r, a, b) \
+  (__builtin_nds32_v_smslxda ((r), (a), (b)))
+
+#define __nds32__smul16(a, b) \
+  (__builtin_nds32_smul16 ((a), (b)))
+#define __nds32__v_smul16(a, b) \
+  (__builtin_nds32_v_smul16 ((a), (b)))
+#define __nds32__smulx16(a, b) \
+  (__builtin_nds32_smulx16 ((a), (b)))
+#define __nds32__v_smulx16(a, b) \
+  (__builtin_nds32_v_smulx16 ((a), (b)))
+#define __nds32__umul16(a, b) \
+  (__builtin_nds32_umul16 ((a), (b)))
+#define __nds32__v_umul16(a, b) \
+  (__builtin_nds32_v_umul16 ((a), (b)))
+#define __nds32__umulx16(a, b) \
+  (__builtin_nds32_umulx16 ((a), (b)))
+#define __nds32__v_umulx16(a, b) \
+  (__builtin_nds32_v_umulx16 ((a), (b)))
+
+#define __nds32__uclip32(a, imm) \
+  (__builtin_nds32_uclip32 ((a), (imm)))
+#define __nds32__sclip32(a, imm) \
+  (__builtin_nds32_sclip32 ((a), (imm)))
+#define __nds32__kabs(a) \
+  (__builtin_nds32_kabs ((a)))
+
 #define __nds32__unaligned_feature() \
   (__builtin_nds32_unaligned_feature())
 #define __nds32__enable_unaligned() \
   (__builtin_nds32_enable_unaligned())
 #define __nds32__disable_unaligned() \
   (__builtin_nds32_disable_unaligned())
+
+#define __nds32__get_unaligned_u16x2(a) \
+  (__builtin_nds32_get_unaligned_u16x2 ((a)))
+#define __nds32__get_unaligned_s16x2(a) \
+  (__builtin_nds32_get_unaligned_s16x2 ((a)))
+#define __nds32__get_unaligned_u8x4(a) \
+  (__builtin_nds32_get_unaligned_u8x4 ((a)))
+#define __nds32__get_unaligned_s8x4(a) \
+  (__builtin_nds32_get_unaligned_s8x4 ((a)))
+
+#define __nds32__put_unaligned_u16x2(a, data) \
+  (__builtin_nds32_put_unaligned_u16x2 ((a), (data)))
+#define __nds32__put_unaligned_s16x2(a, data) \
+  (__builtin_nds32_put_unaligned_s16x2 ((a), (data)))
+#define __nds32__put_unaligned_u8x4(a, data) \
+  (__builtin_nds32_put_unaligned_u8x4 ((a), (data)))
+#define __nds32__put_unaligned_s8x4(a, data) \
+  (__builtin_nds32_put_unaligned_s8x4 ((a), (data)))
+
+#define NDS32ATTR_SIGNATURE              __attribute__((signature))
+
 #endif /* nds32_intrinsic.h */
index 9eb8468..e5f7ba4 100644 (file)
        (and (match_operand 0 "const_int_operand")
            (match_test "satisfies_constraint_Is11 (op)"))))
 
+(define_predicate "nds32_imm_0_1_operand"
+  (and (match_operand 0 "const_int_operand")
+       (ior (match_test "satisfies_constraint_Iv00 (op)")
+           (match_test "satisfies_constraint_Iv01 (op)"))))
+
+(define_predicate "nds32_imm_1_2_operand"
+  (and (match_operand 0 "const_int_operand")
+       (ior (match_test "satisfies_constraint_Iv01 (op)")
+           (match_test "satisfies_constraint_Iv02 (op)"))))
+
+(define_predicate "nds32_imm_1_2_4_8_operand"
+  (and (match_operand 0 "const_int_operand")
+       (ior (ior (match_test "satisfies_constraint_Iv01 (op)")
+                (match_test "satisfies_constraint_Iv02 (op)"))
+           (ior (match_test "satisfies_constraint_Iv04 (op)")
+                (match_test "satisfies_constraint_Iv08 (op)")))))
+
+(define_predicate "nds32_imm2u_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_Iu02 (op)")))
+
+(define_predicate "nds32_imm4u_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_Iu04 (op)")))
+
 (define_predicate "nds32_imm5u_operand"
   (and (match_operand 0 "const_int_operand")
        (match_test "satisfies_constraint_Iu05 (op)")))
 
+(define_predicate "nds32_imm6u_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_Iu06 (op)")))
+
+(define_predicate "nds32_rimm4u_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "nds32_imm4u_operand")))
+
 (define_predicate "nds32_rimm5u_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "nds32_imm5u_operand")))
 
+(define_predicate "nds32_rimm6u_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "nds32_imm6u_operand")))
+
 (define_predicate "nds32_move_operand"
   (and (match_operand 0 "general_operand")
        (not (match_code "high,const,symbol_ref,label_ref")))
   return true;
 })
 
+(define_predicate "nds32_vmove_operand"
+  (and (match_operand 0 "general_operand")
+       (not (match_code "high,const,symbol_ref,label_ref")))
+{
+  /* If the constant op does NOT satisfy Is20 nor Ihig,
+     we can not perform move behavior by a single instruction.  */
+  if (GET_CODE (op) == CONST_VECTOR
+      && !satisfies_constraint_CVs2 (op)
+      && !satisfies_constraint_CVhi (op))
+    return false;
+
+  return true;
+})
+
 (define_predicate "nds32_and_operand"
   (match_operand 0 "nds32_reg_constant_operand")
 {
   (ior (match_operand 0 "nds32_symbolic_operand")
        (match_operand 0 "nds32_general_register_operand")))
 
+(define_predicate "nds32_insv_operand"
+  (match_code "const_int")
+{
+  return INTVAL (op) == 0
+        || INTVAL (op) == 8
+        || INTVAL (op) == 16
+        || INTVAL (op) == 24;
+})
+
 (define_predicate "nds32_lmw_smw_base_operand"
   (and (match_code "mem")
        (match_test "nds32_valid_smw_lwm_base_p (op)")))