buildgcc: Integrate nds32 update from Andes Technology

This patch has been provided by Mentor Chih-Chyang Chang on behalf of Andes Technology. It fixes using the coreboot toolchain to compile the Chrome EC code base on the ITE8320 embedded controller. The new patch incorporates a fix for the issue previously fixed by patches/gcc-6.3.0_nds32.patch, so that patch can be removed. patches/gcc-6.3.0_riscv.patch needs to be slightly adjusted to still apply cleanly (configure scripts only). Change-Id: I0033888360f13ba951b692b3242aab6697ca61b3 Signed-off-by: Stefan Reinauer <stefan.reinauer@coreboot.org> Reviewed-on: https://review.coreboot.org/20901 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Patrick Georgi <pgeorgi@google.com>
author: Stefan Reinauer <stefan.reinauer@coreboot.org> 2017-08-07 15:27:15 -0700
committer: Patrick Georgi <pgeorgi@google.com> 2017-09-01 12:32:36 +0000
commit: f3e23a313558b1e9e913878d7a638ff32321a4b3 (patch)
tree: 12064c039d78bcb9e7f4bab4c986d533a7659b81 /util/crossgcc/patches
parent: d37ebddfd84699464d076642f35fce0ef21cd1d5 (diff)
download: coreboot-f3e23a313558b1e9e913878d7a638ff32321a4b3.tar.xz
3 files changed, 73401 insertions, 21 deletions
diff --git a/util/crossgcc/patches/gcc-6.3.0_nds32.patch b/util/crossgcc/patches/gcc-6.3.0_nds32.patch
deleted file mode 100644
index cdfb02f351..0000000000
--- a/util/crossgcc/patches/gcc-6.3.0_nds32.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-diff -urN gcc-6.1.0.orig/gcc/config/nds32/nds32.md gcc-6.1.0/gcc/config/nds32/nds32.md
---- gcc-6.1.0.orig/gcc/config/nds32/nds32.md	2015-01-15 22:45:09.000000000 -0800
-+++ gcc-6.1.0/gcc/config/nds32/nds32.md	2016-04-14 22:09:09.000000000 -0700
-@@ -2289,11 +2289,11 @@
-   emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
- 				  operands[4]));
- 
--  operands[5] = gen_reg_rtx (SImode);
-+  rtx tmp = gen_reg_rtx (SImode);
-   /* Step C, D, E, and F, using another temporary register operands[5].  */
-   emit_jump_insn (gen_casesi_internal (operands[0],
- 				       operands[3],
--				       operands[5]));
-+				       tmp));
-   DONE;
- })
- 
diff --git a/util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch b/util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch
new file mode 100644
index 0000000000..50e39691b6
--- /dev/null
+++ b/util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch
@@ -0,0 +1,73397 @@
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 67048db..e6f8fd3 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1281,7 +1281,7 @@ ffast-math
+ Common
+ 
+ ffat-lto-objects
+-Common Var(flag_fat_lto_objects)
++Common Var(flag_fat_lto_objects) Init(1)
+ Output lto objects containing both the intermediate language and binary output.
+ 
+ ffinite-math-only
+diff --git a/gcc/common/config/nds32/nds32-common.c b/gcc/common/config/nds32/nds32-common.c
+index fb75956..66ea95c 100644
+--- a/gcc/common/config/nds32/nds32-common.c
++++ b/gcc/common/config/nds32/nds32-common.c
+@@ -53,6 +53,16 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+ 
+       return true;
+ 
++    case OPT_misr_secure_:
++      /* Check the valid security level: 0 1 2 3.  */
++      if (value < 0 || value > 3)
++	{
++	  error_at (loc, "for the option -misr-secure=X, the valid X "
++			 "must be: 0, 1, 2, or 3");
++	  return false;
++	}
++      return true;
++
+     case OPT_mcache_block_size_:
+       /* Check valid value: 4 8 16 32 64 128 256 512.  */
+       if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+@@ -74,15 +84,69 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+ /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+ static const struct default_options nds32_option_optimization_table[] =
+ {
+-  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+-  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
++#ifdef TARGET_DEFAULT_NO_MATH_ERRNO
++  /* Under some configuration, we would like to use -fno-math-errno by default
++     at all optimization levels for performance and code size consideration.
++     Please check gcc/config.gcc for more implementation details.  */
++  { OPT_LEVELS_ALL,               OPT_fmath_errno,         NULL, 0 },
++#endif
++#if TARGET_LINUX_ABI == 0
++  /* Disable -fdelete-null-pointer-checks by default in ELF toolchain.  */
++  { OPT_LEVELS_ALL,               OPT_fdelete_null_pointer_checks,
++							   NULL, 0 },
++#endif
++  /* Enable -fsched-pressure by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_fsched_pressure,     NULL, 1 },
++  /* Enable -fomit-frame-pointer by default at all optimization levels.  */
++  { OPT_LEVELS_ALL,               OPT_fomit_frame_pointer, NULL, 1 },
++  /* Enable -mrelax-hint by default at all optimization levels.  */
++  { OPT_LEVELS_ALL,               OPT_mrelax_hint,         NULL, 1 },
++  /* Enable -mabi-compatible by default at all optimization levels.  */
++  { OPT_LEVELS_ALL,               OPT_mabi_compatible,     NULL, 1 },
++  /* Enable -malways-align by default at -O1 and above, but not -Os or -Og.  */
++  { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align,       NULL, 1 },
+   /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+-  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+-
+-  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
++  { OPT_LEVELS_SIZE,              OPT_mv3push,             NULL, 1 },
++  /* Enable -mload-store-opt by default at -Os.  */
++  { OPT_LEVELS_SIZE,              OPT_mload_store_opt,     NULL, 1 },
++  /* Enable -mregrename by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_mregrename,          NULL, 1 },
++  /* Enable -mgcse by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_mgcse,               NULL, 1 },
++  /* Enable -msign-conversion by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_msign_conversion,    NULL, 1 },
++  /* Enable -mscalbn-transform by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_mscalbn_transform,   NULL, 1 },
++  /* Enable -mconst-remater by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_mconst_remater, NULL, 1 },
++  /* Enable -mcprop-acc by default at -O1 and above.  */
++  { OPT_LEVELS_1_PLUS,            OPT_mcprop_acc,   NULL, 1 },
++#ifdef TARGET_OS_DEFAULT_IFC
++  /* Enable -mifc by default at -Os, but it is useless under V2/V3M ISA.  */
++  { OPT_LEVELS_SIZE,              OPT_mifc,                NULL, 1 },
++#endif
++#ifdef TARGET_OS_DEFAULT_EX9
++  /* Enable -mex9 by default at -Os, but it is useless under V2/V3M ISA.  */
++  { OPT_LEVELS_SIZE,              OPT_mex9,                NULL, 1 },
++#endif
++
++  { OPT_LEVELS_NONE,              0,                       NULL, 0 }
+ };
+ 
+ /* ------------------------------------------------------------------------ */
++
++/* Implement TARGET_EXCEPT_UNWIND_INFO.  */
++static enum unwind_info_type
++nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED)
++{
++  if (TARGET_LINUX_ABI)
++    return UI_DWARF2;
++
++  return UI_SJLJ;
++}
++
++/* ------------------------------------------------------------------------ */
++
+ 
+ /* Run-time Target Specification.  */
+ 
+@@ -95,14 +159,22 @@ static const struct default_options nds32_option_optimization_table[] =
+ 
+    Other MASK_XXX flags are set individually.
+    By default we enable
+-     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+-     TARGET_PERF_EXT : Generate performance extention instrcution.
+-     TARGET_CMOV     : Generate conditional move instruction.  */
++     TARGET_16_BIT     : Generate 16/32 bit mixed length instruction.
++     TARGET_EXT_PERF   : Generate performance extension instruction.
++     TARGET_EXT_PERF2  : Generate performance extension version 2 instruction.
++     TARGET_EXT_STRING : Generate string extension instruction.
++     TARGET_HW_ABS     : Generate hardware abs instruction.
++     TARGET_CMOV       : Generate conditional move instruction.  */
+ #undef TARGET_DEFAULT_TARGET_FLAGS
+ #define TARGET_DEFAULT_TARGET_FLAGS		\
+   (TARGET_CPU_DEFAULT				\
++   | TARGET_DEFAULT_FPU_ISA			\
++   | TARGET_DEFAULT_FPU_FMA			\
+    | MASK_16_BIT				\
+-   | MASK_PERF_EXT				\
++   | MASK_EXT_PERF				\
++   | MASK_EXT_PERF2				\
++   | MASK_EXT_STRING				\
++   | MASK_HW_ABS				\
+    | MASK_CMOV)
+ 
+ #undef TARGET_HANDLE_OPTION
+@@ -115,7 +187,7 @@ static const struct default_options nds32_option_optimization_table[] =
+ /* Defining the Output Assembler Language.  */
+ 
+ #undef TARGET_EXCEPT_UNWIND_INFO
+-#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
++#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 1d5b23f..367a821 100644
+--- a/gcc/config.gcc
++++ b/gcc/config.gcc
+@@ -433,8 +433,28 @@ mips*-*-*)
+ 	;;
+ nds32*)
+ 	cpu_type=nds32
+-	extra_headers="nds32_intrinsic.h"
+-	extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o"
++	extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc"
++	case ${target} in
++	  nds32*-*-linux*)
++	    extra_options="${extra_options} nds32/nds32-linux.opt"
++	    ;;
++	  nds32*-*-elf*)
++	    extra_options="${extra_options} nds32/nds32-elf.opt"
++	    ;;
++	  *)
++	    ;;
++	esac
++	extra_options="${extra_options} g.opt"
++	extra_objs="nds32-cost.o nds32-intrinsic.o nds32-md-auxiliary.o \
++		    nds32-pipelines-auxiliary.o nds32-predicates.o \
++		    nds32-memory-manipulation.o nds32-fp-as-gp.o \
++		    nds32-load-store-opt.o nds32-soft-fp-comm.o nds32-isr.o \
++		    nds32-regrename.o nds32-gcse.o nds32-relax-opt.o \
++		    nds32-sign-conversion.o \
++		    nds32-scalbn-transform.o nds32-lmwsmw.o \
++		    nds32-reg-utils.o nds32-const-remater.o \
++		    nds32-utils.o nds32-abi-compatible.o \
++		    nds32-cprop-acc.o"
+ 	;;
+ nios2-*-*)
+ 	cpu_type=nios2
+@@ -2265,17 +2285,67 @@ msp430*-*-*)
+ 	tmake_file="${tmake_file} msp430/t-msp430"
+ 	extra_gcc_objs="driver-msp430.o"
+ 	;;
+-nds32le-*-*)
++nds32*-*-*)
+ 	target_cpu_default="0"
+ 	tm_defines="${tm_defines}"
+-	tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}"
+-	tmake_file="nds32/t-nds32 nds32/t-mlibs"
+-	;;
+-nds32be-*-*)
+-	target_cpu_default="0|MASK_BIG_ENDIAN"
+-	tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+-	tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}"
+-	tmake_file="nds32/t-nds32 nds32/t-mlibs"
++	case ${target} in
++	  nds32le*-*-*)
++	    ;;
++	  nds32be-*-*)
++	    target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN"
++	    tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
++	    ;;
++	esac
++	case ${target} in
++	  nds32*-*-elf*)
++	    tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h"
++	    tmake_file="nds32/t-nds32 nds32/t-elf"
++	    ;;
++	  nds32*-*-linux*)
++	    tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h"
++	    tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux"
++	    ;;
++	esac
++	nds32_multilibs="${with_multilib_list}"
++	if test "$nds32_multilibs" = "default"; then
++	  nds32_multilibs=""
++	fi
++	nds32_multilibs=`echo $nds32_multilibs | sed -e 's/,/ /g'`
++	for nds32_multilib in ${nds32_multilibs}; do
++		case ${nds32_multilib} in
++		dsp | zol | v3m+ | graywolf )
++			TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG} ${nds32_multilib}"
++			;;
++		*)
++			echo "--with-multilib-list=${nds32_multilib} not supported."
++			exit 1
++		esac
++	done
++
++	# Handle --enable-default-relax setting.
++	if test x${enable_default_relax} = xyes; then
++		tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1"
++	fi
++	# Handle --enable-Os-default-ifc setting.
++	if test x${enable_Os_default_ifc} = xyes; then
++		tm_defines="${tm_defines} TARGET_OS_DEFAULT_IFC=1"
++	fi
++	# Handle --enable-Os-default-ex9 setting.
++	if test x${enable_Os_default_ex9} = xyes; then
++		tm_defines="${tm_defines} TARGET_OS_DEFAULT_EX9=1"
++	fi
++	# Handle --with-ext-dsp
++	if test x${with_ext_dsp} = xyes; then
++		tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1"
++	fi
++	if test x${with_ext_zol} = xyes; then
++		tm_defines="${tm_defines} TARGET_DEFAULT_HWLOOP=1"
++	fi
++	# Handle --with-ext-16bit; it is enabled unless explicitly set to "no".
++	if test x${with_ext_16bit} != xno; then
++		tm_defines="${tm_defines} TARGET_DEFAULT_16BIT=1"
++	fi
++
+ 	;;
+ nios2-*-*)
+ 	tm_file="elfos.h ${tm_file}"
+@@ -4097,15 +4167,51 @@ case "${target}" in
+ 		;;
+ 
+ 	nds32*-*-*)
+-		supported_defaults="arch nds32_lib"
++		supported_defaults="arch cpu nds32_lib float fpu_config memory_model"
+ 
+ 		# process --with-arch
+ 		case "${with_arch}" in
+-		"" | v2 | v3 | v3m)
++		"" | v3 | v3j)
++			# OK
++			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0"
++			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4"
++			;;
++		v2 | v2j | v3m)
++			# OK
++			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0"
++			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=16"
++			;;
++		v3f)
++			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=1"
++			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4"
++			;;
++		v3s)
++			tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=2"
++			tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4"
++			;;
++		*)
++			echo "Cannot accept --with-arch=$with_arch, available values are: v2 v2j v3 v3j v3m v3f v3s" 1>&2
++			exit 1
++			;;
++		esac
++
++		# process --with-memory-model
++		case "${with_memory_model}" in
++		"" | fast | slow)
++			;;
++		*)
++			echo "Cannot accept --with-memory-model=$with_memory_model, available values are: fast slow" 1>&2
++			exit 1
++			;;
++		esac
++
++		# process --with-cpu
++		case "${with_cpu}" in
++		"" | n7 | n8 | e8 | s8 | n9 | n10 | d10 | graywolf | n12 | n13 | panther)
+ 			# OK
+ 			;;
+ 		*)
+-			echo "Cannot accept --with-arch=$with_arch, available values are: v2 v3 v3m" 1>&2
++			echo "Cannot accept --with-cpu=$with_cpu, available values are: n7 n8 e8 s8 n9 n10 d10 graywolf n12 n13 panther" 1>&2
+ 			exit 1
+ 			;;
+ 		esac
+@@ -4115,31 +4221,56 @@ case "${target}" in
+ 		"")
+ 			# the default library is newlib
+ 			with_nds32_lib=newlib
++			tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1"
+ 			;;
+ 		newlib)
+ 			# OK
++			tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1"
+ 			;;
+ 		mculib)
+ 			# OK
++			# for the arch=v3f or arch=v3s under mculib toolchain,
++			# we would like to set -fno-math-errno as default
++			case "${with_arch}" in
++			v3f | v3s)
++				tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1"
++				;;
++			esac
++			;;
++		glibc)
++			# OK
++			tm_defines="${tm_defines} TARGET_DEFAULT_TLSDESC_TRAMPOLINE=1"
++			;;
++		uclibc)
+ 			;;
+ 		*)
+-			echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2
++			echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2
++			exit 1
++			;;
++		esac
++
++		# process --with-float
++		case "${with_float}" in
++		"" | soft | hard)
++			# OK
++			;;
++		*)
++			echo "Cannot accept --with-float=$with_float, available values are: soft hard" 1>&2
++			exit 1
++			;;
++		esac
++
++		# process --with-config-fpu
++		case "${with_config_fpu}" in
++		"" | 0 | 1 | 2 | 3)
++			# OK
++			;;
++		*)
++			echo "Cannot accept --with-config-fpu=$with_config_fpu, available values from 0 to 7" 1>&2
+ 			exit 1
+ 			;;
+ 		esac
+-		;;
+ 
+-	nios2*-*-*)
+-		supported_defaults="arch"
+-			case "$with_arch" in
+-			"" | r1 | r2)
+-				# OK
+-				;;
+-			*)
+-				echo "Unknown arch used in --with-arch=$with_arch" 1>&2
+-				exit 1
+-				;;
+-			esac
+ 		;;
+ 
+ 	powerpc*-*-* | rs6000-*-*)
+@@ -4527,7 +4658,7 @@ case ${target} in
+ esac
+ 
+ t=
+-all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls"
++all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls memory_model"
+ for option in $all_defaults
+ do
+ 	eval "val=\$with_"`echo $option | sed s/-/_/g`
+diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md
+index bea42ee..6c92412 100644
+--- a/gcc/config/nds32/constants.md
++++ b/gcc/config/nds32/constants.md
+@@ -23,25 +23,176 @@
+ (define_constants
+   [(R8_REGNUM  8)
+    (TA_REGNUM 15)
++   (TP_REGNUM 25)
+    (FP_REGNUM 28)
+    (GP_REGNUM 29)
+    (LP_REGNUM 30)
+    (SP_REGNUM 31)
++   (LB_REGNUM 98)
++   (LE_REGNUM 99)
++   (LC_REGNUM 100)
+   ])
+ 
+ 
++;; The unspec operation index.
++(define_c_enum "unspec_element" [
++  UNSPEC_COPYSIGN
++  UNSPEC_FCPYNSD
++  UNSPEC_FCPYNSS
++  UNSPEC_FCPYSD
++  UNSPEC_FCPYSS
++  UNSPEC_CLIP
++  UNSPEC_CLIPS
++  UNSPEC_CLO
++  UNSPEC_PBSAD
++  UNSPEC_PBSADA
++  UNSPEC_BSE
++  UNSPEC_BSE_2
++  UNSPEC_BSP
++  UNSPEC_BSP_2
++  UNSPEC_FFB
++  UNSPEC_FFMISM
++  UNSPEC_FLMISM
++  UNSPEC_KDMBB
++  UNSPEC_KDMBT
++  UNSPEC_KDMTB
++  UNSPEC_KDMTT
++  UNSPEC_KHMBB
++  UNSPEC_KHMBT
++  UNSPEC_KHMTB
++  UNSPEC_KHMTT
++  UNSPEC_KSLRAW
++  UNSPEC_KSLRAWU
++  UNSPEC_SVA
++  UNSPEC_SVS
++  UNSPEC_WSBH
++  UNSPEC_LWUP
++  UNSPEC_LBUP
++  UNSPEC_SWUP
++  UNSPEC_SBUP
++  UNSPEC_LMWZB
++  UNSPEC_SMWZB
++  UNSPEC_UALOAD_HW
++  UNSPEC_UALOAD_W
++  UNSPEC_UALOAD_DW
++  UNSPEC_UASTORE_HW
++  UNSPEC_UASTORE_W
++  UNSPEC_UASTORE_DW
++  UNSPEC_GOTINIT
++  UNSPEC_GOT
++  UNSPEC_GOTOFF
++  UNSPEC_PLT
++  UNSPEC_TLSGD
++  UNSPEC_TLSLD
++  UNSPEC_TLSIE
++  UNSPEC_TLSLE
++  UNSPEC_ROUND
++  UNSPEC_VEC_COMPARE
++  UNSPEC_KHM
++  UNSPEC_KHMX
++  UNSPEC_CLIP_OV
++  UNSPEC_CLIPS_OV
++  UNSPEC_BITREV
++  UNSPEC_KABS
++  UNSPEC_LOOP_END
++  UNSPEC_TLS_DESC
++  UNSPEC_TLS_IE
++  UNSPEC_ADD32
++  UNSPEC_ICT
++])
++
++
+ ;; The unspec_volatile operation index.
+ (define_c_enum "unspec_volatile_element" [
+-  UNSPEC_VOLATILE_FUNC_RETURN
++  UNSPEC_VOLATILE_EH_RETURN
+   UNSPEC_VOLATILE_ISYNC
+   UNSPEC_VOLATILE_ISB
++  UNSPEC_VOLATILE_DSB
++  UNSPEC_VOLATILE_MSYNC
++  UNSPEC_VOLATILE_MSYNC_ALL
++  UNSPEC_VOLATILE_MSYNC_STORE
+   UNSPEC_VOLATILE_MFSR
+   UNSPEC_VOLATILE_MFUSR
+   UNSPEC_VOLATILE_MTSR
+   UNSPEC_VOLATILE_MTUSR
+   UNSPEC_VOLATILE_SETGIE_EN
+   UNSPEC_VOLATILE_SETGIE_DIS
++  UNSPEC_VOLATILE_FMFCSR
++  UNSPEC_VOLATILE_FMTCSR
++  UNSPEC_VOLATILE_FMFCFG
++  UNSPEC_VOLATILE_JR_ITOFF
++  UNSPEC_VOLATILE_JR_TOFF
++  UNSPEC_VOLATILE_JRAL_ITON
++  UNSPEC_VOLATILE_JRAL_TON
++  UNSPEC_VOLATILE_RET_ITOFF
++  UNSPEC_VOLATILE_RET_TOFF
++  UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT
++  UNSPEC_VOLATILE_STANDBY_WAKE_GRANT
++  UNSPEC_VOLATILE_STANDBY_WAKE_DONE
++  UNSPEC_VOLATILE_TEQZ
++  UNSPEC_VOLATILE_TNEZ
++  UNSPEC_VOLATILE_TRAP
++  UNSPEC_VOLATILE_SETEND_BIG
++  UNSPEC_VOLATILE_SETEND_LITTLE
++  UNSPEC_VOLATILE_BREAK
++  UNSPEC_VOLATILE_SYSCALL
++  UNSPEC_VOLATILE_NOP
++  UNSPEC_VOLATILE_RES_DEP
++  UNSPEC_VOLATILE_DATA_DEP
++  UNSPEC_VOLATILE_LLW
++  UNSPEC_VOLATILE_SCW
++  UNSPEC_VOLATILE_CCTL_L1D_INVALALL
++  UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL
++  UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL
++  UNSPEC_VOLATILE_CCTL_IDX_WRITE
++  UNSPEC_VOLATILE_CCTL_IDX_READ
++  UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1
++  UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA
++  UNSPEC_VOLATILE_CCTL_IDX_WBINVAL
++  UNSPEC_VOLATILE_CCTL_VA_LCK
++  UNSPEC_VOLATILE_DPREF_QW
++  UNSPEC_VOLATILE_DPREF_HW
++  UNSPEC_VOLATILE_DPREF_W
++  UNSPEC_VOLATILE_DPREF_DW
++  UNSPEC_VOLATILE_TLBOP_TRD
++  UNSPEC_VOLATILE_TLBOP_TWR
++  UNSPEC_VOLATILE_TLBOP_RWR
++  UNSPEC_VOLATILE_TLBOP_RWLK
++  UNSPEC_VOLATILE_TLBOP_UNLK
++  UNSPEC_VOLATILE_TLBOP_PB
++  UNSPEC_VOLATILE_TLBOP_INV
++  UNSPEC_VOLATILE_TLBOP_FLUA
++  UNSPEC_VOLATILE_ENABLE_INT
++  UNSPEC_VOLATILE_DISABLE_INT
++  UNSPEC_VOLATILE_SET_PENDING_SWINT
++  UNSPEC_VOLATILE_CLR_PENDING_SWINT
++  UNSPEC_VOLATILE_CLR_PENDING_HWINT
++  UNSPEC_VOLATILE_GET_ALL_PENDING_INT
++  UNSPEC_VOLATILE_GET_PENDING_INT
++  UNSPEC_VOLATILE_SET_INT_PRIORITY
++  UNSPEC_VOLATILE_GET_INT_PRIORITY
++  UNSPEC_VOLATILE_SET_TRIG_LEVEL
++  UNSPEC_VOLATILE_SET_TRIG_EDGE
++  UNSPEC_VOLATILE_GET_TRIG_TYPE
++  UNSPEC_VOLATILE_RELAX_GROUP
++  UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN
++  UNSPEC_VOLATILE_INNERMOST_LOOP_END
++  UNSPEC_VOLATILE_OMIT_FP_BEGIN
++  UNSPEC_VOLATILE_OMIT_FP_END
+   UNSPEC_VOLATILE_POP25_RETURN
++  UNSPEC_VOLATILE_SIGNATURE_BEGIN
++  UNSPEC_VOLATILE_SIGNATURE_END
++  UNSPEC_VOLATILE_NO_HWLOOP
++  UNSPEC_VOLATILE_NO_IFC_BEGIN
++  UNSPEC_VOLATILE_NO_IFC_END
++  UNSPEC_VOLATILE_NO_EX9_BEGIN
++  UNSPEC_VOLATILE_NO_EX9_END
++  UNSPEC_VOLATILE_UNALIGNED_FEATURE
++  UNSPEC_VOLATILE_ENABLE_UNALIGNED
++  UNSPEC_VOLATILE_DISABLE_UNALIGNED
++  UNSPEC_VOLATILE_RDOV
++  UNSPEC_VOLATILE_CLROV
++  UNSPEC_VOLATILE_HWLOOP_LAST_INSN
+ ])
+ 
+ ;; ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md
+index 1f44a1a..8163f46 100644
+--- a/gcc/config/nds32/constraints.md
++++ b/gcc/config/nds32/constraints.md
+@@ -25,9 +25,6 @@
+ ;; Machine-dependent floating: G H
+ 
+ 
+-(define_register_constraint "w" "(TARGET_ISA_V3 || TARGET_ISA_V3M) ? LOW_REGS : NO_REGS"
+-  "LOW register class $r0 ~ $r7 constraint for V3/V3M ISA")
+-
+ (define_register_constraint "l" "LOW_REGS"
+   "LOW register class $r0 ~ $r7")
+ 
+@@ -41,9 +38,59 @@
+ (define_register_constraint "t" "R15_TA_REG"
+   "Temporary Assist register $ta (i.e. $r15)")
+ 
++(define_register_constraint "e" "R8_REG"
++  "Function Entry register $r8)")
++
+ (define_register_constraint "k" "STACK_REG"
+   "Stack register $sp")
+ 
++(define_register_constraint "v" "R5_REG"
++  "Register $r5")
++
++(define_register_constraint "x" "FRAME_POINTER_REG"
++  "Frame pointer register $fp")
++
++(define_register_constraint "f"
++  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ? FP_REGS : NO_REGS"
++ "The Floating point registers $fs0 ~ $fs31")
++
++(define_register_constraint "A" "LOOP_REGS"
++  "Loop register class")
++
++(define_constraint "Iv00"
++  "Constant value 0"
++  (and (match_code "const_int")
++       (match_test "ival == 0")))
++
++(define_constraint "Iv01"
++  "Constant value 1"
++  (and (match_code "const_int")
++       (match_test "ival == 1")))
++
++(define_constraint "Iv02"
++  "Constant value 2"
++  (and (match_code "const_int")
++       (match_test "ival == 2")))
++
++(define_constraint "Iv04"
++  "Constant value 4"
++  (and (match_code "const_int")
++       (match_test "ival == 4")))
++
++(define_constraint "Iv08"
++  "Constant value 8"
++  (and (match_code "const_int")
++       (match_test "ival == 8")))
++
++(define_constraint "Iu01"
++  "Unsigned immediate 1-bit value"
++  (and (match_code "const_int")
++       (match_test "ival == 1 || ival == 0")))
++
++(define_constraint "Iu02"
++  "Unsigned immediate 2-bit value"
++  (and (match_code "const_int")
++       (match_test "ival < (1 << 2) && ival >= 0")))
+ 
+ (define_constraint "Iu03"
+   "Unsigned immediate 3-bit value"
+@@ -65,6 +112,11 @@
+   (and (match_code "const_int")
+        (match_test "ival < (1 << 4) && ival >= -(1 << 4)")))
+ 
++(define_constraint "Cs05"
++  "Signed immediate 5-bit value"
++  (and (match_code "const_double")
++       (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 4), (1 << 4))")))
++
+ (define_constraint "Iu05"
+   "Unsigned immediate 5-bit value"
+   (and (match_code "const_int")
+@@ -75,6 +127,11 @@
+   (and (match_code "const_int")
+        (match_test "IN_RANGE (ival, -31, 0)")))
+ 
++(define_constraint "Iu06"
++  "Unsigned immediate 6-bit value"
++  (and (match_code "const_int")
++       (match_test "ival < (1 << 6) && ival >= 0")))
++
+ ;; Ip05 is special and dedicated for v3 movpi45 instruction.
+ ;; movpi45 has imm5u field but the range is 16 ~ 47.
+ (define_constraint "Ip05"
+@@ -84,10 +141,10 @@
+ 		    && ival >= (0 + 16)
+ 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
+ 
+-(define_constraint "Iu06"
++(define_constraint "IU06"
+   "Unsigned immediate 6-bit value constraint for addri36.sp instruction"
+   (and (match_code "const_int")
+-       (match_test "ival < (1 << 6)
++       (match_test "ival < (1 << 8)
+ 		    && ival >= 0
+ 		    && (ival % 4 == 0)
+ 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
+@@ -103,6 +160,11 @@
+        (match_test "ival < (1 << 9) && ival >= 0")))
+ 
+ 
++(define_constraint "Is08"
++  "Signed immediate 8-bit value"
++  (and (match_code "const_int")
++       (match_test "ival < (1 << 7) && ival >= -(1 << 7)")))
++
+ (define_constraint "Is10"
+   "Signed immediate 10-bit value"
+   (and (match_code "const_int")
+@@ -113,6 +175,10 @@
+   (and (match_code "const_int")
+        (match_test "ival < (1 << 10) && ival >= -(1 << 10)")))
+ 
++(define_constraint "Is14"
++  "Signed immediate 14-bit value"
++  (and (match_code "const_int")
++       (match_test "ival < (1 << 13) && ival >= -(1 << 13)")))
+ 
+ (define_constraint "Is15"
+   "Signed immediate 15-bit value"
+@@ -194,12 +260,21 @@
+   (and (match_code "const_int")
+        (match_test "ival < (1 << 19) && ival >= -(1 << 19)")))
+ 
++(define_constraint "Cs20"
++  "Signed immediate 20-bit value"
++  (and (match_code "const_double")
++       (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 19), (1 << 19))")))
+ 
+ (define_constraint "Ihig"
+   "The immediate value that can be simply set high 20-bit"
+   (and (match_code "const_int")
+        (match_test "(ival != 0) && ((ival & 0xfff) == 0)")))
+ 
++(define_constraint "Chig"
++  "The immediate value that can be simply set high 20-bit"
++  (and (match_code "high")
++       (match_test "GET_CODE (XEXP (op, 0)) == CONST_DOUBLE")))
++
+ (define_constraint "Izeb"
+   "The immediate value 0xff"
+   (and (match_code "const_int")
+@@ -213,12 +288,12 @@
+ (define_constraint "Ixls"
+   "The immediate value 0x01"
+   (and (match_code "const_int")
+-       (match_test "TARGET_PERF_EXT && (ival == 0x1)")))
++       (match_test "TARGET_EXT_PERF && (ival == 0x1)")))
+ 
+ (define_constraint "Ix11"
+   "The immediate value 0x7ff"
+   (and (match_code "const_int")
+-       (match_test "TARGET_PERF_EXT && (ival == 0x7ff)")))
++       (match_test "TARGET_EXT_PERF && (ival == 0x7ff)")))
+ 
+ (define_constraint "Ibms"
+   "The immediate value with power of 2"
+@@ -232,23 +307,70 @@
+        (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M)
+ 		    && (IN_RANGE (exact_log2 (ival + 1), 1, 8))")))
+ 
++(define_constraint "CVp5"
++  "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47"
++  (and (match_code "const_vector")
++       (match_test "nds32_valid_CVp5_p (op)")))
++
++(define_constraint "CVs5"
++  "Signed immediate 5-bit value"
++  (and (match_code "const_vector")
++       (match_test "nds32_valid_CVs5_p (op)")))
++
++(define_constraint "CVs2"
++  "Signed immediate 20-bit value"
++  (and (match_code "const_vector")
++       (match_test "nds32_valid_CVs2_p (op)")))
++
++(define_constraint "CVhi"
++  "The immediate value that can be simply set high 20-bit"
++  (and (match_code "const_vector")
++       (match_test "nds32_valid_CVhi_p (op)")))
+ 
+ (define_memory_constraint "U33"
+   "Memory constraint for 333 format"
+   (and (match_code "mem")
+-       (match_test "nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U")))
++       (match_test "nds32_mem_format (op) == ADDRESS_POST_INC_LO_REG_IMM3U
++		    || nds32_mem_format (op) == ADDRESS_POST_MODIFY_LO_REG_IMM3U
++		    || nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U")))
+ 
+ (define_memory_constraint "U45"
+   "Memory constraint for 45 format"
+   (and (match_code "mem")
+        (match_test "(nds32_mem_format (op) == ADDRESS_REG)
+-		    && (GET_MODE (op) == SImode)")))
++		    && ((GET_MODE (op) == SImode)
++		       || (GET_MODE (op) == SFmode))")))
++
++(define_memory_constraint "Ufe"
++  "Memory constraint for fe format"
++  (and (match_code "mem")
++       (match_test "nds32_mem_format (op) == ADDRESS_R8_IMM7U
++		    && (GET_MODE (op) == SImode
++			|| GET_MODE (op) == SFmode)")))
+ 
+ (define_memory_constraint "U37"
+   "Memory constraint for 37 format"
+   (and (match_code "mem")
+        (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U
+ 		    || nds32_mem_format (op) == ADDRESS_FP_IMM7U)
+-		    && (GET_MODE (op) == SImode)")))
++		    && (GET_MODE (op) == SImode
++			|| GET_MODE (op) == SFmode)")))
++
++(define_memory_constraint "Umw"
++  "Memory constraint for lwm/smw"
++  (and (match_code "mem")
++       (match_test "nds32_valid_smw_lwm_base_p (op)")))
++
++(define_memory_constraint "Da"
++  "Memory constraint for non-offset loads/stores"
++  (and (match_code "mem")
++       (match_test "REG_P (XEXP (op, 0))
++		    || (GET_CODE (XEXP (op, 0)) == POST_INC)")))
++
++(define_memory_constraint "Q"
++  "Memory constraint for no symbol_ref and const"
++  (and (match_code "mem")
++       (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++		     && nds32_float_mem_operand_p (op)")))
+ 
+ ;; ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h
+new file mode 100644
+index 0000000..315dcd8
+--- /dev/null
++++ b/gcc/config/nds32/elf.h
+@@ -0,0 +1,83 @@
++/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++
++/* ------------------------------------------------------------------------ */
++
++#define TARGET_LINUX_ABI 0
++
++/* In the configure stage we may use options --enable-default-relax,
++   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
++   the default spec of passing --relax, --mifc, and --mex9 to linker.
++   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
++   so that we can customize them conveniently.  */
++#define LINK_SPEC \
++  " %{G*}" \
++  " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
++  " %{shared:-shared}" \
++  NDS32_RELAX_SPEC \
++  NDS32_IFC_SPEC \
++  NDS32_EX9_SPEC
++
++#define LIB_SPEC \
++  " -lc -lgloss"
++
++#define LIBGCC_SPEC \
++  " -lgcc"
++
++/* The option -mno-ctor-dtor can disable constructor/destructor feature
++   by applying different crt stuff.  In the convention, crt0.o is the
++   startup file without constructor/destructor;
++   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
++   startup files with constructor/destructor.
++   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
++   by newlib/mculib/glibc/uclibc, while crtbegin.o and crtend.o are
++   currently provided by GCC for nds32 target.
++
++   For nds32 target so far:
++   If -mno-ctor-dtor, we are going to link
++   "crt0.o [user objects]".
++   If -mctor-dtor, we are going to link
++   "crt1.o crtbegin1.o [user objects] crtend1.o".
++
++   Note that the TARGET_DEFAULT_CTOR_DTOR would affect the
++   default behavior.  Check gcc/config.gcc for more information.  */
++#ifdef TARGET_DEFAULT_CTOR_DTOR
++  #define STARTFILE_SPEC \
++    " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
++    " %{!mno-ctor-dtor:crtbegin1.o%s}" \
++    " %{mcrt-arg:crtarg.o%s}"
++  #define ENDFILE_SPEC \
++    " %{!mno-ctor-dtor:crtend1.o%s}"
++#else
++  #define STARTFILE_SPEC \
++    " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \
++    " %{mctor-dtor|coverage:crtbegin1.o%s}" \
++    " %{mcrt-arg:crtarg.o%s}"
++  #define ENDFILE_SPEC \
++    " %{mctor-dtor|coverage:crtend1.o%s}"
++#endif
++
++#define STARTFILE_CXX_SPEC \
++  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
++  " %{!mno-ctor-dtor:crtbegin1.o%s}" \
++  " %{mcrt-arg:crtarg.o%s}"
++#define ENDFILE_CXX_SPEC \
++  " %{!mno-ctor-dtor:crtend1.o%s}"
+diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md
+index ab0f103..6023b9c 100644
+--- a/gcc/config/nds32/iterators.md
++++ b/gcc/config/nds32/iterators.md
+@@ -26,30 +26,99 @@
+ ;; A list of integer modes that are up to one word long.
+ (define_mode_iterator QIHISI [QI HI SI])
+ 
++;; A list of integer modes for one word and double word.
++(define_mode_iterator SIDI [SI DI])
++
+ ;; A list of integer modes that are up to one half-word long.
+ (define_mode_iterator QIHI [QI HI])
+ 
+ ;; A list of the modes that are up to double-word long.
+ (define_mode_iterator DIDF [DI DF])
+ 
++;; A list of the modes that are up to one word long vector.
++(define_mode_iterator VQIHI [V4QI V2HI])
++
++;; A list of the modes that are up to one word long vector and scalar.
++(define_mode_iterator VSQIHI [V4QI V2HI QI HI])
++
++(define_mode_iterator VSQIHIDI [V4QI V2HI QI HI DI])
++
++(define_mode_iterator VQIHIDI [V4QI V2HI DI])
++
++;; A list of the modes that are up to one word long vector
++;; and scalar for HImode.
++(define_mode_iterator VSHI [V2HI HI])
++
++;; A list of floating-point modes, each available only with the matching FPU.
++(define_mode_iterator ANYF [(SF "TARGET_FPU_SINGLE")
++			    (DF "TARGET_FPU_DOUBLE")])
+ 
+ ;;----------------------------------------------------------------------------
+ ;; Mode attributes.
+ ;;----------------------------------------------------------------------------
+ 
+-(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
++(define_mode_attr size [(QI "b") (HI "h") (SI "w") (SF "s") (DF "d")])
+ 
+-(define_mode_attr byte [(QI "1") (HI "2") (SI "4")])
++(define_mode_attr byte [(QI "1") (HI "2") (SI "4") (V4QI "4") (V2HI "4")])
+ 
++(define_mode_attr bits [(V4QI "8") (QI "8") (V2HI "16") (HI "16") (DI "64")])
++
++(define_mode_attr VELT [(V4QI "QI") (V2HI "HI")])
+ 
+ ;;----------------------------------------------------------------------------
+ ;; Code iterators.
+ ;;----------------------------------------------------------------------------
+ 
++;; shifts
++(define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert])
++
++(define_code_iterator shifts [ashift ashiftrt lshiftrt])
++
++(define_code_iterator shiftrt [ashiftrt lshiftrt])
++
++(define_code_iterator sat_plus [ss_plus us_plus])
++
++(define_code_iterator all_plus [plus ss_plus us_plus])
++
++(define_code_iterator sat_minus [ss_minus us_minus])
++
++(define_code_iterator all_minus [minus ss_minus us_minus])
++
++(define_code_iterator plus_minus [plus minus])
++
++(define_code_iterator extend [sign_extend zero_extend])
++
++(define_code_iterator sumax [smax umax])
++
++(define_code_iterator sumin [smin umin])
++
++(define_code_iterator sumin_max [smax umax smin umin])
+ 
+ ;;----------------------------------------------------------------------------
+ ;; Code attributes.
+ ;;----------------------------------------------------------------------------
+ 
++;; shifts
++(define_code_attr shift
++  [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")])
++
++(define_code_attr su
++  [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")])
++
++(define_code_attr zs
++  [(sign_extend "s") (zero_extend "z")])
++
++(define_code_attr uk
++  [(plus "") (ss_plus "k") (us_plus "uk")
++   (minus "") (ss_minus "k") (us_minus "uk")])
++
++(define_code_attr opcode
++  [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")])
++
++(define_code_attr add_rsub
++  [(plus "a") (minus "rs")])
++
++(define_code_attr add_sub
++  [(plus "a") (minus "s")])
+ 
+ ;;----------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/linux.h b/gcc/config/nds32/linux.h
+new file mode 100644
+index 0000000..36ddf2f
+--- /dev/null
++++ b/gcc/config/nds32/linux.h
+@@ -0,0 +1,78 @@
++/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++
++/* ------------------------------------------------------------------------ */
++
++#define TARGET_LINUX_ABI 1
++
++#undef  SIZE_TYPE
++#define SIZE_TYPE "unsigned int"
++
++#undef  PTRDIFF_TYPE
++#define PTRDIFF_TYPE "int"
++
++#ifdef TARGET_DEFAULT_TLSDESC_TRAMPOLINE
++  #define NDS32_TLSDESC_TRAMPOLINE_SPEC \
++    " %{!mno-tlsdesc-trampoline:--mtlsdesc-trampoline}"
++#else
++  #define NDS32_TLSDESC_TRAMPOLINE_SPEC ""
++#endif
++
++#define TARGET_OS_CPP_BUILTINS()                \
++  do                                            \
++    {                                           \
++      GNU_USER_TARGET_OS_CPP_BUILTINS();           \
++    }                                           \
++  while (0)
++
++#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
++
++/* In the configure stage we may use options --enable-default-relax,
++   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
++   the default spec of passing --relax, --mifc, and --mex9 to linker.
++   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
++   so that we can customize them conveniently.  */
++#define LINK_SPEC \
++ " %{G*}" \
++ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
++ " %{shared:-shared} \
++  %{!shared: \
++    %{!static: \
++      %{rdynamic:-export-dynamic} \
++      -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \
++    %{static:-static}}" \
++  NDS32_RELAX_SPEC \
++  NDS32_IFC_SPEC \
++  NDS32_EX9_SPEC \
++  NDS32_TLSDESC_TRAMPOLINE_SPEC
++
++#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} "
++
++
++/* The SYNC operations are implemented as library functions, not
++   INSN patterns.  As a result, the HAVE defines for the patterns are
++   not defined.  We need to define them to generate the corresponding
++   __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE
++   defines.
++   Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html  */
++#define HAVE_sync_compare_and_swapqi 1
++#define HAVE_sync_compare_and_swaphi 1
++#define HAVE_sync_compare_and_swapsi 1
+diff --git a/gcc/config/nds32/nds32-abi-compatible.c b/gcc/config/nds32/nds32-abi-compatible.c
+new file mode 100644
+index 0000000..f2ed006
+--- /dev/null
++++ b/gcc/config/nds32/nds32-abi-compatible.c
+@@ -0,0 +1,315 @@
++/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler.
++   This pass collects the usage of floating-point.
++
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"   /* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"     /* For DFA state_t.  */
++#include "insn-codes.h"    /* For CODE_FOR_xxx.  */
++#include "reload.h"     /* For push_reload ().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"     /* For add_builtin_function ().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "tree-ssa-alias.h"
++#include "fold-const.h"
++#include "gimple-expr.h"
++#include "is-a.h"
++#include "gimple.h"
++#include "gimplify.h"
++#include "gimple-iterator.h"
++#include "gimplify-me.h"
++#include "gimple-ssa.h"
++#include "ipa-ref.h"
++#include "lto-streamer.h"
++#include "cgraph.h"
++#include "tree-cfg.h"
++#include "tree-phinodes.h"
++#include "stringpool.h"
++#include "tree-ssanames.h"
++#include "tree-pass.h"
++#include "gimple-pretty-print.h"
++#include "gimple-walk.h"
++
++/* Indicates whether the translation unit includes floating-point
++   arithmetic.  */
++bool nds32_include_fp_arith = false;
++
++/* Return true if neither the return type nor any argument type of FN_DECL
++   is a floating-point or record/union type, and FN_DECL is not variadic.
++   Otherwise set the global nds32_include_fp_arith and return false.  */
++
++static bool
++nds32_acd_func_rtn_args_check (tree fn_decl)
++{
++  tree fn_type = TREE_TYPE (fn_decl);
++  function_args_iterator iter;
++  tree arg_type = NULL_TREE;
++  tree rtn_type = NULL_TREE;
++  unsigned argno = 1;	/* 1-based position, used only in the dump.  */
++
++  gcc_assert (fn_type);
++
++  rtn_type = TREE_TYPE (fn_type);
++  if (dump_file)
++    {
++      fprintf (dump_file,
++	       " Check the return & arguments for function %s\n"
++	       "  Prototype:",
++	       fndecl_name (fn_decl));
++      print_generic_decl (dump_file, fn_decl, 0);
++      fprintf (dump_file, "\n");
++    }
++
++  /* Reject a floating-point or record/union return type.  */
++  if (FLOAT_TYPE_P (rtn_type)
++      || RECORD_OR_UNION_TYPE_P (rtn_type))
++    {
++      if (dump_file)
++	fprintf (dump_file, "  ! Return type is FP or record/union type\n");
++      nds32_include_fp_arith = true;
++
++      return false;
++    }
++
++  /* Reject a function that has a variable argument list.  */
++  if (stdarg_p (fn_type))
++    {
++      if (dump_file)
++	fprintf (dump_file, "  ! Has variable argument list (i.e. ,...)\n");
++      nds32_include_fp_arith = true;
++
++      return false;
++    }
++
++  /* Reject the first floating-point or record/union argument type.  */
++  FOREACH_FUNCTION_ARGS (fn_type, arg_type, iter)
++    {
++      if (arg_type == void_type_node)
++	break;
++
++      if (FLOAT_TYPE_P (arg_type)
++	  || RECORD_OR_UNION_TYPE_P (arg_type))
++	{
++	  if (dump_file)
++	    fprintf (dump_file,
++		     "  ! No.%u argument is FP or record/union type\n",
++		     argno);
++	  nds32_include_fp_arith = true;
++
++	  return false;
++	}
++      argno++;
++    }
++
++  if (dump_file)
++    fprintf (dump_file,
++	     "  >> Pass the inspection of return & arguments type\n");
++
++  return true;
++}
++
++/* Operand callback for the statement walk in nds32_abi_compatible.  Return
++   *TP (clearing *WALK_SUBTREES) for an FP-typed or FP-coded operand, else NULL_TREE.  */
++
++static tree
++nds32_acd_walk_op_fn (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
++{
++  tree t = *tp;
++
++  if (t && TREE_TYPE (t)
++      && (FLOAT_TYPE_P (TREE_TYPE (t))
++	  || TREE_CODE (t) == REAL_CST
++	  || TREE_CODE (t) == COMPLEX_CST
++	  || TREE_CODE (t) == FLOAT_EXPR
++	  || TREE_CODE (t) == REALPART_EXPR))
++    {
++      *walk_subtrees = 0;
++      return t;
++    }
++
++  return NULL_TREE;
++}
++
++/* Statement callback for the walk in nds32_abi_compatible.  Return non-NULL
++   for an ASM stmt, or for a call whose callee fails the type check.  */
++
++static tree
++nds32_acd_walk_stmt_fn (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
++		       	struct walk_stmt_info *wi ATTRIBUTE_UNUSED)
++{
++  gimple *stmt = gsi_stmt (*gsi_p);
++
++  switch (gimple_code (stmt))
++    {
++    case GIMPLE_DEBUG:
++      *handled_ops_p = true;
++      break;
++
++    case GIMPLE_ASM:
++      *handled_ops_p = true;
++      return (tree) -1;	/* Dummy non-NULL value; not a real tree.  */
++      break;
++
++    case GIMPLE_CALL:
++	{
++	  tree call_decl = gimple_call_fndecl (stmt);
++	  if (!call_decl
++	      || !nds32_acd_func_rtn_args_check (call_decl))
++	    {
++	      *handled_ops_p = true;
++	      return call_decl;	/* NOTE(review): NULL when the call has no fndecl, so such a call is not reported - confirm intended.  */
++	    }
++	}
++      break;
++
++    default:
++      break;
++    }
++
++  return NULL_TREE;
++}
++
++/* Entry of the ABI compatible detection pass: check the signature, then the body, of the current function and set nds32_include_fp_arith on the first failure.  Always returns 0.  */
++
++static int
++nds32_abi_compatible (void)
++{
++  basic_block bb;
++  struct walk_stmt_info wi;
++
++  memset (&wi, 0, sizeof (wi));
++
++  if (!nds32_acd_func_rtn_args_check (current_function_decl))
++    return 0;	/* The check has already set nds32_include_fp_arith.  */
++
++  if (dump_file)
++    fprintf (dump_file, "Check function body %s\n",
++	     function_name (cfun));
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      gimple *ret;
++      gimple_seq seq = bb_seq (bb);
++
++      ret = walk_gimple_seq (seq,
++			     nds32_acd_walk_stmt_fn,
++			     nds32_acd_walk_op_fn,
++			     &wi);
++      if (ret != NULL)	/* A callback returned non-NULL for some statement.  */
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, " ! NO PASS: ");
++	      print_gimple_stmt (dump_file, ret, 0, TDF_SLIM|TDF_RAW);
++	    }
++	  nds32_include_fp_arith = true;
++	  break;
++	}
++    }
++
++  if (dump_file)
++    if (!nds32_include_fp_arith)
++      fprintf (dump_file,
++	       " >> Pass the inspection of FP operand for function body\n");
++
++  return 0;
++}
++
++static bool	/* Gate of the pass.  */
++gate_nds32_abi_compatible (void)
++{
++  return flag_nds32_abi_compatible	/* Run only when the option flag is set...  */
++    && !nds32_include_fp_arith;	/* ...and nothing has been flagged yet.  */
++}
++
++const pass_data pass_data_nds32_abi_compatible =	/* Descriptor passed to the gimple_opt_pass constructor.  */
++{
++  GIMPLE_PASS,				/* type */
++  "abi_compatible",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  ( PROP_cfg | PROP_ssa ),		/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  0,					/* todo_flags_finish */
++};
++
++class pass_nds32_abi_compatible : public gimple_opt_pass
++{
++public:
++  pass_nds32_abi_compatible (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_nds32_abi_compatible, ctxt)
++  {}
++
++  /* opt_pass methods: both forward to the file-local helper functions.  */
++  bool gate (function *) { return gate_nds32_abi_compatible (); }
++  unsigned int execute (function *) { return nds32_abi_compatible (); }
++};
++
++gimple_opt_pass *	/* Return a newly allocated pass object for CTXT.  */
++make_pass_nds32_abi_compatible (gcc::context *ctxt)
++{
++  return new pass_nds32_abi_compatible (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-const-remater.c b/gcc/config/nds32/nds32-const-remater.c
+new file mode 100644
+index 0000000..760e567
+--- /dev/null
++++ b/gcc/config/nds32/nds32-const-remater.c
+@@ -0,0 +1,461 @@
++/* Constant rematerialization pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* ------------------------------------------------------------------------ */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++#include "dbgcnt.h"
++#include "df.h"
++#include "tm-constrs.h"
++
++/* ------------------------------------------------------------------------ */
++
++typedef struct reg_avail_info
++{
++  rtx insn;		/* Insn that sets the constant.  */
++  unsigned int uint;	/* The constant, as read with XUINT.  */
++  unsigned int regno;	/* Number of the register the insn sets.  */
++} reg_avail_info_t;
++
++
++static void find_common_const (void);
++static bool try_rematerialize (rtx_insn *, unsigned int,
++			       auto_vec<reg_avail_info_t, 32> *);
++static void clean_reg_avail_info (rtx ,const_rtx, void *);
++static rtx get_const (rtx);
++static bool addsi3_format_p (rtx);
++
++/* Try to build constant UINT_R, which INSN sets, from a constant recorded in REG_AVAIL_INFOS using one shift, bit set/clear or add.  Emit the new insn before INSN and return TRUE on success, else return FALSE.  */
++static bool
++try_rematerialize (rtx_insn *insn, unsigned int uint_r,
++		   auto_vec<reg_avail_info_t, 32> *reg_avail_infos)
++{
++  unsigned int i, uint_i, cl_i, cl_r, ct_i, ct_r;
++  rtx pat, src, dest, new_insn;
++  bool done = FALSE;
++  df_ref df_rec;
++  df_link *link;
++
++  cl_r = __builtin_clz (uint_r);	/* NOTE(review): result is undefined if UINT_R is 0 - confirm it cannot be.  */
++  ct_r = __builtin_ctz (uint_r);
++  for (i = 0; i < reg_avail_infos->length (); ++i)
++    {
++      if ((*reg_avail_infos)[i].uint != uint_r)
++	{
++	  uint_i = (*reg_avail_infos)[i].uint;
++	  if (dump_file)
++	    fprintf (dump_file, "Try rematerialize %08x with const %08x\n",
++		     uint_r, uint_i);
++	  cl_i = __builtin_clz (uint_i);	/* NOTE(review): undefined if a recorded constant is 0 - confirm.  */
++	  ct_i = __builtin_ctz (uint_i);
++	  src = SET_DEST (PATTERN ((*reg_avail_infos)[i].insn));
++	  dest = SET_DEST (PATTERN (insn));
++
++	  if (cl_r > cl_i
++	      && (uint_i >> (cl_r - cl_i)) == uint_r)
++	    {
++	      /* Right shift logical.  */
++	      pat = gen_rtx_LSHIFTRT (SImode, src, GEN_INT (cl_r - cl_i));
++	      done = TRUE;
++	      if (dump_file)
++		fprintf (dump_file,
++			 "Rematerialize %08x with const %08x by l>> %u\n",
++			 uint_r, uint_i, (cl_r - cl_i));
++	    }
++	  else if (ct_i >= ct_r
++		   && ((int) uint_i >> (ct_i - ct_r)) == (int) uint_r)
++	    {
++	      /* Right shift arithmetic.  The dump reports the amount actually used.  */
++	      pat = gen_rtx_ASHIFTRT (SImode, src, GEN_INT (ct_i - ct_r));
++	      done = TRUE;
++	      if (dump_file)
++		fprintf (dump_file,
++			 "Rematerialize %08x with const %08x by a>> %u\n",
++			 uint_r, uint_i, (ct_i - ct_r));
++	    }
++	  else if (ct_r > ct_i
++		   && (uint_i << (ct_r - ct_i)) == uint_r)
++	    {
++	      /* Left shift.  */
++	      pat = gen_rtx_ASHIFT (SImode, src, GEN_INT (ct_r - ct_i));
++	      done = TRUE;
++	      if (dump_file)
++		fprintf (dump_file,
++			 "Rematerialize %08x with const %08x by << %u\n",
++			uint_r, uint_i, (ct_r - ct_i));
++	    }
++	  else if (TARGET_EXT_PERF && __builtin_popcount (uint_r ^ uint_i) == 1)
++	    {
++	      unsigned int val = uint_r ^ uint_i;
++	      if ((uint_r & (uint_r ^ uint_i)) != 0)
++		{
++		  if (val > (1 << 5))
++		    {
++		      /* Bit set.  */
++		      pat = gen_rtx_IOR (SImode, src, GEN_INT (val));
++		      done = TRUE;
++		      if (dump_file)
++			fprintf (dump_file,
++				 "Rematerialize %08x with const %08x by | %08x\n",
++				 uint_r, uint_i, uint_r ^ uint_i);
++		    }
++		  else
++		    {
++		      /* Transform to plus if immediate can fit addi45.  */
++		      pat = gen_rtx_PLUS (SImode, src, GEN_INT (val));
++		      done = TRUE;
++		      if (dump_file)
++			fprintf (dump_file,
++				 "Rematerialize %08x with const %08x by + %08x\n",
++				 uint_r, uint_i, uint_r ^ uint_i);
++		    }
++		}
++	      else
++		{
++		  if (val > (1 << 5))
++		    {
++		      /* Bit clear.  NOTE(review): the mask reaches GEN_INT as an unsigned int, i.e. zero-extended - confirm.  */
++		      pat = gen_rtx_AND (SImode, src, GEN_INT (~(uint_r ^ uint_i)));
++		      done = TRUE;
++		      if (dump_file)
++			fprintf (dump_file,
++				 "Rematerialize %08x with const %08x by & %08x\n",
++				 uint_r, uint_i, ~(uint_r ^ uint_i));
++		    }
++		  else
++		    {
++		      /* Transform to plus if immediate can fit subi45.  */
++		      pat = gen_rtx_PLUS (SImode, src, GEN_INT ((int) -val));
++		      done = TRUE;
++		      if (dump_file)
++			fprintf (dump_file,
++				 "Rematerialize %08x with const %08x by - %08x\n",
++				 uint_r, uint_i, uint_r ^ uint_i);
++		    }
++		}
++	    }
++	  else if  ((uint_r > uint_i ? uint_r - uint_i
++		     : uint_i - uint_r) < 0x4000)
++	    {
++	      /* Check insn_info existence because the instruction
++		 may have been deleted.  */
++	      if (DF_INSN_INFO_GET ((*reg_avail_infos)[i].insn))
++		{
++		  df_rec = DF_INSN_DEFS ((*reg_avail_infos)[i].insn);
++		  link = DF_REF_CHAIN (df_rec);
++
++		  /* Do not use the dead instruction. */
++		  /* Do not use the original matched sethi.  */
++		  if (!link)
++		    continue;
++		  for (link = DF_REF_CHAIN (df_rec); link; link = link->next)
++		    {
++		      if (DF_REF_REGNO (link->ref) == 0
++			  || !DF_REF_INSN_INFO (link->ref)
++			  || DF_REF_INSN (link->ref) == insn)
++			break;
++		    }
++		  if (link)
++		    continue;
++		}
++
++	      /* Add the (possibly negative) difference of the two constants.  */
++	      if (uint_r > uint_i)
++		{
++		  pat = gen_rtx_PLUS (SImode, src, GEN_INT (uint_r - uint_i));
++		  done = TRUE;
++		}
++	      else
++		{
++		  pat = gen_rtx_PLUS (SImode, src, GEN_INT ((HOST_WIDE_INT)
++							    uint_r - uint_i));
++		  done = TRUE;
++		}
++	    }
++
++	  if (done)
++	    {
++	      /* Emit the new instruction.  */
++	      new_insn = gen_move_insn (dest, pat);
++	      emit_insn_before (new_insn, insn);
++	      set_dst_reg_note (new_insn, REG_EQUAL, GEN_INT (uint_r), dest);
++	      return TRUE;
++	    }
++	}
++    }
++  return FALSE;
++}
++
++/* note_stores callback: remove from the vector passed as DATA every record for register DEST, and for the following register too when DEST is 8 bytes wide.  */
++static void
++clean_reg_avail_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
++		      void *data)
++{
++  unsigned int i;
++  auto_vec<reg_avail_info_t, 32> *reg_avail_infos =
++    (auto_vec<reg_avail_info_t, 32> *) data;
++
++  if (GET_CODE (dest) == SUBREG)
++    dest = SUBREG_REG (dest);
++
++  if (REG_P (dest))
++    for (i = 0; i < reg_avail_infos->length (); ++i)
++      if ((*reg_avail_infos)[i].regno == REGNO (dest)
++	  || (GET_MODE_SIZE (GET_MODE (dest)) == 8
++	      && (*reg_avail_infos)[i].regno == REGNO (dest) + 1))
++	reg_avail_infos->unordered_remove (i--);	/* Step back so that slot I is examined again.  */
++}
++
++/* Return the CONST_INT that INSN sets an SImode register to, taken from the SET itself or from a REG_EQUAL/REG_EQUIV note; return NULL_RTX otherwise.  */
++static rtx
++get_const (rtx insn)
++{
++  rtx note;
++
++  if (GET_CODE (PATTERN (insn)) != SET
++      || !REG_P (SET_DEST (PATTERN (insn)))
++      || GET_MODE (SET_DEST (PATTERN (insn))) != SImode)
++    return NULL_RTX;
++
++  /* Constant move instruction.  */
++  if (CONST_INT_P (XEXP (PATTERN (insn), 1)))
++    return XEXP (PATTERN (insn), 1);
++
++  note = find_reg_note (insn, REG_EQUAL, NULL_RTX);	/* REG_EQUAL is preferred; REG_EQUIV is the fallback.  */
++  if (!note)
++    note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
++
++  if (note && CONST_INT_P (XEXP (note, 0)))
++    return XEXP (note, 0);
++
++  return NULL_RTX;
++}
++
++/* Return true if operand 1 of INSN's pattern is a PLUS whose second operand is a CONST_INT (addi format).  The pattern code itself is not checked here.  */
++static bool
++addsi3_format_p (rtx insn)
++{
++  if (GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
++      && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT)
++    return TRUE;
++
++  return FALSE;
++}
++
++/* Return true if INSN is a SET whose source is a CONST_INT that satisfies constraint Ihig (sethi format).  */
++static bool
++sethi_format_p (rtx insn)
++{
++  if (GET_CODE (PATTERN (insn)) == SET
++      && GET_CODE (XEXP (PATTERN (insn), 1)) == CONST_INT
++      && satisfies_constraint_Ihig (XEXP (PATTERN (insn), 1)))
++    return TRUE;
++  return FALSE;
++}
++
++/* Return true if the first link of INSN's use chain leads to a sethi-format insn, and that insn's def chain holds exactly one link.  */
++static bool
++use_only_p (rtx insn)
++{
++  rtx def_insn;
++  df_ref rec;
++  df_link *link;
++  rec = DF_INSN_USES (insn);
++  link = DF_REF_CHAIN (rec);
++
++  if (!link
++      || DF_REF_REGNO (link->ref) == 0
++      || !DF_REF_INSN_INFO (link->ref))
++    return FALSE;
++
++  def_insn = DF_REF_INSN (link->ref);	/* Only the first link of the chain is examined.  */
++
++  if (!sethi_format_p (def_insn))
++    return FALSE;
++
++  rec = DF_INSN_DEFS (def_insn);
++  link = DF_REF_CHAIN (rec);
++
++  if (!link
++      || link->next	/* More than one link: reject.  */
++      || DF_REF_REGNO (link->ref) == 0
++      || !DF_REF_INSN_INFO (link->ref))
++    return FALSE;
++
++  return TRUE;
++}
++
++/* Walk the insns of each basic block, recording which register is set to
++   which constant, and try to rematerialize addi-format constants from them.  */
++static void
++find_common_const (void)
++{
++  basic_block bb;
++  unsigned int i;
++
++  /* Save register constant value.  */
++  auto_vec<reg_avail_info_t, 32> reg_avail_infos;
++  reg_avail_info_t reg_avail_info;
++
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      rtx_insn *insn;
++      rtx dest, cst;
++
++      /* Clear the vector, so records never cross a basic block boundary.  */
++      while (!reg_avail_infos.is_empty ())
++	reg_avail_infos.pop ();
++
++      FOR_BB_INSNS (bb, insn)
++	{
++	  if (!NONDEBUG_INSN_P (insn))
++	    continue;
++
++	  if (CALL_P (insn))
++	    {
++	      /* Drop the records held in call-used hard registers.  */
++	      for (i = 0; i < reg_avail_infos.length ();)
++		{
++		  if (HARD_REGISTER_NUM_P (reg_avail_infos[i].regno)
++		      && call_used_regs[reg_avail_infos[i].regno])
++		    reg_avail_infos.unordered_remove (i);
++		  else
++		    ++i;
++		}
++	    }
++
++	  cst = get_const (insn);
++	  if (cst == NULL_RTX)
++	    {
++	      note_stores (PATTERN (insn), clean_reg_avail_info,
++			   &reg_avail_infos);
++	      continue;
++	    }
++
++	  dest = SET_DEST (PATTERN (insn));
++
++	  if (addsi3_format_p (insn)
++	      && use_only_p (insn)
++	      && try_rematerialize (insn, XUINT (cst, 0), &reg_avail_infos))
++	    {
++	      delete_insn (insn);	/* NOTE(review): INSN is still read and recorded below after this deletion - confirm that is safe.  */
++	      df_insn_rescan_all ();
++	    }
++
++	  note_stores (PATTERN (insn), clean_reg_avail_info, &reg_avail_infos);
++	  reg_avail_info.insn = insn;
++	  reg_avail_info.uint = XUINT (cst, 0);
++	  reg_avail_info.regno = REGNO (dest);
++	  if (dump_file)
++	    fprintf (dump_file, "Find const %08x on %u\n",
++		     reg_avail_info.uint, reg_avail_info.regno);
++	  reg_avail_infos.safe_push (reg_avail_info);
++	}
++    }
++}
++
++static unsigned int	/* Entry of the pass; always returns 0.  */
++nds32_const_remater_opt (void)
++{
++  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);	/* try_rematerialize and use_only_p read DF_REF_CHAIN.  */
++  df_note_add_problem ();
++  df_insn_rescan_all ();
++  df_analyze ();
++
++  find_common_const ();
++
++  df_insn_rescan_all ();
++  return 0;
++}
++
++const pass_data pass_data_nds32_const_remater_opt =	/* Descriptor passed to the rtl_opt_pass constructor.  */
++{
++  RTL_PASS,				/* type */
++  "const_remater_opt",				/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_df_finish,	/* todo_flags_finish */
++};
++
++class pass_nds32_const_remater_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_const_remater_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_const_remater_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gated by flag_nds32_const_remater_opt; execute forwards to the file-local entry function.  */
++  bool gate (function *) { return flag_nds32_const_remater_opt; }
++  unsigned int execute (function *) { return nds32_const_remater_opt (); }
++};
++
++rtl_opt_pass *	/* Return a newly allocated pass object for CTXT.  */
++make_pass_nds32_const_remater_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_const_remater_opt (ctxt);
++}
++
++/* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-cost.c b/gcc/config/nds32/nds32-cost.c
+index e6a29fc..881d086 100644
+--- a/gcc/config/nds32/nds32-cost.c
++++ b/gcc/config/nds32/nds32-cost.c
+@@ -24,73 +24,447 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
+-#include "rtl.h"
+ #include "tree.h"
+-#include "tm_p.h"
+-#include "optabs.h"		/* For GEN_FCN.  */
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
+ #include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
+ #include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "tree-pass.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-bool
+-nds32_rtx_costs_impl (rtx x,
+-		      machine_mode mode ATTRIBUTE_UNUSED,
+-		      int outer_code,
+-		      int opno ATTRIBUTE_UNUSED,
+-		      int *total,
+-		      bool speed)
+-{
+-  int code = GET_CODE (x);
++typedef bool (*rtx_cost_func) (rtx, int, int, int, int*);
+ 
+-  /* According to 'speed', goto suitable cost model section.  */
+-  if (speed)
+-    goto performance_cost;
+-  else
+-    goto size_cost;
++struct rtx_cost_model_t {
++  rtx_cost_func speed_prefer;
++  rtx_cost_func size_prefer;
++};
+ 
++static rtx_cost_model_t rtx_cost_model;
+ 
+-performance_cost:
+-  /* This is section for performance cost model.  */
++static int insn_size_16bit; /* Initial at nds32_init_rtx_costs.  */
++static const int insn_size_32bit = 4;
++/* Speed model: set *TOTAL to the cost of X inside OUTER_CODE; returns true.  */
++static bool
++nds32_rtx_costs_speed_prefer (rtx x,
++			      int code,
++			      int outer_code,
++			      int opno ATTRIBUTE_UNUSED,
++			      int *total)
++{
++  rtx op0;
++  rtx op1;
++  enum machine_mode mode = GET_MODE (x);
++  /* Scale cost by mode size.  */
++  int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
+ 
+-  /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
+-     We treat it as 4-cycle cost for each instruction
+-     under performance consideration.  */
+   switch (code)
+     {
+-    case SET:
+-      /* For 'SET' rtx, we need to return false
+-         so that it can recursively calculate costs.  */
+-      return false;
+-
+     case USE:
+       /* Used in combine.c as a marker.  */
+       *total = 0;
+-      break;
++      return true;
++
++    case CONST_INT:
++      /* When not optimizing for size, we care more about the cost
++	 of hot code, and hot code is often in a loop.  If a constant
++	 operand needs to be forced into a register, we will often be
++	 able to hoist the constant load out of the loop, so the load
++	 should not contribute to the cost.  */
++      if (outer_code == SET || outer_code == PLUS)
++	*total = satisfies_constraint_Is20 (x) ? 0 : 4;
++      else if (outer_code == AND || outer_code == IOR || outer_code == XOR
++	       || outer_code == MINUS)
++	*total = satisfies_constraint_Iu15 (x) ? 0 : 4;
++      else if (outer_code == ASHIFT || outer_code == ASHIFTRT
++	       || outer_code == LSHIFTRT)
++	*total = satisfies_constraint_Iu05 (x) ? 0 : 4;
++      else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
++	       || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
++	*total = satisfies_constraint_Is16 (x) ? 0 : 4;
++      else
++	*total = COSTS_N_INSNS (1);
++      return true;
++
++    case CONST:
++    case LO_SUM:
++    case HIGH:
++    case SYMBOL_REF:
++      *total = COSTS_N_INSNS (1);
++      return true;
++
++    case MEM:
++      *total = COSTS_N_INSNS (1);
++      return true;
++
++    case SET:
++      op0 = SET_DEST (x);
++      op1 = SET_SRC (x);
++      mode = GET_MODE (op0);
++      /* Scale cost by mode size.  */
++      cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
++
++      switch (GET_CODE (op1))
++	{
++	case REG:
++	case SUBREG:
++	  /* Register move and Store instructions.  */
++	  if ((REG_P (op0) || MEM_P (op0))
++	      && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = cost;
++	  return true;
++
++	case MEM:
++	  /* Load instructions.  */
++	  if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = cost;
++	  return true;
++
++	case CONST_INT:
++	  /* movi instruction.  */
++	  if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
++	    {
++	      if (satisfies_constraint_Is20 (op1))
++		*total = COSTS_N_INSNS (1) - 1;
++	      else
++		*total = COSTS_N_INSNS (2);
++	    }
++	  else
++	    *total = cost;
++	  return true;
++
++	case CONST:
++	case SYMBOL_REF:
++	case LABEL_REF:
++	  /* la instruction.  */
++	  if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
++	    *total = COSTS_N_INSNS (1) - 1;
++	  else
++	    *total = cost;
++	  return true;
++	case VEC_SELECT:
++	  *total = cost;
++	  return true;
++
++	default:
++	  *total = cost;
++	  return true;
++	}
++
++    case PLUS:
++      op0 = XEXP (x, 0);
++      op1 = XEXP (x, 1);
++
++      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
++	       || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
++	{
++	  /* ALU_SHIFT */
++	  if (TARGET_PIPELINE_PANTHER)
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = COSTS_N_INSNS (2);
++	}
++      else if ((GET_CODE (op1) == CONST_INT
++		&& satisfies_constraint_Is15 (op1))
++		|| REG_P (op1))
++	/* ADD instructions */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* ADD instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++      return true;
++
++    case MINUS:
++      op0 = XEXP (x, 0);
++      op1 = XEXP (x, 1);
++
++      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
++	       || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
++	{
++	  /* ALU_SHIFT */
++	  if (TARGET_PIPELINE_PANTHER)
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = COSTS_N_INSNS (2);
++	}
++      else if ((GET_CODE (op0) == CONST_INT
++		&& satisfies_constraint_Is15 (op0))
++		|| REG_P (op0))
++	/* SUB instructions */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* SUB instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++      return true;
++
++    case TRUNCATE:
++      /* TRUNCATE and AND behavior is same. */
++      *total = COSTS_N_INSNS (1);
++      return true;
++
++    case AND:
++    case IOR:
++    case XOR:
++      op0 = XEXP (x, 0);
++      op1 = XEXP (x, 1);
++
++      if (NDS32_EXT_DSP_P ())
++	{
++	  /* We prefer (and (ior) (ior)) over (ior (and) (and)) so that the
++	     pk** and insb instructions can be synthesized.  */
++	  if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR)
++	    { *total = COSTS_N_INSNS (1); return true; }
++
++	  if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND)
++	    { *total = COSTS_N_INSNS (10); return true; }
++	}
++
++      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT)
++	{
++	  /* ALU_SHIFT */
++	  if (TARGET_PIPELINE_PANTHER)
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = COSTS_N_INSNS (2);
++	}
++      else if ((GET_CODE (op1) == CONST_INT
++	       && satisfies_constraint_Iu15 (op1))
++	       || REG_P (op1))
++	/* AND, OR, XOR instructions */
++	*total = COSTS_N_INSNS (1);
++      else if (code == AND || GET_CODE (op0) == NOT)
++	/* BITC instruction */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* AND, OR, XOR instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++      return true;
+ 
+     case MULT:
++      if (GET_MODE (x) == DImode
++	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
++	  || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
++	/* MUL instructions */
++	*total = COSTS_N_INSNS (1);
++      else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if (outer_code == PLUS || outer_code == MINUS)
++	{
++	  /* ALU_SHIFT */
++	  if (TARGET_PIPELINE_PANTHER)
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = COSTS_N_INSNS (2);
++	}
++      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
++	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
++	       || REG_P (XEXP (x, 1)))
++	/* MUL instructions */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* MUL instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++
++      if (TARGET_MUL_SLOW)
++	*total += COSTS_N_INSNS (4);
++
++      return true;
++
++    case LSHIFTRT:
++      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if (outer_code == PLUS || outer_code == MINUS
++	       || outer_code == AND || outer_code == IOR
++	       || outer_code == XOR)
++	{
++	  /* ALU_SHIFT */
++	  if (TARGET_PIPELINE_PANTHER)
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = COSTS_N_INSNS (2);
++	}
++      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
++	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
++	       || REG_P (XEXP (x, 1)))
++	/* SRL instructions */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* SRL instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++      return true;
++
++    case ASHIFT:
++      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if (outer_code == AND || outer_code == IOR
++	       || outer_code == XOR)
++	{
++	  /* ALU_SHIFT */
++	  if (TARGET_PIPELINE_PANTHER)
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    *total = COSTS_N_INSNS (2);
++	}
++      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
++	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
++	       || REG_P (XEXP (x, 1)))
++	/* SLL instructions */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* SLL instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++      return true;
++
++    case ASHIFTRT:
++    case ROTATERT:
++      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
++	*total = cost;
++      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
++	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
++	       || REG_P (XEXP (x, 1)))
++	/* SRA, ROTR instructions */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* SRA, ROTR instructions: IMM out of range.  */
++	*total = COSTS_N_INSNS (2);
++      return true;
++
++    case LT:
++    case LTU:
++      if (outer_code == SET)
++	{
++	  if ((GET_CODE (XEXP (x, 1)) == CONST_INT
++	      && satisfies_constraint_Iu15 (XEXP (x, 1)))
++	      || REG_P (XEXP (x, 1)))
++	    /* SLT, SLTI instructions */
++	    *total = COSTS_N_INSNS (1);
++	  else
++	    /* SLT, SLTI instructions: IMM out of range.  */
++	    *total = COSTS_N_INSNS (2);
++	}
++      else
++	/* branch */
++	*total = COSTS_N_INSNS (2);
++      return true;
++
++    case EQ:
++    case NE:
++    case GE:
++    case LE:
++    case GT:
++      /* branch */
++      *total = COSTS_N_INSNS (2);
++      return true;
++
++    case IF_THEN_ELSE:
++      if (GET_CODE (XEXP (x, 1)) == LABEL_REF)
++	/* branch */
++	*total = COSTS_N_INSNS (2);
++      else
++	/* cmovz, cmovn instructions */
++	*total = COSTS_N_INSNS (1);
++      return true;
++
++    case LABEL_REF:
++      if (outer_code == IF_THEN_ELSE)
++	/* branch */
++	*total = COSTS_N_INSNS (2);
++      else
++	*total = COSTS_N_INSNS (1);
++      return true;
++
++    case ZERO_EXTEND:
++    case SIGN_EXTEND:
++      if (MEM_P (XEXP (x, 0)))
++	/* Using memory access. */
++	*total = COSTS_N_INSNS (1);
++      else
++	/* Zero extend and sign extend instructions.  */
++	*total = COSTS_N_INSNS (1);
++      return true;
++
++    case NEG:
++    case NOT:
+       *total = COSTS_N_INSNS (1);
+-      break;
++      return true;
+ 
+     case DIV:
+     case UDIV:
+     case MOD:
+     case UMOD:
+-      *total = COSTS_N_INSNS (7);
+-      break;
++      *total = COSTS_N_INSNS (20);
++      return true;
+ 
+-    default:
++    case CALL:
++      *total = COSTS_N_INSNS (2);
++      return true;
++
++    case CLZ:
++    case SMIN:
++    case SMAX:
++    case ZERO_EXTRACT:
++      if (TARGET_EXT_PERF)
++	*total = COSTS_N_INSNS (1);
++      else
++	*total = COSTS_N_INSNS (3);
++      return true;
++    case VEC_SELECT:
+       *total = COSTS_N_INSNS (1);
+-      break;
+-    }
+-
+-  return true;
+-
++      return true;
+ 
+-size_cost:
+-  /* This is section for size cost model.  */
++    default:
++      *total = COSTS_N_INSNS (3);
++      return true;
++    }
++}
+ 
++static bool
++nds32_rtx_costs_size_prefer (rtx x,
++			     int code,
++			     int outer_code,
++			     int opno ATTRIBUTE_UNUSED,
++			     int *total)
++{
+   /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
+      We treat it as 4-byte cost for each instruction
+      under code size consideration.  */
+@@ -98,7 +472,7 @@ size_cost:
+     {
+     case SET:
+       /* For 'SET' rtx, we need to return false
+-         so that it can recursively calculate costs.  */
++	 so that it can recursively calculate costs.  */
+       return false;
+ 
+     case USE:
+@@ -108,92 +482,169 @@ size_cost:
+ 
+     case CONST_INT:
+       /* All instructions involving constant operation
+-         need to be considered for cost evaluation.  */
++	 need to be considered for cost evaluation.  */
+       if (outer_code == SET)
+ 	{
+ 	  /* (set X imm5s), use movi55, 2-byte cost.
+ 	     (set X imm20s), use movi, 4-byte cost.
+ 	     (set X BIG_INT), use sethi/ori, 8-byte cost.  */
+ 	  if (satisfies_constraint_Is05 (x))
+-	    *total = COSTS_N_INSNS (1) - 2;
++	    *total = insn_size_16bit;
+ 	  else if (satisfies_constraint_Is20 (x))
+-	    *total = COSTS_N_INSNS (1);
++	    *total = insn_size_32bit;
+ 	  else
+-	    *total = COSTS_N_INSNS (2);
++	    *total = insn_size_32bit * 2;
+ 	}
+       else if (outer_code == PLUS || outer_code == MINUS)
+ 	{
+ 	  /* Possible addi333/subi333 or subi45/addi45, 2-byte cost.
+ 	     General case, cost 1 instruction with 4-byte.  */
+ 	  if (satisfies_constraint_Iu05 (x))
+-	    *total = COSTS_N_INSNS (1) - 2;
++	    *total = insn_size_16bit;
+ 	  else
+-	    *total = COSTS_N_INSNS (1);
++	    *total = insn_size_32bit;
+ 	}
+       else if (outer_code == ASHIFT)
+ 	{
+ 	  /* Possible slli333, 2-byte cost.
+ 	     General case, cost 1 instruction with 4-byte.  */
+ 	  if (satisfies_constraint_Iu03 (x))
+-	    *total = COSTS_N_INSNS (1) - 2;
++	    *total = insn_size_16bit;
+ 	  else
+-	    *total = COSTS_N_INSNS (1);
++	    *total = insn_size_32bit;
+ 	}
+       else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT)
+ 	{
+ 	  /* Possible srai45 or srli45, 2-byte cost.
+ 	     General case, cost 1 instruction with 4-byte.  */
+ 	  if (satisfies_constraint_Iu05 (x))
+-	    *total = COSTS_N_INSNS (1) - 2;
++	    *total = insn_size_16bit;
+ 	  else
+-	    *total = COSTS_N_INSNS (1);
++	    *total = insn_size_32bit;
+ 	}
+       else
+ 	{
+ 	  /* For other cases, simply set it 4-byte cost.  */
+-	  *total = COSTS_N_INSNS (1);
++	  *total = insn_size_32bit;
+ 	}
+       break;
+ 
+     case CONST_DOUBLE:
+       /* It requires high part and low part processing, set it 8-byte cost.  */
+-      *total = COSTS_N_INSNS (2);
++      *total = insn_size_32bit * 2;
++      break;
++
++    case CONST:
++    case SYMBOL_REF:
++      *total = insn_size_32bit * 2;
+       break;
+ 
+     default:
+       /* For other cases, generally we set it 4-byte cost
+-         and stop resurively traversing.  */
+-      *total = COSTS_N_INSNS (1);
++	 and stop recursively traversing.  */
++      *total = insn_size_32bit;
+       break;
+     }
+ 
+   return true;
+ }
+ 
+-int
+-nds32_address_cost_impl (rtx address,
+-			 machine_mode mode ATTRIBUTE_UNUSED,
+-			 addr_space_t as ATTRIBUTE_UNUSED,
+-			 bool speed)
++/* Install the cost-model callbacks and fix the 16-bit insn size.  */
++void
++nds32_init_rtx_costs (void)
++{
++  /* Without 16-bit instructions a "16-bit" insn is costed as 4 bytes.  */
++  insn_size_16bit = TARGET_16_BIT ? 2 : 4;
++
++  /* These are the callbacks nds32_rtx_costs_impl dispatches to.  */
++  rtx_cost_model.size_prefer = nds32_rtx_costs_size_prefer;
++  rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer;
++}
++
++/* Implementation of the rtx-cost target hook.  Store the cost of X in
++   *TOTAL, using the speed model when SPEED is true and the size model
++   otherwise.  The result is that of the selected model: 'true' when X
++   has been costed completely, 'false' to have the costs of its sub-rtx
++   summed as well.  Refer to gcc/rtlanal.c for more information.  */
++bool
++nds32_rtx_costs_impl (rtx x,
++		      machine_mode mode ATTRIBUTE_UNUSED,
++		      int outer_code,
++		      int opno,
++		      int *total,
++		      bool speed)
++{
++  /* Pick the callback installed by nds32_init_rtx_costs.  */
++  rtx_cost_func cost_fn = (speed
++			   ? rtx_cost_model.speed_prefer
++			   : rtx_cost_model.size_prefer);
++
++  return cost_fn (x, GET_CODE (x), outer_code, opno, total);
++}
++
++/* Address cost when optimizing for speed (outside the fwprop pass).  */
++int nds32_address_cost_speed_prefer (rtx address)
+ {
+   rtx plus0, plus1;
+   enum rtx_code code;
+ 
+   code = GET_CODE (address);
+ 
+-  /* According to 'speed', goto suitable cost model section.  */
+-  if (speed)
+-    goto performance_cost;
+-  else
+-    goto size_cost;
++  switch (code)
++    {
++    case POST_MODIFY:
++    case POST_INC:
++    case POST_DEC:
++      /* We encourage that rtx contains
++	 POST_MODIFY/POST_INC/POST_DEC behavior.  */
++      return COSTS_N_INSNS (1) - 2;
++
++    case SYMBOL_REF:
++      /* We can have gp-relative load/store for symbol_ref.
++	 Cost it as two instructions.  */
++      return COSTS_N_INSNS (2);
++
++    case CONST:
++      /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
++	 Cost it as two instructions.  */
++      return COSTS_N_INSNS (2);
++
++    case REG:
++      /* Plain register: cheaper than one full instruction.  */
++      return COSTS_N_INSNS (1) - 2;
++
++    case PLUS:
++      /* We do not need to check if the address is a legitimate address,
++	 because this hook is never called with an invalid address.
++	 But we better check the range of
++	 const_int value for cost, if it exists.  */
++      plus0 = XEXP (address, 0);
++      plus1 = XEXP (address, 1);
++
++      if (REG_P (plus0) && CONST_INT_P (plus1))
++	return COSTS_N_INSNS (1) - 2;
++      else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
++	return COSTS_N_INSNS (1) - 1;
++      else if (REG_P (plus0) && REG_P (plus1))
++	return COSTS_N_INSNS (1);
++
++      /* For other 'plus' situation, cost one full instruction.  */
++      return COSTS_N_INSNS (1);
+ 
+-performance_cost:
+-  /* This is section for performance cost model.  */
++    default:
++      break;
++    }
+ 
+-  /* FALLTHRU, currently we use same cost model as size_cost.  */
++  return COSTS_N_INSNS (4);
+ 
+-size_cost:
+-  /* This is section for size cost model.  */
++}
++
++int nds32_address_cost_speed_fwprop (rtx address)
++{
++  rtx plus0, plus1;
++  enum rtx_code code;
++
++  code = GET_CODE (address);
+ 
+   switch (code)
+     {
+@@ -201,18 +652,18 @@ size_cost:
+     case POST_INC:
+     case POST_DEC:
+       /* We encourage that rtx contains
+-         POST_MODIFY/POST_INC/POST_DEC behavior.  */
++	 POST_MODIFY/POST_INC/POST_DEC behavior.  */
+       return 0;
+ 
+     case SYMBOL_REF:
+       /* We can have gp-relative load/store for symbol_ref.
+-         Have it 4-byte cost.  */
+-      return COSTS_N_INSNS (1);
++	 Have it 4-byte cost.  */
++      return COSTS_N_INSNS (2);
+ 
+     case CONST:
+       /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
+-         Have it 4-byte cost.  */
+-      return COSTS_N_INSNS (1);
++	 Have it 4-byte cost.  */
++      return COSTS_N_INSNS (2);
+ 
+     case REG:
+       /* Simply return 4-byte costs.  */
+@@ -220,21 +671,25 @@ size_cost:
+ 
+     case PLUS:
+       /* We do not need to check if the address is a legitimate address,
+-         because this hook is never called with an invalid address.
+-         But we better check the range of
+-         const_int value for cost, if it exists.  */
++	 because this hook is never called with an invalid address.
++	 But we better check the range of
++	 const_int value for cost, if it exists.  */
+       plus0 = XEXP (address, 0);
+       plus1 = XEXP (address, 1);
+ 
+       if (REG_P (plus0) && CONST_INT_P (plus1))
+-        {
++	{
+ 	  /* If it is possible to be lwi333/swi333 form,
+ 	     make it 2-byte cost.  */
+-	  if (satisfies_constraint_Iu05 (plus1))
++	  if (satisfies_constraint_Iu03 (plus1))
+ 	    return (COSTS_N_INSNS (1) - 2);
+ 	  else
+ 	    return COSTS_N_INSNS (1);
+ 	}
++      if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
++	return COSTS_N_INSNS (1) - 2;
++      else if (REG_P (plus0) && REG_P (plus1))
++	return COSTS_N_INSNS (1);
+ 
+       /* For other 'plus' situation, make it cost 4-byte.  */
+       return COSTS_N_INSNS (1);
+@@ -246,4 +701,84 @@ size_cost:
+   return COSTS_N_INSNS (4);
+ }
+ 
++
++/* Address cost used when optimizing for size.
++
++   Costs are expressed relative to COSTS_N_INSNS (1), the cost of one
++   instruction; a smaller value makes the address form more attractive.
++   Post-update forms are free, register-based forms cost at most one
++   instruction, symbolic addresses two, and anything else four.  */
++int nds32_address_cost_size_prefer (rtx address)
++{
++  rtx base;
++  rtx index;
++
++  /* Dispatch on the outermost rtx code of the address.  */
++  switch (GET_CODE (address))
++    {
++    case POST_MODIFY:
++    case POST_INC:
++    case POST_DEC:
++      /* Post-update addressing is free, to encourage its use.  */
++      return 0;
++
++    case SYMBOL_REF:
++      /* A bare symbol_ref costs two instructions.  */
++      return COSTS_N_INSNS (2);
++
++    case CONST:
++      /* Supposed to be (const (plus symbol_ref const_int)); likewise
++	 two instructions.  */
++      return COSTS_N_INSNS (2);
++
++    case REG:
++      /* Slightly cheaper than one full instruction.  */
++      return COSTS_N_INSNS (1) - 1;
++
++    case PLUS:
++      /* This hook is never called with an invalid address, so there is
++	 no legitimacy check here; only the shape of the sum and the
++	 range of a const_int offset influence the cost.  */
++      base = XEXP (address, 0);
++      index = XEXP (address, 1);
++
++      if (REG_P (base) && CONST_INT_P (index))
++	/* An offset that may allow the lwi333/swi333 form gets the
++	   bigger discount.  */
++	return (satisfies_constraint_Iu03 (index)
++		? COSTS_N_INSNS (1) - 2
++		: COSTS_N_INSNS (1) - 1);
++
++      /* e.g. (plus (reg) (mult (reg) (const))) */
++      if (ARITHMETIC_P (base) || ARITHMETIC_P (index))
++	return COSTS_N_INSNS (1) - 1;
++
++      /* Every other sum costs one full instruction.  */
++      return COSTS_N_INSNS (1);
++
++    default:
++      /* Fall out of the switch to the catch-all cost below.  */
++      break;
++    }
++
++  /* Any other address form is made expensive.  */
++  return COSTS_N_INSNS (4);
++}
++
++int nds32_address_cost_impl (rtx address,
++			     enum machine_mode mode ATTRIBUTE_UNUSED,
++			     addr_space_t as ATTRIBUTE_UNUSED,
++			     bool speed_p)
++{
++  /* Dispatch to one of the three address cost models above.  Size
++     optimization has a single model.  */
++  if (!speed_p)
++    return nds32_address_cost_size_prefer (address);
++
++  /* For speed, the forward-propagation pass gets its own model.  */
++  if (current_pass->tv_id == TV_FWPROP)
++    return nds32_address_cost_speed_fwprop (address);
++  return nds32_address_cost_speed_prefer (address);
++}
++
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-cprop-acc.c b/gcc/config/nds32/nds32-cprop-acc.c
+new file mode 100644
+index 0000000..0852095
+--- /dev/null
++++ b/gcc/config/nds32/nds32-cprop-acc.c
+@@ -0,0 +1,845 @@
++/* Copy propagation on hard registers for accumulate style instruction.
++   Copyright (C) 2000-2014 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "rtl.h"
++#include "tm_p.h"
++#include "insn-config.h"
++#include "regs.h"
++#include "addresses.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "reload.h"
++#include "hash-set.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "function.h"
++#include "recog.h"
++#include "cfgrtl.h"
++#include "flags.h"
++#include "diagnostic-core.h"
++#include "obstack.h"
++#include "tree-pass.h"
++#include "bitmap.h"
++#include "df.h"
++#include "output.h"
++#include "emit-rtl.h"
++#include <vector>
++
++/* For each move instruction, we have a two-dimensional vector that records
++   which insns need their operands replaced when the move instruction is
++   propagated.  */
++
++typedef std::vector<rtx_insn *> insn_list;
++
++/* Data block, presumably for the note_uses callback replacing used subexpressions.  */
++
++struct replace_src_operands_data
++{
++  rtx dst_reg;
++  rtx src_reg;
++  unsigned int old_regno;
++  unsigned int new_regno;
++  rtx_insn *insn;
++};
++
++/* Tell whether REGNO, set in ORIG_MODE, may be accessed in NEW_MODE.
++   Widening is never allowed.  Adapted from mode_change_ok in regcprop.  */
++
++static bool
++nds32_mode_change_ok (enum machine_mode orig_mode, enum machine_mode new_mode,
++		      unsigned int regno ATTRIBUTE_UNUSED)
++{
++  bool widens = GET_MODE_SIZE (orig_mode) < GET_MODE_SIZE (new_mode);
++
++#ifdef CANNOT_CHANGE_MODE_CLASS
++  /* The target may additionally forbid the change for this register.  */
++  return !widens && !REG_CANNOT_CHANGE_MODE_P (regno, orig_mode, new_mode);
++#else
++  return !widens;
++#endif
++}
++
++/* Register REGNO was originally set in ORIG_MODE.  It - or a copy of it -
++   was copied in COPY_MODE to COPY_REGNO, and then COPY_REGNO was accessed
++   in NEW_MODE.  Return a fresh NEW_MODE REG (from gen_raw_REG) for REGNO,
++   offset as a subreg would be, if that's OK; otherwise return NULL_RTX.
++   Adapted from maybe_mode_change in regcprop.  */
++
++static rtx
++nds32_mode_change_reg (enum machine_mode orig_mode, enum machine_mode copy_mode,
++		       enum machine_mode new_mode, unsigned int regno,
++		       unsigned int copy_regno ATTRIBUTE_UNUSED)
++{
++  if (GET_MODE_SIZE (copy_mode) < GET_MODE_SIZE (orig_mode)
++      && GET_MODE_SIZE (copy_mode) < GET_MODE_SIZE (new_mode))
++    return NULL_RTX;
++
++  if (orig_mode == new_mode)
++    return gen_raw_REG (new_mode, regno);
++  else if (nds32_mode_change_ok (orig_mode, new_mode, regno))
++    {
++      int copy_nregs = hard_regno_nregs[copy_regno][copy_mode];
++      int use_nregs = hard_regno_nregs[copy_regno][new_mode];
++      int copy_offset
++	= GET_MODE_SIZE (copy_mode) / copy_nregs * (copy_nregs - use_nregs);
++      int offset
++	= GET_MODE_SIZE (orig_mode) - GET_MODE_SIZE (new_mode) - copy_offset;
++      int byteoffset = offset % UNITS_PER_WORD;
++      int wordoffset = offset - byteoffset;
++      /* Only big-endian layouts need a non-zero offset.  */
++      offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0)
++		+ (BYTES_BIG_ENDIAN ? byteoffset : 0));
++      regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
++      if (HARD_REGNO_MODE_OK (regno, new_mode))
++	return gen_raw_REG (new_mode, regno);
++    }
++  return NULL_RTX;
++}
++
++/* Return true if INSN is a single SET copying one register to another
++   in a mode for which can_copy_p holds, false otherwise.  */
++
++static bool
++nds32_is_reg_mov_p (rtx_insn *insn)
++{
++  rtx body = PATTERN (insn);
++
++  /* Only a bare SET can be a move.  */
++  if (GET_CODE (body) != SET)
++    return false;
++
++  rtx dest = SET_DEST (body);
++  rtx src = SET_SRC (body);
++
++  return (REG_P (dest)
++	  && REG_P (src)
++	  && can_copy_p (GET_MODE (dest)));
++}
++
++
++/* Return accumulated register if INSN is a 4-byte, single-SET accumulate
++   style instruction (an operand tied to another), else NULL_RTX.  */
++
++static rtx
++nds32_is_acc_insn_p (rtx_insn *insn)
++{
++  int i;
++  const operand_alternative *op_alt;
++  rtx pat;
++
++  if (get_attr_length (insn) != 4)
++    return NULL_RTX;
++
++  pat = PATTERN (insn);
++  if (GET_CODE (pat) != SET)
++    return NULL_RTX;
++
++  /* Try to get the insn data from recog_data.  */
++  recog_memoized (insn);
++  extract_constrain_insn (insn);
++  /* Transform the constraint strings into a more usable form,
++     recog_op_alt.  */
++  preprocess_constraints (insn);
++  op_alt = which_op_alt ();
++
++  /* Look for an operand tied to another by a matching constraint where
++     at least one of the two is not a pure input operand.  */
++  for (i = 0; i < recog_data.n_operands; ++i)
++    {
++      int matches = op_alt[i].matches;
++      int matched = op_alt[i].matched;
++      if ((matches >= 0
++	   && (recog_data.operand_type[i] != OP_IN
++	       || recog_data.operand_type[matches] != OP_IN))
++	  || (matched >= 0
++	      && (recog_data.operand_type[i] != OP_IN
++		  || recog_data.operand_type[matched] != OP_IN)))
++	return recog_data.operand[i];
++    }
++
++  return NULL_RTX;
++}
++
++/* Return the definition in INSN of register number REGNO, or NULL if
++   there is none.  Adapted from df_find_def in df-core.  */
++
++static df_ref
++nds32_df_find_regno_def (rtx_insn *insn, unsigned int regno)
++{
++  df_ref ref;
++
++  FOR_EACH_INSN_DEF (ref, insn)
++    {
++      if (regno == DF_REF_REGNO (ref))
++	return ref;
++    }
++  return NULL;
++}
++
++/* Return true if every use in INSN of a register covered by REG has
++   exactly one reaching definition and it is the insn whose uid is
++   DEF_UID, otherwise return false.  */
++
++static bool
++nds32_is_single_def_p (rtx_insn *insn, rtx reg, unsigned int def_uid)
++{
++  df_ref use;
++
++  FOR_EACH_INSN_USE (use, insn)
++    {
++      df_link *link;
++      unsigned int uid;
++
++      if (DF_REF_REGNO (use) >= REGNO (reg)
++	  && DF_REF_REGNO (use) < END_REGNO (reg))
++	{
++	  link = DF_REF_CHAIN (use);
++	  /* An empty chain has no definition at all, so not DEF_UID's.  */
++	  if (!link || link->next
++	      || DF_REF_IS_ARTIFICIAL (link->ref))
++	    return false;
++	  uid = DF_REF_INSN_UID (link->ref);
++	  if (uid != def_uid)
++	    return false;
++	}
++    }
++  return true;
++}
++
++/* Return true if there is no definition of REG on any path from the insn
++   whose uid is FROM_UID (called FROM) to insn TO, otherwise return false.
++   This function collects the reaching definitions bitmap at insn TO, and
++   checks that each def reaching a use of REG in FROM also reaches TO.  */
++
++static bool
++nds32_no_define_reg_p (rtx to, rtx reg, unsigned int from_uid)
++{
++  basic_block bb = BLOCK_FOR_INSN (to);
++  struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (bb);
++  bitmap_head rd_local;
++  bool result = true;
++  rtx_insn *insn;
++  df_ref use;
++  df_insn_info *insn_info;
++
++  bitmap_initialize (&rd_local, &bitmap_default_obstack);
++  bitmap_copy (&rd_local, &bb_info->in);
++  df_rd_simulate_artificial_defs_at_top (bb, &rd_local);
++  /* Replay the insns before TO to get the definitions reaching TO.  */
++  for (insn = BB_HEAD (bb); insn != to; insn = NEXT_INSN (insn))
++    if (INSN_P (insn))
++      df_rd_simulate_one_insn (bb, insn, &rd_local);
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "scan reach define:");
++      print_rtl_single (dump_file, to);
++
++      fprintf (dump_file, "bb rd in:\n");
++      dump_bitmap (dump_file, &bb_info->in);
++
++      fprintf (dump_file, "reach def:\n");
++      dump_bitmap (dump_file, &rd_local);
++    }
++
++  insn_info = DF_INSN_UID_GET (from_uid);
++  FOR_EACH_INSN_INFO_USE (use, insn_info)
++    {
++      df_link *link;
++
++      if (DF_REF_REGNO (use) >= REGNO (reg)
++	  && DF_REF_REGNO (use) < END_REGNO (reg))
++	for (link = DF_REF_CHAIN (use); link; link = link->next)
++	  {
++	    if (dump_file)
++	      {
++		fprintf (dump_file, "use ID %d\n", DF_REF_ID (link->ref));
++		if (DF_REF_IS_ARTIFICIAL (link->ref))
++		  fprintf (dump_file, "use ref is artificial\n");
++		else
++		  {
++		    fprintf (dump_file, "use from insn:");
++		    print_rtl_single (dump_file, DF_REF_INSN (link->ref));
++		  }
++	      }
++	    result &=
++	      (bitmap_bit_p (&rd_local, DF_REF_ID (link->ref)))
++	      ? true
++	      : false;
++	  }
++    }
++
++  bitmap_clear (&rd_local);
++  return result;
++}
++
++/* Return true if REG is dead just before INSN, i.e. none of its hard
++   registers is live at that point, otherwise return false.  */
++
++static bool
++nds32_is_dead_reg_p (rtx_insn *insn, rtx reg)
++{
++  basic_block bb = BLOCK_FOR_INSN (insn);
++  bitmap live = BITMAP_ALLOC (&reg_obstack);
++  bool dead = true;
++
++  /* Start from the block's live-in set and replay up to INSN.  */
++  bitmap_copy (live, DF_LR_IN (bb));
++  df_simulate_initialize_forwards (bb, live);
++
++  rtx_insn *cur = BB_HEAD (bb);
++  while (cur != insn)
++    {
++      df_simulate_one_insn_forwards (bb, cur, live);
++      cur = NEXT_INSN (cur);
++    }
++  if (dump_file)
++    {
++      fprintf (dump_file, "scan live regs:");
++      print_rtl_single (dump_file, insn);
++      fprintf (dump_file, "bb lr in:\n");
++      dump_bitmap (dump_file, DF_LR_IN (bb));
++      fprintf (dump_file, "live:\n");
++      dump_bitmap (dump_file, live);
++    }
++
++  for (unsigned int rn = REGNO (reg); rn < END_REGNO (reg); ++rn)
++    if (bitmap_bit_p (live, rn))
++      dead = false;
++  BITMAP_FREE (live);
++  return dead;
++}
++
++/* Return true if START can do propagation.  Notice START may be a move
++   instruction or an accumulate style instruction.
++   MOV_UID is the uid of the beginning move instruction; besides being
++   passed down the recursion it is only used by nds32_no_define_reg_p.
++   DST_REG & SRC_REG are the SET_DEST and SET_SRC, respectively, of a move
++   instruction that may be real or unreal.
++   INDEX selects which of the consecutive hard registers of DST_REG and
++   SRC_REG is currently considered.  At the same time INDEX is the row of
++   INSN_LISTS that records the use insns found by this call.  */
++
++static bool
++nds32_can_cprop_acc_1 (rtx_insn *start, unsigned int mov_uid,
++		       rtx dst_reg, rtx src_reg,
++		       unsigned int index,
++		       std::vector<insn_list> &insn_lists)
++{
++  unsigned int lead_regno = REGNO (dst_reg) + index;
++  unsigned int new_regno = REGNO (src_reg) + index;
++  df_ref def_rec;
++  df_link *link;
++
++  def_rec = nds32_df_find_regno_def (start, lead_regno);
++  gcc_assert (def_rec);
++
++  for (link = DF_REF_CHAIN (def_rec); link; link = link->next)
++    {
++      rtx *use_loc;
++      unsigned int use_regno;
++      enum machine_mode use_mode;
++      rtx_insn *use_insn;
++      rtx acc_reg, new_src;
++
++      if (DF_REF_IS_ARTIFICIAL (link->ref))
++	return false;
++
++      use_loc = DF_REF_LOC (link->ref);
++      gcc_assert (use_loc && REG_P (*use_loc));
++
++      use_regno = REGNO (*use_loc);
++      /* Do not propagate when any insn uses a register whose regno is
++	 smaller than that of DST_REG.  */
++      if (use_regno < REGNO (dst_reg))
++	return false;
++
++      /* This case should have been handled by a previous call.  */
++      if (use_regno < lead_regno)
++	continue;
++
++      /* Do not propagate when the use extends beyond the last register of
++	 DST_REG, i.e. not all of its pieces come from DST_REG.  */
++      if (END_REGNO (*use_loc) > END_REGNO (dst_reg))
++	return false;
++
++      use_insn = DF_REF_INSN (link->ref);
++      /* Do not propagate since call-used registers can't be replaced.  */
++      if (CALL_P (use_insn))
++	return false;
++
++      /* Do not replace in asms intentionally referencing hard registers.  */
++      if (asm_noperands (PATTERN (use_insn)) >= 0
++	  && use_regno == ORIGINAL_REGNO (*use_loc))
++	return false;
++
++      /* Do not propagate when the register is defined by more than one
++	 instruction.  */
++      if (!nds32_is_single_def_p (use_insn, *use_loc, INSN_UID (start)))
++	return false;
++
++      use_mode = GET_MODE (*use_loc);
++      new_src = nds32_mode_change_reg (GET_MODE (src_reg),
++				       GET_MODE (dst_reg),
++				       use_mode,
++				       new_regno,
++				       use_regno);
++      /* Do not propagate if we can't generate a new register with new mode.  */
++      if (!new_src)
++	return false;
++
++      /* Cannot replace DST_REG with SRC_REG when SRC_REG is redefined
++	 between START and the use insn of START.  */
++      if (!nds32_no_define_reg_p (use_insn, new_src, mov_uid))
++	return false;
++
++      acc_reg = nds32_is_acc_insn_p (use_insn);
++      /* Handle the accumulate style instruction whose accumulate register
++	 may be replaced.
++	 Also handle the AUTO_INC register, which is another form of
++	 accumulated register.  */
++      if ((acc_reg && rtx_equal_p (acc_reg, *use_loc))
++	  || FIND_REG_INC_NOTE (use_insn, *use_loc))
++	{
++	  unsigned int i, use_nregs;
++
++	  /* ACC_REG can't be replaced when nds32_is_dead_reg_p rejects
++	     NEW_SRC, since the SRC_REG can't be overwritten.  */
++	  if (!nds32_is_dead_reg_p (use_insn, new_src))
++	    return false;
++
++	  /* Once we confirm that ACC_REG can be replaced, the unreal move
++	     instruction is generated. For example:
++	     mov   r0, r1	   mov   r0, r1
++	     cmovn r0, r2, r3  ->  cmovn r1, r2, r3
++				   mov   r0, r1
++	     If the unreal move instruction can do propagation, the ACC_REG
++	     can be replaced. We check it in a recursive way.  */
++	  use_nregs = hard_regno_nregs [use_regno][(int) use_mode];
++	  for (i = 0; i < use_nregs; ++i)
++	    if (!nds32_can_cprop_acc_1 (use_insn, mov_uid,
++					*use_loc, new_src,
++					i, insn_lists))
++	      return false;
++	}
++      insn_lists[index].push_back (use_insn);
++    }
++
++  return true;
++}
++
++/* Decide whether the move MOV can be propagated.  INSN_LISTS collects, one
++   row per destination hard register, the insns that need new operands.  */
++
++static bool
++nds32_can_cprop_acc (rtx_insn *mov, std::vector<insn_list> &insn_lists)
++{
++  rtx pat = PATTERN (mov);
++  rtx dest = SET_DEST (pat);
++  rtx source = SET_SRC (pat);
++  unsigned int uid = INSN_UID (mov);
++  enum machine_mode mode = GET_MODE (dest);
++  unsigned int nregs = hard_regno_nregs[REGNO (dest)][(int) mode];
++  unsigned int n = 0;
++
++  insn_lists.resize (nregs);
++  /* Every hard register covered by the destination must be propagatable.  */
++  while (n < nregs
++	 && nds32_can_cprop_acc_1 (mov, uid, dest, source, n, insn_lists))
++    ++n;
++
++  return n == nregs;
++}
++
++/* Replace every occurrence of OLD_REGNO in LOC with NEW_REGNO.  LOC may be
++   a part of INSN.
++   DST_REG & SRC_REG are used by function nds32_mode_change_reg.
++   Each change is queued with validate_change, passing INSN.  */
++
++static void
++nds32_replace_partial_operands (rtx *loc, rtx dst_reg, rtx src_reg,
++				unsigned int old_regno, unsigned int new_regno,
++				rtx_insn *insn)
++{
++  int i, j;
++  rtx x = *loc;
++  enum rtx_code code;
++  const char *fmt;
++
++  if (!x)
++    return;
++
++  code = GET_CODE (x);
++  fmt = GET_RTX_FORMAT (code);
++
++  if (REG_P (x) && REGNO (x) == old_regno)
++    {
++      rtx new_reg = nds32_mode_change_reg (GET_MODE (src_reg),
++					   GET_MODE (dst_reg),
++					   GET_MODE (x),
++					   new_regno,
++					   old_regno);
++
++      gcc_assert (new_reg);
++
++      ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (x);
++      REG_ATTRS (new_reg) = REG_ATTRS (x);
++      REG_POINTER (new_reg) = REG_POINTER (x);
++
++      /* ??? unshare or not?  */
++      validate_change (insn, loc, new_reg, 1);
++      return;
++    }
++
++  /* Recurse into the sub-expressions to perform the replacements.  */
++  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
++    {
++      if (fmt[i] == 'e')
++	nds32_replace_partial_operands (&XEXP (x, i), dst_reg, src_reg,
++					old_regno, new_regno, insn);
++      else if (fmt[i] == 'E') /* ??? how about V?  */
++	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
++	  nds32_replace_partial_operands (&XVECEXP (x, i, j), dst_reg, src_reg,
++					  old_regno, new_regno, insn);
++    }
++}
++
++/* Try to replace OLD_REGNO by NEW_REGNO in all of INSN, even in SET_DESTs.  */
++
++static void
++nds32_replace_all_operands (rtx dst_reg, rtx src_reg,
++			    unsigned int old_regno, unsigned int new_regno,
++			    rtx_insn *insn)
++{
++  nds32_replace_partial_operands (&PATTERN (insn), dst_reg, src_reg,
++				  old_regno, new_regno, insn);
++}
++
++/* Called via note_uses in function nds32_replace_src_operands: do the
++   replacement in the used rtx LOC as described by DATA.  */
++
++static void
++nds32_replace_src_operands_1 (rtx *loc, void *data)
++{
++  struct replace_src_operands_data *d
++    = (struct replace_src_operands_data *) data;
++
++  nds32_replace_partial_operands (loc, d->dst_reg, d->src_reg,
++				  d->old_regno, d->new_regno, d->insn);
++}
++
++/* Try to replace OLD_REGNO by NEW_REGNO only in the uses of INSN, as
++   reported by note_uses, i.e. avoiding SET_DESTs.  */
++
++static void
++nds32_replace_src_operands (rtx dst_reg, rtx src_reg,
++			    unsigned int old_regno, unsigned int new_regno,
++			    rtx_insn *insn)
++{
++  struct replace_src_operands_data d
++    = {dst_reg, src_reg, old_regno, new_regno, insn};
++
++  note_uses (&PATTERN (insn), nds32_replace_src_operands_1, &d);
++}
++
++/* Replace DST_REG (and each of its consecutive hard registers) by the
++   matching register of SRC_REG in every insn recorded in INSN_LISTS.
++   Return true if the whole group of changes could be applied.  */
++
++static bool
++nds32_try_replace_operands (rtx dst_reg, rtx src_reg,
++			    std::vector<insn_list> &insn_lists)
++{
++  const unsigned int old_base = REGNO (dst_reg);
++  const unsigned int new_base = REGNO (src_reg);
++  unsigned int row;
++
++  for (row = 0; row < insn_lists.size (); ++row)
++    {
++      /* Row N of INSN_LISTS belongs to the N-th hard register.  */
++      unsigned int old_regno = old_base + row;
++      unsigned int new_regno = new_base + row;
++      std::vector<rtx_insn *>::iterator it = insn_lists[row].begin ();
++
++      for (; it != insn_lists[row].end (); ++it)
++	{
++	  rtx_insn *insn = *it;
++	  rtx acc_reg = nds32_is_acc_insn_p (insn);
++	  bool is_acc = acc_reg && REGNO (acc_reg) == old_regno;
++
++	  if (!is_acc)
++	    /* Replace OP_IN  */
++	    nds32_replace_src_operands (dst_reg, src_reg,
++					old_regno, new_regno, insn);
++	  else
++	    /* Replace OP_OUT & OP_INOUT  */
++	    nds32_replace_all_operands (dst_reg, src_reg,
++					old_regno, new_regno, insn);
++	}
++    }
++
++  /* All replacements were queued as one group: commit them together.  */
++  if (!apply_change_group ())
++    return false;
++
++  /* The insns changed, so the dataflow information must be rebuilt.  */
++  df_analyze ();
++  return true;
++}
++
++/* Walk WORK_LIST; for every move that passes the propagation check try
++   to replace operands.  Return how many moves were replaced successfully.  */
++
++static int
++nds32_do_cprop_acc (auto_vec<rtx_insn *> &work_list)
++{
++  std::vector<insn_list> insn_lists;
++  rtx_insn *mov;
++  int done = 0;
++  int ix;
++
++  FOR_EACH_VEC_ELT (work_list, ix, mov)
++    {
++      bool ok = nds32_can_cprop_acc (mov, insn_lists);
++      if (ok && dump_file)
++	fprintf (dump_file, "\n [CPROP_ACC] insn %d will be cprop. \n",
++		 INSN_UID (mov));
++      if (ok)
++	{
++	  rtx pat = PATTERN (mov);
++	  if (nds32_try_replace_operands (SET_DEST (pat), SET_SRC (pat),
++					  insn_lists))
++	    done++;
++	}
++      insn_lists.clear ();
++    }
++
++  return done;
++}
++
++/* Return true if the register move MOV meets the conditions for being
++   propagated, otherwise return false.  */
++
++static bool
++nds32_is_target_mov_p (rtx mov)
++{
++  rtx dst = SET_DEST (PATTERN (mov));
++  rtx src = SET_SRC (PATTERN (mov));
++  unsigned int dst_regno, src_regno;
++  unsigned int dst_nregs, src_nregs;
++  bool dst_is_general, src_is_general;
++
++  gcc_assert (REG_P (dst) && REG_P (src));
++
++  dst_regno = REGNO (dst);
++  src_regno = REGNO (src);
++  dst_nregs = hard_regno_nregs[dst_regno][GET_MODE (dst)];
++  src_nregs = hard_regno_nregs[src_regno][GET_MODE (src)];
++
++  /* Do not propagate to the stack pointer, as that can leave memory accesses
++     with no scheduling dependency on the stack update.
++     Adapted from regcprop.  */
++  if (dst_regno == STACK_POINTER_REGNUM)
++    return false;
++
++  /* Likewise with the frame pointer, if we're using one.
++     Adapted from regcprop.  */
++  if (frame_pointer_needed && dst_regno == HARD_FRAME_POINTER_REGNUM)
++    return false;
++
++  /* Do not propagate to fixed or global registers: patterns can rely on
++     seeing a particular fixed register, and users can expect the chosen
++     global register in asm.
++     Adapted from regcprop.  */
++  if (fixed_regs[dst_regno] || global_regs[dst_regno])
++    return false;
++
++  /* Make sure all the consecutive registers of SET_DEST are only defined
++     by SET_SRC.  */
++  if (dst_nregs > src_nregs)
++    return false;
++
++  /* Narrowing on big endian will result in an invalid transformation.  */
++  if (dst_nregs < src_nregs
++      && (GET_MODE_SIZE (GET_MODE (src)) > UNITS_PER_WORD
++	  ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN))
++    return false;
++
++  dst_is_general = in_hard_reg_set_p (reg_class_contents[GENERAL_REGS],
++				      GET_MODE (dst), REGNO (dst));
++  src_is_general = in_hard_reg_set_p (reg_class_contents[GENERAL_REGS],
++				      GET_MODE (src), REGNO (src));
++  /* Make sure SET_DEST & SET_SRC agree on being in GENERAL_REGS or not.  */
++  if (dst_is_general ^ src_is_general)
++    return false;
++
++  return true;
++}
++
++/* Collect in WORK_LIST the register moves that are the single definition
++   of the accumulated register of an accumulate style instruction.  */
++
++static void
++nds32_cprop_acc_find_target_mov (auto_vec<rtx_insn *> &work_list)
++{
++  basic_block bb;
++  rtx_insn *insn;
++  rtx acc_reg;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    FOR_BB_INSNS (bb, insn)
++      if (INSN_P (insn))
++	{
++	  acc_reg = nds32_is_acc_insn_p (insn);
++	  if (acc_reg)
++	    {
++	      unsigned int acc_regno;
++	      enum machine_mode acc_mode;
++	      df_ref use;
++	      df_link *link;
++	      rtx_insn *def_insn;
++
++	      if (!single_set (insn) || !REG_P (acc_reg))
++		continue;
++
++	      acc_regno = REGNO (acc_reg);
++	      /* Don't replace in asms intentionally referencing hard regs.  */
++	      if (asm_noperands (PATTERN (insn)) >= 0
++		  && acc_regno == ORIGINAL_REGNO (acc_reg))
++		continue;
++
++	      if (dump_file)
++		fprintf (dump_file,
++			 "\n [CPROP_ACC] "
++			 "RTL_UID %d is an exchangeable ACC insn. \n",
++			 INSN_UID (insn));
++
++	      use = df_find_use (insn, acc_reg);
++	      gcc_assert (use);
++	      link = DF_REF_CHAIN (use);
++
++	      /* Need exactly one, non-artificial, reaching definition.  */
++	      if (!link || link->next || DF_REF_IS_ARTIFICIAL (link->ref))
++		continue;
++
++	      acc_mode = GET_MODE (acc_reg);
++	      def_insn = DF_REF_INSN (link->ref);
++	      if (nds32_is_reg_mov_p (def_insn))
++		{
++		  rtx *loc = DF_REF_LOC (link->ref);
++		  enum machine_mode loc_mode = GET_MODE (*loc);
++
++		  /* If the move instruction can't define the whole accumulated
++		     register, the replacement is invalid.  */
++		  if (loc_mode != acc_mode)
++		    if (hard_regno_nregs[acc_regno][acc_mode]
++			> hard_regno_nregs[acc_regno][loc_mode])
++		      continue;
++
++		  if (nds32_is_target_mov_p (def_insn))
++		    work_list.safe_push (def_insn);
++		}
++	    }
++	}
++}
++
++/* Entry point of the forward copy propagation pass for accumulate style
++   instructions.  Returns 0 if no candidate move exists, otherwise 1.  */
++
++static int
++nds32_cprop_acc_opt (void)
++{
++  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
++  df_note_add_problem ();
++  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
++  df_insn_rescan_all ();
++  df_analyze ();
++
++  auto_vec<rtx_insn *> work_list;
++
++  nds32_cprop_acc_find_target_mov (work_list);
++  if (work_list.is_empty())
++    {
++      if (dump_file)
++	fprintf (dump_file, "\n [CPROP_ACC] The work_list is empty. \n");
++      return 0;
++    }
++
++  if (dump_file)
++    {
++      int i;
++      rtx_insn *mov;
++
++      fprintf (dump_file, "\n [CPROP_ACC] The content of work_list:");
++      FOR_EACH_VEC_ELT (work_list, i, mov)
++	fprintf (dump_file, " %d", INSN_UID (mov));
++      fprintf (dump_file, "\n");
++    }
++
++  compute_bb_for_insn ();
++
++  int n_replace = nds32_do_cprop_acc (work_list);
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "\n [CPROP_ACC] Result: ");
++      if (n_replace == 0)
++	fprintf (dump_file, "No move can do cprop. \n");
++      else
++	fprintf (dump_file, "Do cprop for %d move. \n", n_replace);
++    }
++
++  work_list.release ();
++  return 1;
++}
++
++const pass_data pass_data_nds32_cprop_acc_opt =
++{
++  RTL_PASS,                                     /* type */
++  "cprop_acc",                                  /* name */
++  OPTGROUP_NONE,                                /* optinfo_flags */
++  TV_MACH_DEP,                                  /* tv_id */
++  0,                                            /* properties_required */
++  0,                                            /* properties_provided */
++  0,                                            /* properties_destroyed */
++  0,                                            /* todo_flags_start */
++  TODO_df_finish,				/* todo_flags_finish */
++};
++
++class pass_nds32_cprop_acc_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_cprop_acc_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_cprop_acc_opt, ctxt)
++  {}
++
++  /* opt_pass methods: enabled by optimize > 0 and flag_nds32_cprop_acc.  */
++  bool gate (function *) { return optimize > 0 && flag_nds32_cprop_acc; }
++  unsigned int execute (function *) { return nds32_cprop_acc_opt (); }
++};
++
++rtl_opt_pass *
++make_pass_nds32_cprop_acc_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_cprop_acc_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md
+index 23a9f25..7c9dfb9 100644
+--- a/gcc/config/nds32/nds32-doubleword.md
++++ b/gcc/config/nds32/nds32-doubleword.md
+@@ -23,7 +23,8 @@
+ ;; Move DImode/DFmode instructions.
+ ;; -------------------------------------------------------------
+ 
+-
++;; Do *NOT* try to split DI/DFmode before reload since LRA still seems
++;; buggy for such behavior, at least as of gcc 4.8.2...
+ (define_expand "movdi"
+   [(set (match_operand:DI 0 "general_operand" "")
+ 	(match_operand:DI 1 "general_operand" ""))]
+@@ -46,149 +47,100 @@
+ 
+ 
+ (define_insn "move_<mode>"
+-  [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, m")
+-	(match_operand:DIDF 1 "general_operand"      " r, i, m, r"))]
+-  ""
++  [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r,  r, r, Da, m, f, Q, f, *r, *f")
++	(match_operand:DIDF 1 "general_operand"      " r, i, Da, m,  r, r, Q, f, f, *f, *r"))]
++  "register_operand(operands[0], <MODE>mode)
++   || register_operand(operands[1], <MODE>mode)"
+ {
+-  rtx addr;
+-  rtx otherops[5];
+-
+   switch (which_alternative)
+     {
+     case 0:
+       return "movd44\t%0, %1";
+-
+     case 1:
+       /* reg <- const_int, we ask gcc to split instruction.  */
+       return "#";
+-
+     case 2:
+-      /* Refer to nds32_legitimate_address_p() in nds32.c,
+-         we only allow "reg", "symbol_ref", "const", and "reg + const_int"
+-         as address rtx for DImode/DFmode memory access.  */
+-      addr = XEXP (operands[1], 0);
+-
+-      otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+-      otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+-      otherops[2] = addr;
+-
+-      if (REG_P (addr))
+-	{
+-	  /* (reg) <- (mem (reg)) */
+-	  output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops);
+-	}
+-      else if (GET_CODE (addr) == PLUS)
+-	{
+-	  /* (reg) <- (mem (plus (reg) (const_int))) */
+-	  rtx op0 = XEXP (addr, 0);
+-	  rtx op1 = XEXP (addr, 1);
+-
+-	  if (REG_P (op0))
+-	    {
+-	      otherops[2] = op0;
+-	      otherops[3] = op1;
+-	      otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode);
+-	    }
+-	  else
+-	    {
+-	      otherops[2] = op1;
+-	      otherops[3] = op0;
+-	      otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode);
+-	    }
+-
+-	  /* To avoid base overwrite when REGNO(%0) == REGNO(%2).  */
+-	  if (REGNO (otherops[0]) != REGNO (otherops[2]))
+-	    {
+-	      output_asm_insn ("lwi\t%0, [%2 + (%3)]", otherops);
+-	      output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops);
+-	    }
+-	  else
+-	    {
+-	      output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops);
+-	      output_asm_insn ("lwi\t%0,[ %2 + (%3)]", otherops);
+-	    }
+-	}
+-      else
+-	{
+-	  /* (reg) <- (mem (symbol_ref ...))
+-	     (reg) <- (mem (const ...)) */
+-	  output_asm_insn ("lwi.gp\t%0, [ + %2]", otherops);
+-	  output_asm_insn ("lwi.gp\t%1, [ + %2 + 4]", otherops);
+-	}
+-
+-      /* We have already used output_asm_insn() by ourself,
+-         so return an empty string.  */
+-      return "";
+-
++      /* The memory format is (mem (reg)),
++	 we can generate 'lmw.bi' instruction.  */
++      return nds32_output_double (operands, true);
+     case 3:
+-      /* Refer to nds32_legitimate_address_p() in nds32.c,
+-         we only allow "reg", "symbol_ref", "const", and "reg + const_int"
+-         as address rtx for DImode/DFmode memory access.  */
+-      addr = XEXP (operands[0], 0);
+-
+-      otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1]));
+-      otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+-      otherops[2] = addr;
+-
+-      if (REG_P (addr))
+-	{
+-	  /* (mem (reg)) <- (reg) */
+-	  output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops);
+-	}
+-      else if (GET_CODE (addr) == PLUS)
+-	{
+-	  /* (mem (plus (reg) (const_int))) <- (reg) */
+-	  rtx op0 = XEXP (addr, 0);
+-	  rtx op1 = XEXP (addr, 1);
+-
+-	  if (REG_P (op0))
+-	    {
+-	      otherops[2] = op0;
+-	      otherops[3] = op1;
+-	      otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode);
+-	    }
+-	  else
+-	    {
+-	      otherops[2] = op1;
+-	      otherops[3] = op0;
+-	      otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode);
+-	    }
+-
+-	  /* To avoid base overwrite when REGNO(%0) == REGNO(%2).  */
+-	  if (REGNO (otherops[0]) != REGNO (otherops[2]))
+-	    {
+-	      output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops);
+-	      output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops);
+-	    }
+-	  else
+-	    {
+-	      output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops);
+-	      output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops);
+-	    }
+-	}
+-      else
+-	{
+-	  /* (mem (symbol_ref ...)) <- (reg)
+-	     (mem (const ...))      <- (reg) */
+-	  output_asm_insn ("swi.gp\t%0, [ + %2]", otherops);
+-	  output_asm_insn ("swi.gp\t%1, [ + %2 + 4]", otherops);
+-	}
+-
+-      /* We have already used output_asm_insn() by ourself,
+-         so return an empty string.  */
+-      return "";
+-
++      /* There is no 64-bit load instruction,
++	 so we split this pattern into two SImode patterns.  */
++      return "#";
++    case 4:
++      /* The memory format is (mem (reg)),
++	 we can generate 'smw.bi' instruction.  */
++      return nds32_output_double (operands, false);
++    case 5:
++      /* There is no 64-bit store instruction,
++	 so we split this pattern into two SImode patterns.  */
++      return "#";
++    case 6:
++      return nds32_output_float_load (operands);
++    case 7:
++      return nds32_output_float_store (operands);
++    case 8:
++      return "fcpysd\t%0, %1, %1";
++    case 9:
++      return "fmfdr\t%0, %1";
++    case 10:
++      return "fmtdr\t%1, %0";
+     default:
+       gcc_unreachable ();
+     }
+ }
+-  [(set_attr "type"   "move,move,move,move")
+-   (set_attr "length" "   4,  16,   8,   8")])
++  [(set_attr "type"    "alu,alu,load,load,store,store,fload,fstore,fcpy,fmfdr,fmtdr")
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "!TARGET_16_BIT")
++		     (const_int 4)
++		     (const_int 2))
++       ;; Alternative 1
++       (const_int 16)
++       ;; Alternative 2
++       (const_int 4)
++       ;; Alternative 3
++       (const_int 8)
++       ;; Alternative 4
++       (const_int 4)
++       ;; Alternative 5
++       (const_int 8)
++       ;; Alternative 6
++       (const_int 4)
++       ;; Alternative 7
++       (const_int 4)
++       ;; Alternative 8
++       (const_int 4)
++       ;; Alternative 9
++       (const_int 4)
++       ;; Alternative 10
++       (const_int 4)
++     ])
++   (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
++
++;; Split move_di pattern when the hard register is odd.
++(define_split
++  [(set (match_operand:DIDF 0 "register_operand" "")
++	(match_operand:DIDF 1 "register_operand" ""))]
++  "(NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
++    && ((REGNO (operands[0]) & 0x1) == 1))
++   || (NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
++       && ((REGNO (operands[1]) & 0x1) == 1))"
++  [(set (match_dup 2) (match_dup 3))
++   (set (match_dup 4) (match_dup 5))]
++  {
++     operands[2] = gen_lowpart (SImode, operands[0]);
++     operands[4] = gen_highpart (SImode, operands[0]);
++     operands[3] = gen_lowpart (SImode, operands[1]);
++     operands[5] = gen_highpart (SImode, operands[1]);
++  }
++)
+ 
+ (define_split
+   [(set (match_operand:DIDF 0 "register_operand"     "")
+ 	(match_operand:DIDF 1 "const_double_operand" ""))]
+-  "reload_completed"
++  "flag_pic || reload_completed"
+   [(set (match_dup 2) (match_dup 3))
+    (set (match_dup 4) (match_dup 5))]
+ {
+@@ -207,7 +159,12 @@
+   /* Actually we would like to create move behavior by ourself.
+      So that movsi expander could have chance to split large constant.  */
+   emit_move_insn (operands[2], operands[3]);
+-  emit_move_insn (operands[4], operands[5]);
++
++  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
++  if ((UINTVAL (operands[3]) & mask) == (UINTVAL (operands[5]) & mask))
++    emit_move_insn (operands[4], operands[2]);
++  else
++    emit_move_insn (operands[4], operands[5]);
+   DONE;
+ })
+ 
+@@ -217,7 +174,9 @@
+   [(set (match_operand:DIDF 0 "register_operand" "")
+ 	(match_operand:DIDF 1 "register_operand" ""))]
+   "reload_completed
+-   && (TARGET_ISA_V2 || !TARGET_16_BIT)"
++   && (TARGET_ISA_V2 || !TARGET_16_BIT)
++   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
++   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))"
+   [(set (match_dup 0) (match_dup 1))
+    (set (match_dup 2) (match_dup 3))]
+ {
+@@ -239,6 +198,28 @@
+     }
+ })
+ 
++(define_split
++  [(set (match_operand:DIDF 0 "nds32_general_register_operand" "")
++	(match_operand:DIDF 1 "memory_operand" ""))]
++  "reload_completed
++   && nds32_split_double_word_load_store_p (operands, true)"
++  [(set (match_dup 2) (match_dup 3))
++   (set (match_dup 4) (match_dup 5))]
++{
++  nds32_spilt_doubleword (operands, true);
++})
++
++(define_split
++  [(set (match_operand:DIDF 0  "memory_operand" "")
++	(match_operand:DIDF 1  "nds32_general_register_operand" ""))]
++  "reload_completed
++   && nds32_split_double_word_load_store_p (operands, false)"
++  [(set (match_dup 2) (match_dup 3))
++   (set (match_dup 4) (match_dup 5))]
++{
++  nds32_spilt_doubleword (operands, false);
++})
++
+ ;; -------------------------------------------------------------
+ ;; Boolean DImode instructions.
+ ;; -------------------------------------------------------------
+diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md
+new file mode 100644
+index 0000000..6ec2137
+--- /dev/null
++++ b/gcc/config/nds32/nds32-dspext.md
+@@ -0,0 +1,5280 @@
++;; Machine description of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++(define_expand "mov<mode>"
++  [(set (match_operand:VQIHI 0 "general_operand" "")
++	(match_operand:VQIHI 1 "general_operand" ""))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* A store needs a register source: force one if mem <- !reg.  */
++  if (MEM_P (operands[0]) && !REG_P (operands[1]))
++    operands[1] = force_reg (<MODE>mode, operands[1]);
++
++  /* If operands[1] is a constant vector that matches neither CVs2 nor
++     CVhi, materialize its value through an SImode move instead.  */
++  if (GET_CODE (operands[1]) == CONST_VECTOR
++      && !satisfies_constraint_CVs2 (operands[1])
++      && !satisfies_constraint_CVhi (operands[1]))
++    {
++      HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]);
++      rtx tmp_rtx;
++
++      tmp_rtx = can_create_pseudo_p ()
++		? gen_reg_rtx (SImode)
++		: simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
++
++      emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode));
++      convert_move (operands[0], tmp_rtx, false);
++      DONE;
++    }
++
++  if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1]))
++    {
++      if (nds32_tls_referenced_p (operands [1]))
++	{
++	  nds32_expand_tls_move (operands);
++	  DONE;
++	}
++      else if (flag_pic)
++	{
++	  nds32_expand_pic_move (operands);
++	  DONE;
++	}
++    }
++})
++
++(define_insn "*mov<mode>"
++  [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$  l,$  l,$  l,$  d,  d, r,$   d,    r,    r,    r, *f, *f,  r, *f,  Q, A")
++	(match_operand:VQIHI 1 "nds32_vmove_operand"  " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f,  r, *f,  Q, *f, r"))]
++  "NDS32_EXT_DSP_P ()
++   && (register_operand(operands[0], <MODE>mode)
++       || register_operand(operands[1], <MODE>mode))"
++{
++  switch (which_alternative)
++    {
++    case 0:
++      return "mov55\t%0, %1";
++    case 1:
++      return "ori\t%0, %1, 0";
++    case 2:
++    case 3:
++    case 4:
++    case 5:
++      return nds32_output_16bit_store (operands, <byte>);
++    case 6:
++      return nds32_output_32bit_store (operands, <byte>);
++    case 7:
++    case 8:
++    case 9:
++    case 10:
++    case 11:
++      return nds32_output_16bit_load (operands, <byte>);
++    case 12:
++      return nds32_output_32bit_load (operands, <byte>);
++    case 13:
++      return "movpi45\t%0, %1";
++    case 14:
++      return "movi55\t%0, %1";
++    case 15:
++      return "movi\t%0, %1";
++    case 16:
++      return "sethi\t%0, hi20(%1)";
++    case 17:
++      if (TARGET_FPU_SINGLE)
++	return "fcpyss\t%0, %1, %1";
++      else
++	return "#";
++    case 18:
++      return "fmtsr\t%1, %0";
++    case 19:
++      return "fmfsr\t%0, %1";
++    case 20:
++      return nds32_output_float_load (operands);
++    case 21:
++      return nds32_output_float_store (operands);
++    case 22:
++      return "mtusr\t%1, %0";
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore,alu")
++   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   2,   4,  2,  2,  4,  4,   4,    4,    4,    4,     4,  4")
++   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1, v3m,  v1, v1, v1, v1, v1, fpu,  fpu,  fpu,  fpu,   fpu, v1")])
++
++(define_expand "movv2si"
++  [(set (match_operand:V2SI 0 "general_operand" "")
++	(match_operand:V2SI 1 "general_operand" ""))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* A store needs a register source: force one if mem <- !reg.  */
++  if (MEM_P (operands[0]) && !REG_P (operands[1]))
++    operands[1] = force_reg (V2SImode, operands[1]);
++})
++
++(define_insn "*movv2si"
++  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r,  r, r, Da, m, f, Q, f, r, f")
++	(match_operand:V2SI 1 "general_operand"      " r, i, Da, m,  r, r, Q, f, f, f, r"))]
++  "NDS32_EXT_DSP_P ()
++   && (register_operand(operands[0], V2SImode)
++       || register_operand(operands[1], V2SImode))"
++{
++  switch (which_alternative)
++    {
++    case 0:
++      return "movd44\t%0, %1";
++    case 1:
++      /* reg <- const_int, we ask gcc to split the instruction.  */
++      return "#";
++    case 2:
++      /* The memory format is (mem (reg)),
++	 so we can generate an 'lmw.bi' instruction.  */
++      return nds32_output_double (operands, true);
++    case 3:
++      /* There is no 64-bit load instruction,
++	 so we split this pattern into two SImode patterns.  */
++      return "#";
++    case 4:
++      /* The memory format is (mem (reg)),
++	 so we can generate an 'smw.bi' instruction.  */
++      return nds32_output_double (operands, false);
++    case 5:
++      /* There is no 64-bit store instruction,
++	 so we split this pattern into two SImode patterns.  */
++      return "#";
++    case 6:
++      return nds32_output_float_load (operands);
++    case 7:
++      return nds32_output_float_store (operands);
++    case 8:
++      return "fcpysd\t%0, %1, %1";
++    case 9:
++      return "fmfdr\t%0, %1";
++    case 10:
++      return "fmtdr\t%1, %0";
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown")
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "!TARGET_16_BIT")
++		     (const_int 4)
++		     (const_int 2))
++       ;; Alternative 1
++       (const_int 16)
++       ;; Alternative 2
++       (const_int 4)
++       ;; Alternative 3
++       (const_int 8)
++       ;; Alternative 4
++       (const_int 4)
++       ;; Alternative 5
++       (const_int 8)
++       ;; Alternative 6
++       (const_int 4)
++       ;; Alternative 7
++       (const_int 4)
++       ;; Alternative 8
++       (const_int 4)
++       ;; Alternative 9
++       (const_int 4)
++       ;; Alternative 10
++       (const_int 4)
++     ])
++   (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
++
++(define_expand "movmisalign<mode>"
++  [(set (match_operand:VQIHI 0 "general_operand" "")
++	(match_operand:VQIHI 1 "general_operand" ""))]
++  "NDS32_EXT_DSP_P ()"
++{
++  rtx addr;
++  if (MEM_P (operands[0]) && !REG_P (operands[1]))
++    operands[1] = force_reg (<MODE>mode, operands[1]);
++
++  if (MEM_P (operands[0]))
++    {
++      addr = force_reg (Pmode, XEXP (operands[0], 0));
++      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
++    }
++  else
++    {
++      addr = force_reg (Pmode, XEXP (operands[1], 0));
++      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
++    }
++  DONE;
++})
++
++(define_expand "unaligned_load<mode>"
++  [(set (match_operand:VQIHI 0 "register_operand" "=r")
++	(unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_ISA_V3M)
++    nds32_expand_unaligned_load (operands, <MODE>mode);
++  else
++    emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1])));
++  DONE;
++})
++
++(define_insn "unaligned_load_w<mode>"  ;; Insn form of the unaligned VQIHI load wrapped in UNSPEC_UALOAD_W.
++  [(set (match_operand:VQIHI 0 "register_operand"                          "=  r")
++	(unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
++  "NDS32_EXT_DSP_P ()"
++{
++  return nds32_output_lmw_single_word (operands);  /* Assembly text is produced by this helper, defined elsewhere.  */
++}
++  [(set_attr "type"   "load")
++   (set_attr "length"    "4")]
++)
++
++(define_expand "unaligned_store<mode>"  ;; Store VQIHI register operand 1 to the address held in SI register operand 0.
++  [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r"))
++	(unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_ISA_V3M)
++    nds32_expand_unaligned_store (operands, <MODE>mode);  /* V3M uses the helper (defined elsewhere) instead of the _w insn.  */
++  else
++    emit_insn (gen_unaligned_store_w<mode> (gen_rtx_MEM (<MODE>mode, operands[0]), operands[1]));
++  DONE;
++})
++
++(define_insn "unaligned_store_w<mode>"  ;; Insn form of the unaligned VQIHI store wrapped in UNSPEC_UASTORE_W.
++  [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand"      "=Umw")
++	(unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "   r")] UNSPEC_UASTORE_W))]
++  "NDS32_EXT_DSP_P ()"
++{
++  return nds32_output_smw_single_word (operands);  /* Assembly text is produced by this helper, defined elsewhere.  */
++}
++  [(set_attr "type"   "store")
++   (set_attr "length"     "4")]
++)
++
++(define_insn "<uk>add<mode>3"  ;; Register-register VQIHI add, one insn per code in the all_plus iterator.
++  [(set (match_operand:VQIHI 0 "register_operand"                 "=r")
++	(all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
++			(match_operand:VQIHI 2 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "<uk>add<bits> %0, %1, %2"  ;; <uk> and <bits> are iterator attributes defined outside this section.
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "<uk>adddi3"  ;; Register-register DI add, one insn per code in the all_plus iterator.
++  [(set (match_operand:DI 0 "register_operand"              "=r")
++	(all_plus:DI (match_operand:DI 1 "register_operand" " r")
++		     (match_operand:DI 2 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "<uk>add64 %0, %1, %2"  ;; <uk> is an iterator attribute defined outside this section.
++  [(set_attr "type"    "dalu64")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "raddv4qi3"  ;; Per lane: sign-extend both V4QI inputs to V4HI, add, arithmetic shift right by 1, truncate.
++  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
++	(truncate:V4QI
++	  (ashiftrt:V4HI
++	    (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
++		       (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "radd8\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++
++(define_insn "uraddv4qi3"  ;; Per lane: zero-extend both V4QI inputs to V4HI, add, logical shift right by 1, truncate.
++  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
++	(truncate:V4QI
++	  (lshiftrt:V4HI
++	    (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
++		       (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "uradd8\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "raddv2hi3"  ;; Per lane: sign-extend both V2HI inputs to V2SI, add, arithmetic shift right by 1, truncate.
++  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
++	(truncate:V2HI
++	  (ashiftrt:V2SI
++	    (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
++		       (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "radd16\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "uraddv2hi3"  ;; Per lane: zero-extend both V2HI inputs to V2SI, add, logical shift right by 1, truncate.
++  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
++	(truncate:V2HI
++	  (lshiftrt:V2SI
++	    (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
++		       (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "uradd16\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "radddi3"  ;; Sign-extend both DI inputs to TI, add, arithmetic shift right by 1, truncate back to DI.
++  [(set (match_operand:DI 0 "register_operand"            "=r")
++	(truncate:DI
++	  (ashiftrt:TI
++	    (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
++		     (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
++	  (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "radd64\t%0, %1, %2"
++  [(set_attr "type"    "dalu64")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++
++(define_insn "uradddi3"  ;; Zero-extend both DI inputs to TI, add, logical shift right by 1, truncate back to DI.
++  [(set (match_operand:DI 0 "register_operand"            "=r")
++	(truncate:DI
++	  (lshiftrt:TI
++	    (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
++		     (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
++	  (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "uradd64\t%0, %1, %2"
++  [(set_attr "type"    "dalu64")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "<uk>sub<mode>3"  ;; Register-register VQIHI subtract, one insn per code in the all_minus iterator.
++  [(set (match_operand:VQIHI 0 "register_operand"                  "=r")
++	(all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
++			 (match_operand:VQIHI 2 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "<uk>sub<bits> %0, %1, %2"  ;; <uk> and <bits> are iterator attributes defined outside this section.
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "<uk>subdi3"  ;; Register-register DI subtract, one insn per code in the all_minus iterator.
++  [(set (match_operand:DI 0 "register_operand"               "=r")
++	(all_minus:DI (match_operand:DI 1 "register_operand" " r")
++		      (match_operand:DI 2 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "<uk>sub64 %0, %1, %2"  ;; <uk> is an iterator attribute defined outside this section.
++  [(set_attr "type"    "dalu64")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
++(define_insn "rsubv4qi3"  ;; Per lane: sign-extend both V4QI inputs to V4HI, subtract (op1 - op2), arithmetic shift right by 1, truncate.
++  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
++	(truncate:V4QI
++	  (ashiftrt:V4HI
++	    (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
++			(sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "rsub8\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
++
++(define_insn "ursubv4qi3"  ;; Per lane: zero-extend both V4QI inputs to V4HI, subtract (op1 - op2), logical shift right by 1, truncate.
++  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
++	(truncate:V4QI
++	  (lshiftrt:V4HI
++	    (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
++			(zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "ursub8\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
++
++(define_insn "rsubv2hi3"  ;; Per lane: sign-extend both V2HI inputs to V2SI, subtract (op1 - op2), arithmetic shift right by 1, truncate.
++  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
++	(truncate:V2HI
++	  (ashiftrt:V2SI
++	    (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
++			(sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "rsub16\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
++
++(define_insn "ursubv2hi3"  ;; Per lane: zero-extend both V2HI inputs to V2SI, subtract (op1 - op2), logical shift right by 1, truncate.
++  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
++	(truncate:V2HI
++	  (lshiftrt:V2SI
++	    (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
++			(zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "ursub16\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
++
++(define_insn "rsubdi3"  ;; Sign-extend both DI inputs to TI, subtract (op1 - op2), arithmetic shift right by 1, truncate back to DI.
++  [(set (match_operand:DI 0 "register_operand"                   "=r")
++	(truncate:DI
++	  (ashiftrt:TI
++	    (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
++		      (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
++	  (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "rsub64\t%0, %1, %2"
++  [(set_attr "type"    "dalu64")
++   (set_attr "length"  "4")])
++
++
++(define_insn "ursubdi3"  ;; Zero-extend both DI inputs to TI, subtract (op1 - op2), logical shift right by 1, truncate back to DI.
++  [(set (match_operand:DI 0 "register_operand"                   "=r")
++	(truncate:DI
++	  (lshiftrt:TI
++	    (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
++		      (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
++	  (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "ursub64\t%0, %1, %2"
++  [(set_attr "type"    "dalu64")
++   (set_attr "length"  "4")])
++
++(define_expand "cras16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "cras16_1_le"  ;; Little-endian form: lane 0 = op1[0] - op2[1], lane 1 = op2[0] + op1[1].
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (minus:HI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (plus:HI
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 1)]))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "cras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "cras16_1_be"  ;; Big-endian form: lane 1 = op1[1] - op2[0], lane 0 = op2[1] + op1[0].
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (minus:HI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (plus:HI
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "cras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "kcras16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "kcras16_1_le"  ;; Little-endian form: lane 0 = ss_minus (op1[0], op2[1]), lane 1 = ss_plus (op2[0], op1[1]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (ss_minus:HI  ;; Signed saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (ss_plus:HI  ;; Signed saturating addition.
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 1)]))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "kcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "kcras16_1_be"  ;; Big-endian form: lane 1 = ss_minus (op1[1], op2[0]), lane 0 = ss_plus (op2[1], op1[0]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (ss_minus:HI  ;; Signed saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (ss_plus:HI  ;; Signed saturating addition.
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "kcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "ukcras16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "ukcras16_1_le"  ;; Little-endian form: lane 0 = us_minus (op1[0], op2[1]), lane 1 = us_plus (op2[0], op1[1]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (us_minus:HI  ;; Unsigned saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (us_plus:HI  ;; Unsigned saturating addition.
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 1)]))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "ukcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "ukcras16_1_be"  ;; Big-endian form: lane 1 = us_minus (op1[1], op2[0]), lane 0 = us_plus (op2[1], op1[0]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (us_minus:HI  ;; Unsigned saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (us_plus:HI  ;; Unsigned saturating addition.
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "ukcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "crsa16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "crsa16_1_le"  ;; Little-endian form: lane 1 = op1[1] - op2[0], lane 0 = op1[0] + op2[1].
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (minus:HI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (plus:HI
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "crsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "crsa16_1_be"  ;; Big-endian form: lane 0 = op1[0] - op2[1], lane 1 = op1[1] + op2[0].
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (minus:HI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (plus:HI
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "crsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "kcrsa16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "kcrsa16_1_le"  ;; Little-endian form: lane 1 = ss_minus (op1[1], op2[0]), lane 0 = ss_plus (op1[0], op2[1]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (ss_minus:HI  ;; Signed saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (ss_plus:HI  ;; Signed saturating addition.
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "kcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "kcrsa16_1_be"  ;; Big-endian form: lane 0 = ss_minus (op1[0], op2[1]), lane 1 = ss_plus (op1[1], op2[0]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (ss_minus:HI  ;; Signed saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (ss_plus:HI  ;; Signed saturating addition.
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "kcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "ukcrsa16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "ukcrsa16_1_le"  ;; Little-endian form: lane 1 = us_minus (op1[1], op2[0]), lane 0 = us_plus (op1[0], op2[1]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (us_minus:HI  ;; Unsigned saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (us_plus:HI  ;; Unsigned saturating addition.
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "ukcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "ukcrsa16_1_be"  ;; Big-endian form: lane 0 = us_minus (op1[0], op2[1]), lane 1 = us_plus (op1[1], op2[0]).
++  [(set (match_operand:V2HI 0 "register_operand"         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (us_minus:HI  ;; Unsigned saturating subtraction.
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand" " r")
++		(parallel [(const_int 0)]))
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (us_plus:HI  ;; Unsigned saturating addition.
++	      (vec_select:HI
++		(match_dup 1)
++		(parallel [(const_int 1)]))
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "ukcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "rcras16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "rcras16_1_le"  ;; Little-endian form: lane 0 = (op1[0] - op2[1]) >> 1, lane 1 = (op2[0] + op1[1]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI  ;; Computed in SImode on sign-extended lanes, arithmetic shift, then truncated to HI.
++		(minus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 0)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI
++		(plus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 0)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "rcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "rcras16_1_be"  ;; Big-endian form: lane 1 = (op1[1] - op2[0]) >> 1, lane 0 = (op2[1] + op1[0]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI  ;; Computed in SImode on sign-extended lanes, arithmetic shift, then truncated to HI.
++		(minus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 1)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI
++		(plus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 1)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "rcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "urcras16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "urcras16_1_le"  ;; Little-endian form: lane 0 = (op1[0] - op2[1]) >> 1, lane 1 = (op2[0] + op1[1]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI  ;; Computed in SImode on zero-extended lanes, logical shift, then truncated to HI.
++		(minus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 0)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI
++		(plus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 0)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "urcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "urcras16_1_be"  ;; Big-endian form: lane 1 = (op1[1] - op2[0]) >> 1, lane 0 = (op2[1] + op1[0]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI  ;; Computed in SImode on zero-extended lanes, logical shift, then truncated to HI.
++		(minus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 1)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI
++		(plus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 1)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "urcras16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "rcrsa16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "rcrsa16_1_le"  ;; Little-endian form: lane 1 = (op1[1] - op2[0]) >> 1, lane 0 = (op1[0] + op2[1]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI  ;; Computed in SImode on sign-extended lanes, arithmetic shift, then truncated to HI.
++	        (minus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 1)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI
++		(plus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 0)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "rcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "rcrsa16_1_be"  ;; Big-endian form: lane 0 = (op1[0] - op2[1]) >> 1, lane 1 = (op1[1] + op2[0]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI  ;; Computed in SImode on sign-extended lanes, arithmetic shift, then truncated to HI.
++	        (minus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 0)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (ashiftrt:SI
++		(plus:SI
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 1)])))
++		  (sign_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "rcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "urcrsa16_1"  ;; Expander over three V2HI registers; only selects the endian-specific insn.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* The _be and _le insns differ in lane numbering and merge mask.  */
++    emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2]));
++  else
++    emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "urcrsa16_1_le"  ;; Little-endian form: lane 1 = (op1[1] - op2[0]) >> 1, lane 0 = (op1[0] + op2[1]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI  ;; Computed in SImode on zero-extended lanes, logical shift, then truncated to HI.
++	        (minus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 1)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI
++		(plus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 0)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (const_int 2)))]  ;; Mask 2: lane 1 comes from the first (minus) vector, lane 0 from the second (plus).
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "urcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_insn "urcrsa16_1_be"  ;; Big-endian form: lane 0 = (op1[0] - op2[1]) >> 1, lane 1 = (op1[1] + op2[0]) >> 1.
++  [(set (match_operand:V2HI 0 "register_operand"           "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI  ;; Computed in SImode on zero-extended lanes, logical shift, then truncated to HI.
++	        (minus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 1 "register_operand" " r")
++		      (parallel [(const_int 0)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand" " r")
++		      (parallel [(const_int 1)]))))
++		(const_int 1))))
++	  (vec_duplicate:V2HI
++	    (truncate:HI
++	      (lshiftrt:SI
++		(plus:SI
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 1)
++		      (parallel [(const_int 1)])))
++		  (zero_extend:SI
++		    (vec_select:HI
++		      (match_dup 2)
++		      (parallel [(const_int 0)]))))
++		(const_int 1))))
++	  (const_int 1)))]  ;; Mask 1: lane 0 comes from the first (minus) vector, lane 1 from the second (plus).
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "urcrsa16\t%0, %1, %2"
++  [(set_attr "type" "dalu")]
++)
++
++(define_expand "<shift>v2hi3"  ;; V2HI shift expander, one per code in the shifts iterator (defined outside this section).
++  [(set (match_operand:V2HI 0 "register_operand"                  "")
++	(shifts:V2HI (match_operand:V2HI 1 "register_operand"     "")
++		     (match_operand:SI   2 "nds32_rimm4u_operand" "")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (operands[2] == const0_rtx)  /* A constant zero shift count is emitted as a plain move.  */
++    {
++      emit_move_insn (operands[0], operands[1]);
++      DONE;
++    }
++})  ;; For any other count the code falls through and the RTL template above is emitted.
++
++(define_insn "*ashlv2hi3"  ;; V2HI left shift: alternative 0 (Iu04 count) emits slli16, alternative 1 (register count) emits sll16.
++  [(set (match_operand:V2HI 0 "register_operand"                "=   r, r")
++	(ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
++		     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   slli16\t%0, %1, %2
++   sll16\t%0, %1, %2"
++  [(set_attr "type"   "dalu,dalu")
++   (set_attr "length" "   4,   4")])
++
++(define_insn "kslli16"  ;; V2HI ss_ashift (signed saturating left shift): alternative 0 (Iu04 count) emits kslli16, alternative 1 (register) emits ksll16.
++  [(set (match_operand:V2HI 0 "register_operand"                   "=   r, r")
++	(ss_ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
++			(match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   kslli16\t%0, %1, %2
++   ksll16\t%0, %1, %2"
++  [(set_attr "type"   "dalu,dalu")
++   (set_attr "length" "   4,   4")])
++
++(define_insn "*ashrv2hi3"  ;; V2HI arithmetic right shift: alternative 0 (Iu04 count) emits srai16, alternative 1 (register count) emits sra16.
++  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
++	(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
++		       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   srai16\t%0, %1, %2
++   sra16\t%0, %1, %2"
++  [(set_attr "type"   "dalu,dalu")
++   (set_attr "length" "   4,   4")])
++
++(define_insn "sra16_round"  ;; V2HI arithmetic right shift wrapped in UNSPEC_ROUND: emits srai16.u (Iu04 count) or sra16.u (register count).
++  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
++	(unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
++				     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
++		     UNSPEC_ROUND))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   srai16.u\t%0, %1, %2
++   sra16.u\t%0, %1, %2"
++  [(set_attr "type"   "daluround,daluround")
++   (set_attr "length" "         4,       4")])
++
++(define_insn "*lshrv2hi3"  ;; V2HI logical right shift: alternative 0 (Iu04 count) emits srli16, alternative 1 (register count) emits srl16.
++  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
++	(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
++		       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   srli16\t%0, %1, %2
++   srl16\t%0, %1, %2"
++  [(set_attr "type"   "dalu,dalu")
++   (set_attr "length" "   4,   4")])
++
++(define_insn "srl16_round"  ;; V2HI logical right shift wrapped in UNSPEC_ROUND: emits srli16.u (Iu04 count) or srl16.u (register count).
++  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
++	(unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
++				     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
++		     UNSPEC_ROUND))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   srli16.u\t%0, %1, %2
++   srl16.u\t%0, %1, %2"
++  [(set_attr "type"   "daluround,daluround")
++   (set_attr "length" "        4,        4")])
++
++(define_insn "kslra16"  ;; V2HI shift whose direction depends on the sign of SI register operand 2.
++  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
++	(if_then_else:V2HI
++	  (lt:SI (match_operand:SI 2 "register_operand"           " r")
++		 (const_int 0))
++	  (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")  ;; Count < 0: arithmetic right shift by the negated count.
++			 (neg:SI (match_dup 2)))
++	  (ashift:V2HI (match_dup 1)  ;; Otherwise: left shift by the count.
++		       (match_dup 2))))]
++  "NDS32_EXT_DSP_P ()"
++  "kslra16\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
++
++(define_insn "kslra16_round"  ;; As kslra16, but the right-shift arm is wrapped in UNSPEC_ROUND; emits kslra16.u.
++  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
++	(if_then_else:V2HI
++	  (lt:SI (match_operand:SI 2 "register_operand"           " r")
++		 (const_int 0))
++	  (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")  ;; Count < 0: right shift by the negated count.
++				       (neg:SI (match_dup 2)))]
++		       UNSPEC_ROUND)
++	  (ashift:V2HI (match_dup 1)  ;; Otherwise: left shift by the count.
++		       (match_dup 2))))]
++  "NDS32_EXT_DSP_P ()"
++  "kslra16.u\t%0, %1, %2"
++  [(set_attr "type"    "daluround")
++   (set_attr "length"  "4")])
++
++(define_insn "cmpeq<bits>"
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r")
++			   (match_operand:VQIHI 2 "register_operand" " r"))]
++		   UNSPEC_VEC_COMPARE))]
++  "NDS32_EXT_DSP_P ()"
++  "cmpeq<bits>\t%0, %1, %2"
++  [(set_attr "type"    "dcmp")
++   (set_attr "length"  "4")])
++
++(define_insn "scmplt<bits>"
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r")
++			   (match_operand:VQIHI 2 "register_operand" " r"))]
++		   UNSPEC_VEC_COMPARE))]
++  "NDS32_EXT_DSP_P ()"
++  "scmplt<bits>\t%0, %1, %2"
++  [(set_attr "type"    "dcmp")
++   (set_attr "length"  "4")])
++
++(define_insn "scmple<bits>"
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r")
++			   (match_operand:VQIHI 2 "register_operand" " r"))]
++		   UNSPEC_VEC_COMPARE))]
++  "NDS32_EXT_DSP_P ()"
++  "scmple<bits>\t%0, %1, %2"
++  [(set_attr "type"    "dcmp")
++   (set_attr "length"  "4")])
++
++(define_insn "ucmplt<bits>"
++  [(set (match_operand:SI 0 "register_operand"                        "=r")
++	(unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r")
++			    (match_operand:VQIHI 2 "register_operand" " r"))]
++		   UNSPEC_VEC_COMPARE))]
++  "NDS32_EXT_DSP_P ()"
++  "ucmplt<bits>\t%0, %1, %2"
++  [(set_attr "type"    "dcmp")
++   (set_attr "length"  "4")])
++
++(define_insn "ucmple<bits>"
++  [(set (match_operand:SI 0 "register_operand"                        "=r")
++	(unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r")
++			    (match_operand:VQIHI 2 "register_operand" " r"))]
++		   UNSPEC_VEC_COMPARE))]
++  "NDS32_EXT_DSP_P ()"
++  "ucmple<bits>\t%0, %1, %2"
++  [(set_attr "type"    "dcmp")
++   (set_attr "length"  "4")])
++
++(define_insn "sclip16"
++  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
++		      (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
++		     UNSPEC_CLIPS))]
++  "NDS32_EXT_DSP_P ()"
++  "sclip16\t%0, %1, %2"
++  [(set_attr "type"    "dclip")
++   (set_attr "length"  "4")])
++
++(define_insn "uclip16"
++  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
++		      (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
++		     UNSPEC_CLIP))]
++  "NDS32_EXT_DSP_P ()"
++  "uclip16\t%0, %1, %2"
++  [(set_attr "type"    "dclip")
++   (set_attr "length"  "4")])
++
++(define_insn "khm16"
++  [(set (match_operand:V2HI 0 "register_operand"                "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
++		      (match_operand:V2HI 2 "register_operand" "  r")]
++		     UNSPEC_KHM))]
++  "NDS32_EXT_DSP_P ()"
++  "khm16\t%0, %1, %2"
++  [(set_attr "type"    "dmul")
++   (set_attr "length"  "4")])
++
++(define_insn "khmx16"
++  [(set (match_operand:V2HI 0 "register_operand"                "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
++		      (match_operand:V2HI 2 "register_operand" "  r")]
++		     UNSPEC_KHMX))]
++  "NDS32_EXT_DSP_P ()"
++  "khmx16\t%0, %1, %2"
++  [(set_attr "type"    "dmul")
++   (set_attr "length"  "4")])
++
++(define_expand "vec_setv4qi"  ; Replace one QI lane of V4QI operand 0 with operand 1.
++  [(match_operand:V4QI 0 "register_operand" "")
++   (match_operand:QI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  HOST_WIDE_INT pos = INTVAL (operands[2]);  /* Lane index to replace.  */
++  if (pos < 0 || pos > 3)  /* V4QI only has lanes 0..3; also guards the shift.  */
++    gcc_unreachable ();
++  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;  /* One-hot vec_merge mask.  */
++  emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1],
++				       operands[0], GEN_INT (elem)));
++  DONE;
++})
++
++(define_expand "insb"  ; Operand 0 = operand 1 with byte number operand 3 replaced from operand 2.
++  [(match_operand:V4QI 0 "register_operand" "")
++   (match_operand:V4QI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")
++   (match_operand:SI 3 "const_int_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0)
++    gcc_unreachable ();
++
++  rtx src = gen_reg_rtx (QImode);
++
++  convert_move (src, operands[2], false);  /* Bring operand 2 into QImode.  */
++
++  HOST_WIDE_INT selector_index;
++  /* Big-endian targets need the byte index reversed.  */
++  if (TARGET_BIG_ENDIAN)
++    selector_index = 4 - INTVAL (operands[3]) - 1;
++  else
++    selector_index = INTVAL (operands[3]);
++  rtx selector = gen_int_mode (1 << selector_index, SImode);  /* One-hot mask.  */
++  emit_insn (gen_vec_setv4qi_internal (operands[0], src,
++				       operands[1], selector));
++  DONE;
++})
++
++(define_expand "insvsi"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "")
++			 (match_operand:SI 1 "const_int_operand" "")
++			 (match_operand:SI 2 "nds32_insv_operand" ""))
++	(match_operand:SI 3 "register_operand" ""))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (INTVAL (operands[1]) != 8)
++    FAIL;
++}
++  [(set_attr "type"    "dinsb")
++   (set_attr "length"  "4")])
++
++
++(define_insn "insvsi_internal"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
++			 (const_int 8)
++			 (match_operand:SI 1 "nds32_insv_operand"  "i"))
++	(match_operand:SI 2                  "register_operand"    "r"))]
++  "NDS32_EXT_DSP_P ()"
++  "insb\t%0, %2, %v1"
++  [(set_attr "type"    "dinsb")
++   (set_attr "length"  "4")])
++
++(define_insn "insvsiqi_internal"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
++			 (const_int 8)
++			 (match_operand:SI 1 "nds32_insv_operand"  "i"))
++	(zero_extend:SI (match_operand:QI 2 "register_operand"    "r")))]
++  "NDS32_EXT_DSP_P ()"
++  "insb\t%0, %2, %v1"
++  [(set_attr "type"    "dinsb")
++   (set_attr "length"  "4")])
++
++;; Intermediate pattern used to synthesize insvsiqi_internal.
++;; v0 = ((v1 & 0xff) << 8)
++(define_insn_and_split "and0xff_s8"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
++			   (const_int 8))
++		(const_int 65280)))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (SImode);
++  emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode)));
++  emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode)));
++  DONE;
++})
++
++;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) & 0xff0000)
++(define_insn_and_split "insbsi2"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
++			(const_int -16711681))
++		(and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
++				   (const_int 16))
++			(const_int 16711680))))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (SImode);
++  emit_move_insn (tmp, operands[1]);
++  emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2]));
++  emit_move_insn (operands[0], tmp);
++  DONE;
++})
++
++;; v0 = (v1 & 0xff00ffff) | v2
++(define_insn_and_split "ior_and0xff00ffff_reg"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
++			(const_int -16711681))
++		(match_operand:SI 2 "register_operand" "r")))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (SImode);
++  emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode)));
++  emit_insn (gen_iorsi3 (operands[0], tmp, operands[2]));
++  DONE;
++})
++
++(define_insn "vec_setv4qi_internal"
++  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
++	(vec_merge:V4QI
++	  (vec_duplicate:V4QI
++	    (match_operand:QI 1 "register_operand"        "    r,    r,    r,    r"))
++	  (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
++	  (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++       const char *pats[] = { "insb\t%0, %1, 3",
++			      "insb\t%0, %1, 2",
++			      "insb\t%0, %1, 1",
++			      "insb\t%0, %1, 0" };
++      return pats[which_alternative];
++    }
++  else
++    {
++       const char *pats[] = { "insb\t%0, %1, 0",
++			      "insb\t%0, %1, 1",
++			      "insb\t%0, %1, 2",
++			      "insb\t%0, %1, 3" };
++      return pats[which_alternative];
++    }
++}
++  [(set_attr "type"    "dinsb")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_setv4qi_internal_vec"
++  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
++	(vec_merge:V4QI
++	  (vec_duplicate:V4QI
++	    (vec_select:QI
++	      (match_operand:V4QI 1 "register_operand"    "    r,    r,    r,    r")
++	      (parallel [(const_int 0)])))
++	  (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
++	  (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   insb\t%0, %1, 0
++   insb\t%0, %1, 1
++   insb\t%0, %1, 2
++   insb\t%0, %1, 3"
++  [(set_attr "type"    "dinsb")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_mergev4qi_and_cv0_1"
++  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
++	(vec_merge:V4QI
++	  (vec_duplicate:V4QI
++	    (vec_select:QI
++	      (match_operand:V4QI 1 "register_operand" "  l,r")
++	      (parallel [(const_int 0)])))
++	  (const_vector:V4QI [
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)])
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_mergev4qi_and_cv0_2"  ; Zero-extend lane 0 of a V4QI, operands of vec_merge swapped.
++  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
++	(vec_merge:V4QI
++	  (const_vector:V4QI [
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)])
++	  (vec_duplicate:V4QI
++	    (vec_select:QI
++	      (match_operand:V4QI 1 "register_operand" "  l,r")
++	      (parallel [(const_int 0)])))
++	  (const_int 14)))]  ; Set bits pick the zero vector: lanes 1-3 are zero, lane 0 is the byte.
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_mergeqi_and_cv0_1"
++  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
++	(vec_merge:V4QI
++	  (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
++	  (const_vector:V4QI [
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)])
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_mergeqi_and_cv0_2"  ; Zero-extend a QI into a V4QI, operands of vec_merge swapped.
++  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
++	(vec_merge:V4QI
++	  (const_vector:V4QI [
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)
++	    (const_int 0)])
++	  (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
++	  (const_int 14)))]  ; Set bits pick the zero vector: lanes 1-3 are zero, lane 0 is the byte.
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_expand "vec_setv2hi"  ; Replace one HI lane of V2HI operand 0 with operand 1.
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:HI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  HOST_WIDE_INT pos = INTVAL (operands[2]);  /* Lane index to replace.  */
++  if (pos < 0 || pos > 1)  /* V2HI only has lanes 0 and 1; also guards the shift.  */
++    gcc_unreachable ();
++  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;  /* One-hot vec_merge mask.  */
++  emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1],
++				       operands[0], GEN_INT (elem)));
++  DONE;
++})
++
++(define_insn "vec_setv2hi_internal"
++  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (match_operand:HI 1 "register_operand"    "    r,    r"))
++	  (match_operand:V2HI 2 "register_operand"    "    r,    r")
++	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      const char *pats[] = { "pkbb16\t%0, %1, %2",
++			     "pktb16\t%0, %2, %1" };
++      return pats[which_alternative];
++    }
++  else
++    {
++      const char *pats[] = { "pktb16\t%0, %2, %1",
++			     "pkbb16\t%0, %1, %2" };
++      return pats[which_alternative];
++    }
++}
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_mergev2hi_and_cv0_1"
++  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (vec_select:HI
++	      (match_operand:V2HI 1 "register_operand" "  l,r")
++	      (parallel [(const_int 0)])))
++	  (const_vector:V2HI [
++	    (const_int 0)
++	    (const_int 0)])
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeh33\t%0, %1
++   zeh\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_mergev2hi_and_cv0_2"
++  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
++	(vec_merge:V2HI
++	  (const_vector:V2HI [
++	    (const_int 0)
++	    (const_int 0)])
++	  (vec_duplicate:V2HI
++	    (vec_select:HI
++	      (match_operand:V2HI 1 "register_operand" "  l,r")
++	      (parallel [(const_int 0)])))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeh33\t%0, %1
++   zeh\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_mergehi_and_cv0_1"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
++	  (const_vector:V2HI [
++	    (const_int 0)
++	    (const_int 0)])
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeh33\t%0, %1
++   zeh\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_mergehi_and_cv0_2"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
++	(vec_merge:V2HI
++	  (const_vector:V2HI [
++	    (const_int 0)
++	    (const_int 0)])
++	  (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeh33\t%0, %1
++   zeh\t%0, %1"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_expand "pkbb"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V2HI 1 "register_operand")
++   (match_operand:V2HI 2 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (1), GEN_INT (1), GEN_INT (1)));
++    }
++  else
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (2), GEN_INT (0), GEN_INT (0)));
++    }
++  DONE;
++})
++
++(define_insn "pkbbsi_1"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
++			(const_int 65535))
++		(ashift:SI (match_operand:SI 2 "register_operand" "r")
++			   (const_int 16))))]
++  "NDS32_EXT_DSP_P ()"
++  "pkbb16\t%0, %2, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "pkbbsi_2"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI	(ashift:SI (match_operand:SI 2 "register_operand" "r")
++			   (const_int 16))
++		(and:SI (match_operand:SI 1 "register_operand" "r")
++			(const_int 65535))))]
++  "NDS32_EXT_DSP_P ()"
++  "pkbb16\t%0, %2, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "pkbbsi_3"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI (zero_extend:SI	(match_operand:HI 1 "register_operand" "r"))
++		(ashift:SI (match_operand:SI 2 "register_operand" "r")
++			   (const_int 16))))]
++  "NDS32_EXT_DSP_P ()"
++  "pkbb16\t%0, %2, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "pkbbsi_4"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI	(ashift:SI (match_operand:SI 2 "register_operand" "r")
++			   (const_int 16))
++		(zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "pkbb16\t%0, %2, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff)
++(define_insn "pktbsi_1"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
++			(const_int -65536))
++		(zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "pktb16\t%0, %1, %2"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "pktbsi_2"  ; v0 = (v1 & 0xffff0000) | (v2 & 0xffff)
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
++			(const_int -65536))
++		(and:SI (match_operand:SI 2 "register_operand" "r")
++			(const_int 65535))))]
++  "NDS32_EXT_DSP_P ()"
++  "pktb16\t%0, %1, %2"
++  [(set_attr "type"    "dpack")  ; Same type as the other pktb16 patterns in this file.
++   (set_attr "length"  "4")])
++
++(define_insn "pktbsi_3"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
++			 (const_int 16 )
++			 (const_int 0))
++	(match_operand:SI 1 "register_operand"                  " r"))]
++  "NDS32_EXT_DSP_P ()"
++  "pktb16\t%0, %0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "pktbsi_4"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
++			 (const_int 16 )
++			 (const_int 0))
++	(zero_extend:SI (match_operand:HI 1 "register_operand"  " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "pktb16\t%0, %0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "pkttsi"
++  [(set (match_operand:SI 0 "register_operand"                      "=r")
++	(ior:SI (and:SI (match_operand:SI 1 "register_operand"      " r")
++			(const_int -65536))
++		(lshiftrt:SI (match_operand:SI 2 "register_operand" " r")
++			     (const_int 16))))]
++  "NDS32_EXT_DSP_P ()"
++  "pktt16\t%0, %1, %2"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_expand "pkbt"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V2HI 1 "register_operand")
++   (match_operand:V2HI 2 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (1), GEN_INT (1), GEN_INT (0)));
++    }
++  else
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (2), GEN_INT (0), GEN_INT (1)));
++    }
++  DONE;
++})
++
++(define_expand "pktt"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V2HI 1 "register_operand")
++   (match_operand:V2HI 2 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (1), GEN_INT (0), GEN_INT (0)));
++    }
++  else
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (2), GEN_INT (1), GEN_INT (1)));
++    }
++  DONE;
++})
++
++(define_expand "pktb"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V2HI 1 "register_operand")
++   (match_operand:V2HI 2 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (1), GEN_INT (0), GEN_INT (1)));
++    }
++  else
++    {
++      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
++				  GEN_INT (2), GEN_INT (1), GEN_INT (0)));
++    }
++  DONE;
++})
++
++(define_insn "vec_mergerr"
++  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (match_operand:HI 1 "register_operand"    "    r,    r"))
++	  (vec_duplicate:V2HI
++	    (match_operand:HI 2 "register_operand"    "    r,    r"))
++	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   pkbb16\t%0, %2, %1
++   pkbb16\t%0, %1, %2"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++
++(define_insn "vec_merge"
++  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
++	(vec_merge:V2HI
++	  (match_operand:V2HI 1 "register_operand"    "    r,    r")
++	  (match_operand:V2HI 2 "register_operand"    "    r,    r")
++	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      const char *pats[] = { "pktb16\t%0, %1, %2",
++			     "pktb16\t%0, %2, %1" };
++      return pats[which_alternative];
++    }
++  else
++    {
++      const char *pats[] = { "pktb16\t%0, %2, %1",
++			     "pktb16\t%0, %1, %2" };
++      return pats[which_alternative];
++    }
++}
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_mergerv"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (match_operand:HI 1 "register_operand"                   "    r,    r,    r,    r"))
++	  (vec_duplicate:V2HI
++	    (vec_select:HI
++	      (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
++	      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
++	  (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv02, Iv02")))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   pkbb16\t%0, %2, %1
++   pktb16\t%0, %2, %1
++   pkbb16\t%0, %1, %2
++   pkbt16\t%0, %1, %2"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_mergevr"
++  [(set (match_operand:V2HI 0 "register_operand"                      "=   r,    r,    r,    r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (vec_select:HI
++	      (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
++	       (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
++	  (vec_duplicate:V2HI
++	    (match_operand:HI 2 "register_operand"                    "    r,    r,    r,    r"))
++	  (match_operand:SI 3 "nds32_imm_1_2_operand"                 " Iv01, Iv01, Iv02, Iv02")))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   pkbb16\t%0, %2, %1
++   pkbt16\t%0, %2, %1
++   pkbb16\t%0, %1, %2
++   pktb16\t%0, %1, %2"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_mergevv"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r,    r,    r,    r,    r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (vec_select:HI
++	      (match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
++	      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")])))
++	  (vec_duplicate:V2HI
++	    (vec_select:HI
++	      (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
++	      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")])))
++	  (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      const char *pats[] = { "pktt16\t%0, %1, %2",
++			     "pktb16\t%0, %1, %2",
++			     "pkbb16\t%0, %1, %2",
++			     "pkbt16\t%0, %1, %2",
++			     "pktt16\t%0, %2, %1",
++			     "pkbt16\t%0, %2, %1",
++			     "pkbb16\t%0, %2, %1",
++			     "pktb16\t%0, %2, %1" };
++      return pats[which_alternative];
++    }
++  else
++    {
++      const char *pats[] = { "pkbb16\t%0, %2, %1",
++			     "pktb16\t%0, %2, %1",
++			     "pktt16\t%0, %2, %1",
++			     "pkbt16\t%0, %2, %1",
++			     "pkbb16\t%0, %1, %2",
++			     "pkbt16\t%0, %1, %2",
++			     "pktt16\t%0, %1, %2",
++			     "pktb16\t%0, %1, %2" };
++      return pats[which_alternative];
++    }
++}
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++(define_expand "vec_extractv4qi"  ; Extract QI lane operand 2 of V4QI operand 1 into operand 0.
++  [(set (match_operand:QI 0 "register_operand" "")
++	(vec_select:QI
++	  (match_operand:V4QI 1          "nonimmediate_operand" "")
++	  (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  if (INTVAL (operands[2]) != 0
++      && INTVAL (operands[2]) != 1
++      && INTVAL (operands[2]) != 2
++      && INTVAL (operands[2]) != 3)
++    gcc_unreachable ();
++
++  if (INTVAL (operands[2]) != 0 && MEM_P (operands[1]))  /* Only lane 0 has a memory form.  */
++    FAIL;
++})
++
++(define_insn "vec_extractv4qi0"  ; Extract lane 0 of a V4QI from a register or from memory.
++  [(set (match_operand:QI 0 "register_operand"         "=l,r,r")
++	(vec_select:QI
++	  (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
++	  (parallel [(const_int 0)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  switch (which_alternative)
++    {
++    case 0:  /* 2-byte encoding, see the length attribute below.  */
++      return "zeb33\t%0, %1";
++    case 1:
++      return "zeb\t%0, %1";
++    case 2:  /* Memory source: emitted as a load.  */
++      return nds32_output_32bit_load (operands, 1);
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "alu,alu,load")
++   (set_attr "length"  "  2,  4,   4")])
++
++(define_insn "vec_extractv4qi0_ze"  ; Lane 0 of a V4QI, zero-extended to SI.
++  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
++	(zero_extend:SI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
++	    (parallel [(const_int 0)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  switch (which_alternative)
++    {
++    case 0:  /* 2-byte encoding, see the length attribute below.  */
++      return "zeb33\t%0, %1";
++    case 1:
++      return "zeb\t%0, %1";
++    case 2:  /* Memory source: emitted as a load.  */
++      return nds32_output_32bit_load (operands, 1);
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "alu,alu,load")
++   (set_attr "length"  "  2,  4,   4")])
++
++(define_insn "vec_extractv4qi0_se"  ; Lane 0 of a V4QI, sign-extended to SI.
++  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
++	(sign_extend:SI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
++	    (parallel [(const_int 0)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  switch (which_alternative)
++    {
++    case 0:  /* Presumably a 2-byte encoding like the other *33 forms.  */
++      return "seb33\t%0, %1";
++    case 1:
++      return "seb\t%0, %1";
++    case 2:  /* Memory source: emitted as a sign-extending load.  */
++      return nds32_output_32bit_load_se (operands, 1);
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "alu,alu,load")
++   (set_attr "length"  "  2,  4,   4")])
++
++(define_insn_and_split "vec_extractv4qi1"
++  [(set (match_operand:QI 0 "register_operand" "=r")
++	(vec_select:QI
++	  (match_operand:V4QI 1 "register_operand" " r")
++	  (parallel [(const_int 1)])))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (V4QImode);
++  emit_insn (gen_rotrv4qi_1 (tmp, operands[1]));
++  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++(define_insn_and_split "vec_extractv4qi2"
++  [(set (match_operand:QI 0 "register_operand" "=r")
++	(vec_select:QI
++	  (match_operand:V4QI 1 "register_operand" " r")
++	  (parallel [(const_int 2)])))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (V4QImode);
++  emit_insn (gen_rotrv4qi_2 (tmp, operands[1]));
++  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++(define_insn_and_split "vec_extractv4qi3"
++  [(set (match_operand:QI 0 "register_operand" "=r")
++	(vec_select:QI
++	  (match_operand:V4QI 1 "register_operand" " r")
++	  (parallel [(const_int 3)])))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (V4QImode);
++  emit_insn (gen_rotrv4qi_3 (tmp, operands[1]));
++  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++(define_insn "vec_extractv4qi3_se"
++  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
++	(sign_extend:SI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "register_operand" "  0,r")
++	    (parallel [(const_int 3)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   srai45\t%0, 24
++   srai\t%0, %1, 24"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn "vec_extractv4qi3_ze"
++  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
++	(zero_extend:SI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "register_operand" "  0,r")
++	    (parallel [(const_int 3)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   srli45\t%0, 24
++   srli\t%0, %1, 24"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++(define_insn_and_split "vec_extractv4qihi0"
++  [(set (match_operand:HI 0 "register_operand" "=r")
++	(sign_extend:HI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "register_operand" " r")
++	    (parallel [(const_int 0)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (QImode);
++  emit_insn (gen_vec_extractv4qi0 (tmp, operands[1]));
++  emit_insn (gen_extendqihi2 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++(define_insn_and_split "vec_extractv4qihi1"
++  [(set (match_operand:HI 0 "register_operand" "=r")
++	(sign_extend:HI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "register_operand" " r")
++	    (parallel [(const_int 1)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (QImode);
++  emit_insn (gen_vec_extractv4qi1 (tmp, operands[1]));
++  emit_insn (gen_extendqihi2 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++(define_insn_and_split "vec_extractv4qihi2"
++  [(set (match_operand:HI 0 "register_operand" "=r")
++	(sign_extend:HI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "register_operand" " r")
++	    (parallel [(const_int 2)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (QImode);
++  emit_insn (gen_vec_extractv4qi2 (tmp, operands[1]));
++  emit_insn (gen_extendqihi2 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++;; Sign-extend byte element 3 of a V4QI register into an HImode register
++;; (little-endian only).  The output template is "#", so this insn never
++;; prints assembly itself: while !reload_completed it is split into
++;; vec_extractv4qi3 (into a fresh QImode pseudo) followed by extendqihi2.
++;; NOTE(review): the split condition does not repeat the
++;; !TARGET_BIG_ENDIAN test of the insn condition -- confirm this is intended.
++(define_insn_and_split "vec_extractv4qihi3"
++  [(set (match_operand:HI 0 "register_operand" "=r")
++	(sign_extend:HI
++	  (vec_select:QI
++	    (match_operand:V4QI 1 "register_operand" " r")
++	    (parallel [(const_int 3)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  /* The [(const_int 1)] template above is only a placeholder: the insns
++     emitted here are the whole replacement, hence DONE.  */
++  rtx tmp = gen_reg_rtx (QImode);
++  emit_insn (gen_vec_extractv4qi3 (tmp, operands[1]));
++  emit_insn (gen_extendqihi2 (operands[0], tmp));
++  DONE;
++}
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")])
++
++;; Expander for extracting HImode element operand 2 (0 or 1) of the V2HI
++;; operand 1 into register operand 0.  The RTL template itself is what
++;; gets emitted; the preparation code only validates the operands and
++;; FAILs for combinations no insn in this file can match.
++(define_expand "vec_extractv2hi"
++  [(set (match_operand:HI 0 "register_operand" "")
++	(vec_select:HI
++	  (match_operand:V2HI 1          "nonimmediate_operand" "")
++	  (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* A V2HI vector only has elements 0 and 1.  */
++  if (INTVAL (operands[2]) != 0
++      && INTVAL (operands[2]) != 1)
++    gcc_unreachable ();
++
++  /* Operand 0 is always a register, so the memory test must look at the
++     source vector.  Only vec_extractv2hi0 (little-endian, element 0) and
++     vec_extractv2hi1_be (big-endian, element 1) offer a memory
++     alternative; extracting the other element from memory would leave an
++     insn nothing recognizes, so give up and let the caller fall back.  */
++  if (MEM_P (operands[1])
++      && INTVAL (operands[2]) != (TARGET_BIG_ENDIAN ? 1 : 0))
++    FAIL;
++})
++
++;; Little-endian: copy HImode element 0 of a V2HI register or memory
++;; operand into a register.  Register sources print seh33 / seh; the memory
++;; source is printed by nds32_output_32bit_load_se, called with 2
++;; (presumably the access size in bytes -- confirm against that helper).
++(define_insn "vec_extractv2hi0"
++  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
++	(vec_select:HI
++	  (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
++	  (parallel [(const_int 0)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  /* Register sources: the length-2 form first, then the length-4 form.  */
++  if (which_alternative == 0)
++    return "seh33\t%0, %1";
++  if (which_alternative == 1)
++    return "seh\t%0, %1";
++
++  /* Memory source.  */
++  if (which_alternative == 2)
++    return nds32_output_32bit_load_se (operands, 2);
++
++  gcc_unreachable ();
++}
++  [(set_attr "type"    "alu,alu,load")
++   (set_attr "length"  "  2,  4,   4")])
++
++;; Little-endian: zero-extend HImode element 0 of a V2HI register or
++;; memory operand into an SImode register.  Register sources print
++;; zeh33 / zeh; the two memory alternatives are printed by
++;; nds32_output_16bit_load and nds32_output_32bit_load, both called with 2
++;; (presumably the access size in bytes -- confirm against those helpers).
++(define_insn "vec_extractv2hi0_ze"
++  [(set (match_operand:SI 0 "register_operand"         "=$l, r,$  l, *r")
++        (zero_extend:SI
++	  (vec_select:HI
++	    (match_operand:V2HI 1 "nonimmediate_operand" "  l, r, U33,  m")
++	    (parallel [(const_int 0)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  /* Register sources.  */
++  if (which_alternative == 0)
++    return "zeh33\t%0, %1";
++  if (which_alternative == 1)
++    return "zeh\t%0, %1";
++
++  /* Memory sources: the length-2 load, then the length-4 load.  */
++  if (which_alternative == 2)
++    return nds32_output_16bit_load (operands, 2);
++  if (which_alternative == 3)
++    return nds32_output_32bit_load (operands, 2);
++
++  gcc_unreachable ();
++}
++  [(set_attr "type"   "alu,alu,load,load")
++   (set_attr "length" "  2,  4,   2,   4")])
++
++;; Little-endian: sign-extend HImode element 0 of a V2HI register or
++;; memory operand into an SImode register.  Register sources print
++;; seh33 / seh; the memory source is printed by
++;; nds32_output_32bit_load_se, called with 2 (presumably the access size
++;; in bytes -- confirm against that helper).
++(define_insn "vec_extractv2hi0_se"
++  [(set (match_operand:SI 0 "register_operand"         "=$l, r, r")
++        (sign_extend:SI
++	  (vec_select:HI
++	    (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
++	    (parallel [(const_int 0)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  /* Register sources: the length-2 form first, then the length-4 form.  */
++  if (which_alternative == 0)
++    return "seh33\t%0, %1";
++  if (which_alternative == 1)
++    return "seh\t%0, %1";
++
++  /* Memory source.  */
++  if (which_alternative == 2)
++    return nds32_output_32bit_load_se (operands, 2);
++
++  gcc_unreachable ();
++}
++  [(set_attr "type"   "alu,alu,load")
++   (set_attr "length" "  2,  4,   4")])
++
++;; Big-endian: extract HImode element 0 of a V2HI register with an
++;; arithmetic right shift by 16.  Alternative 0 ties the source to the
++;; destination ("0") and prints the length-2 srai45; alternative 1 prints
++;; the three-operand srai.
++(define_insn "vec_extractv2hi0_be"
++  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
++	(vec_select:HI
++	  (match_operand:V2HI 1 "register_operand" "  0,r")
++	  (parallel [(const_int 0)])))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "@
++   srai45\t%0, 16
++   srai\t%0, %1, 16"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++;; Little-endian: extract HImode element 1 of a V2HI register with an
++;; arithmetic right shift by 16.  Alternative 0 ties the source to the
++;; destination ("0") and prints the length-2 srai45; alternative 1 prints
++;; the three-operand srai.
++(define_insn "vec_extractv2hi1"
++  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
++	(vec_select:HI
++	  (match_operand:V2HI 1 "register_operand" "  0,r")
++	  (parallel [(const_int 1)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   srai45\t%0, 16
++   srai\t%0, %1, 16"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++;; Little-endian: sign-extend HImode element 1 of a V2HI register into an
++;; SImode register.  Printed as the same arithmetic right shift by 16 as
++;; vec_extractv2hi1 (srai45 when source and destination are tied, srai
++;; otherwise).
++(define_insn "vec_extractv2hi1_se"
++  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
++	(sign_extend:SI
++	  (vec_select:HI
++	    (match_operand:V2HI 1 "register_operand" "  0,r")
++	    (parallel [(const_int 1)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   srai45\t%0, 16
++   srai\t%0, %1, 16"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++;; Little-endian: zero-extend HImode element 1 of a V2HI register into an
++;; SImode register.  Printed as a logical right shift by 16 (srli45 when
++;; source and destination are tied, srli otherwise).
++(define_insn "vec_extractv2hi1_ze"
++  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
++	(zero_extend:SI
++	  (vec_select:HI
++	    (match_operand:V2HI 1 "register_operand" "  0,r")
++	    (parallel [(const_int 1)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   srli45\t%0, 16
++   srli\t%0, %1, 16"
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")])
++
++;; Big-endian: copy HImode element 1 of a V2HI register or memory operand
++;; into a register.  Register sources print seh33 / seh; the memory source
++;; is printed by nds32_output_32bit_load_se, called with 2 (presumably the
++;; access size in bytes).
++;; NOTE(review): confirm the helper addresses element 1, not the first two
++;; bytes of the vector, for the memory alternative.
++(define_insn "vec_extractv2hi1_be"
++  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
++	(vec_select:HI
++	  (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
++	  (parallel [(const_int 1)])))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++{
++  /* Register sources: the length-2 form first, then the length-4 form.  */
++  if (which_alternative == 0)
++    return "seh33\t%0, %1";
++  if (which_alternative == 1)
++    return "seh\t%0, %1";
++
++  /* Memory source.  */
++  if (which_alternative == 2)
++    return nds32_output_32bit_load_se (operands, 2);
++
++  gcc_unreachable ();
++}
++  [(set_attr "type"    "alu,alu,load")
++   (set_attr "length"  "  2,  4,   4")])
++
++;; Element-wise widening multiply: both V2HI operands are widened to V2SI
++;; through the `extend' code iterator and multiplied, giving a V2SI
++;; result.  The "%" on operand 1 marks operands 1 and 2 as commutative.
++;; (`extend' and <su> are iterators defined outside this part of the file.)
++(define_insn "<su>mul16"
++  [(set (match_operand:V2SI 0 "register_operand"                         "=r")
++	(mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r"))
++		   (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>mul16\t%0, %1, %2"
++  [(set_attr "type"   "dmul")
++   (set_attr "length"   "4")])
++
++;; Crossed widening multiply.  With vec_merge mask 1, result element 0
++;; comes from the first arm and element 1 from the second:
++;;   result[0] = extend (op1[0]) * extend (op2[1])
++;;   result[1] = extend (op1[1]) * extend (op2[0])
++;; (`extend' and <su> are iterators defined outside this part of the file.)
++(define_insn "<su>mulx16"
++  [(set (match_operand:V2SI 0 "register_operand"         "=r")
++	(vec_merge:V2SI
++	  (vec_duplicate:V2SI
++	    (mult:SI
++	      (extend:SI
++		(vec_select:HI
++		  (match_operand:V2HI 1 "register_operand" " r")
++		  (parallel [(const_int 0)])))
++	      (extend:SI
++		(vec_select:HI
++		  (match_operand:V2HI 2 "register_operand" " r")
++		  (parallel [(const_int 1)])))))
++	  (vec_duplicate:V2SI
++	    (mult:SI
++	      (extend:SI
++		(vec_select:HI
++		  (match_dup 1)
++		  (parallel [(const_int 1)])))
++	      (extend:SI
++		(vec_select:HI
++		  (match_dup 2)
++		  (parallel [(const_int 0)])))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>mulx16\t%0, %1, %2"
++  [(set_attr "type"    "dmul")
++   (set_attr "length"   "4")])
++
++;; Little-endian: swap the two HImode elements of a V2HI register
++;; (selection [1 0]), printed as a rotate right by 16 bits.
++(define_insn "rotrv2hi_1"
++  [(set (match_operand:V2HI 0 "register_operand"    "=r")
++	(vec_select:V2HI
++	   (match_operand:V2HI 1 "register_operand" " r")
++	   (parallel [(const_int 1) (const_int 0)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 16"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Big-endian: swap the two HImode elements of a V2HI register, printed as
++;; a rotate right by 16 bits.  Exchanging two elements is selection [1 0]
++;; whichever end element 0 sits at; the identity selection [0 1] would
++;; describe a plain copy, not what rotri does.
++(define_insn "rotrv2hi_1_be"
++  [(set (match_operand:V2HI 0 "register_operand"    "=r")
++	(vec_select:V2HI
++	   (match_operand:V2HI 1 "register_operand" " r")
++	   (parallel [(const_int 1) (const_int 0)])))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 16"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Little-endian: rotate the four byte elements of a V4QI register by one
++;; position (result[i] = source[(i + 1) mod 4]), printed as a rotate right
++;; by 8 bits.
++(define_insn "rotrv4qi_1"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 8"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Big-endian: rotate a V4QI register right by 8 bits.  With element 0 as
++;; the most significant byte (the numbering the *_be unpkd patterns in
++;; this file use), the rotate moves old element 3 into element 0 and every
++;; other element one position up: selection [3 0 1 2].  A selection such
++;; as [2 1 0 3] only exchanges elements 0 and 2 and is not a rotation.
++(define_insn "rotrv4qi_1_be"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 8"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Little-endian: rotate the four byte elements of a V4QI register by two
++;; positions (result[i] = source[(i + 2) mod 4]), printed as a rotate
++;; right by 16 bits.
++(define_insn "rotrv4qi_2"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 16"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Big-endian: rotate a V4QI register right by 16 bits, i.e. exchange its
++;; two 16-bit halves.  That is selection [2 3 0 1] whichever end element 0
++;; sits at.  A selection such as [1 0 3 2] would swap the bytes inside
++;; each half instead, which is not what rotri does.
++(define_insn "rotrv4qi_2_be"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 16"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Little-endian: rotate the four byte elements of a V4QI register by
++;; three positions (result[i] = source[(i + 3) mod 4]), printed as a rotate
++;; right by 24 bits.
++(define_insn "rotrv4qi_3"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 24"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Big-endian: rotate a V4QI register right by 24 bits.  With element 0 as
++;; the most significant byte (the numbering the *_be unpkd patterns in
++;; this file use), the rotate moves old element 0 into element 3 and every
++;; other element one position down: selection [1 2 3 0].  A selection such
++;; as [0 3 2 1] only exchanges elements 1 and 3 and is not a rotation.
++(define_insn "rotrv4qi_3_be"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "rotri\t%0, %1, 24"
++  [(set_attr "type"   "alu")
++   (set_attr "length"  "4")])
++
++;; Little-endian: duplicate byte elements 0 and 1 of a V4QI register into
++;; both halves of the result (selection [0 1 0 1]).  Printed as pkbb with
++;; operand 1 used for both sources.
++(define_insn "v4qi_dup_10"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "pkbb\t%0, %1, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; Little-endian: duplicate byte elements 2 and 3 of a V4QI register into
++;; both halves of the result (selection [2 3 2 3]).  Printed as pktt with
++;; operand 1 used for both sources.
++(define_insn "v4qi_dup_32"
++  [(set (match_operand:V4QI 0 "register_operand"    "=r")
++	(vec_select:V4QI
++	   (match_operand:V4QI 1 "register_operand" " r")
++	   (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "pktt\t%0, %1, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; Little-endian only: expand vec_unpacks_lo_v4qi (V4QI operand 1 into
++;; V2HI operand 0) by forwarding both operands to the sunpkd810 expander.
++(define_expand "vec_unpacks_lo_v4qi"
++  [(match_operand:V2HI 0 "register_operand" "=r")
++   (match_operand:V4QI 1 "register_operand" " r")]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++{
++  emit_insn (gen_sunpkd810 (operands[0], operands[1]));
++  DONE;
++})
++
++;; Expander for sunpkd810: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "sunpkd810"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_sunpkd810_imp_be (operands[0], operands[1])
++	     : gen_sunpkd810_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; <zs>unpkd810, little-endian.  vec_merge mask 2 takes result element 1
++;; from the first arm and element 0 from the second:
++;;   result[1] = extend (op1 byte 1), result[0] = extend (op1 byte 0).
++;; (`extend' and <zs> are iterators defined outside this part of the file.)
++(define_insn "<zs>unpkd810_imp"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd810\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd810, little-endian, written with the vec_merge arms swapped.
++;; Mask 1 takes result element 0 from the first arm and element 1 from
++;; the second, so the value is the same as <zs>unpkd810_imp:
++;;   result[0] = extend (op1 byte 0), result[1] = extend (op1 byte 1).
++(define_insn "<zs>unpkd810_imp_inv"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 1)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd810\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd810, big-endian element numbering.  Mask 1 takes result
++;; element 0 from the first arm and element 1 from the second:
++;;   result[0] = extend (op1 byte 2), result[1] = extend (op1 byte 3).
++(define_insn "<zs>unpkd810_imp_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 2)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 3)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd810\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd810, big-endian, written with the vec_merge arms swapped.
++;; Mask 2 takes result element 1 from the first arm and element 0 from
++;; the second, so the value is the same as <zs>unpkd810_imp_be:
++;;   result[1] = extend (op1 byte 3), result[0] = extend (op1 byte 2).
++(define_insn "<zs>unpkd810_imp_inv_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 3)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 2)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd810\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; Expander for sunpkd820: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "sunpkd820"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_sunpkd820_imp_be (operands[0], operands[1])
++	     : gen_sunpkd820_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; <zs>unpkd820, little-endian.  vec_merge mask 2 takes result element 1
++;; from the first arm and element 0 from the second:
++;;   result[1] = extend (op1 byte 2), result[0] = extend (op1 byte 0).
++;; (`extend' and <zs> are iterators defined outside this part of the file.)
++(define_insn "<zs>unpkd820_imp"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 2)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd820\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd820, little-endian, written with the vec_merge arms swapped.
++;; Mask 1 takes result element 0 from the first arm and element 1 from
++;; the second, so the value is the same as <zs>unpkd820_imp:
++;;   result[0] = extend (op1 byte 0), result[1] = extend (op1 byte 2).
++(define_insn "<zs>unpkd820_imp_inv"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 2)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd820\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd820, big-endian element numbering.  Mask 1 takes result
++;; element 0 from the first arm and element 1 from the second:
++;;   result[0] = extend (op1 byte 1), result[1] = extend (op1 byte 3).
++(define_insn "<zs>unpkd820_imp_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 3)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd820\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd820, big-endian, written with the vec_merge arms swapped.
++;; Mask 2 takes result element 1 from the first arm and element 0 from
++;; the second, so the value is the same as <zs>unpkd820_imp_be:
++;;   result[1] = extend (op1 byte 3), result[0] = extend (op1 byte 1).
++(define_insn "<zs>unpkd820_imp_inv_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 3)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 1)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd820\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; Expander for sunpkd830: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "sunpkd830"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_sunpkd830_imp_be (operands[0], operands[1])
++	     : gen_sunpkd830_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; <zs>unpkd830, little-endian.  vec_merge mask 2 takes result element 1
++;; from the first arm and element 0 from the second:
++;;   result[1] = extend (op1 byte 3), result[0] = extend (op1 byte 0).
++;; (`extend' and <zs> are iterators defined outside this part of the file.)
++(define_insn "<zs>unpkd830_imp"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 3)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd830\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd830, little-endian, written with the vec_merge arms swapped.
++;; Mask 1 takes result element 0 from the first arm and element 1 from
++;; the second, so the value is the same as <zs>unpkd830_imp:
++;;   result[0] = extend (op1 byte 0), result[1] = extend (op1 byte 3).
++(define_insn "<zs>unpkd830_imp_inv"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 3)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd830\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd830, big-endian element numbering.  Mask 1 takes result
++;; element 0 from the first arm and element 1 from the second:
++;;   result[0] = extend (op1 byte 0), result[1] = extend (op1 byte 3).
++(define_insn "<zs>unpkd830_imp_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 3)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd830\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd830, big-endian, written with the vec_merge arms swapped.
++;; Mask 2 takes result element 1 from the first arm and element 0 from
++;; the second, so the value is the same as <zs>unpkd830_imp_be:
++;;   result[1] = extend (op1 byte 3), result[0] = extend (op1 byte 0).
++(define_insn "<zs>unpkd830_imp_inv_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 3)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd830\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; Expander for sunpkd831: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "sunpkd831"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_sunpkd831_imp_be (operands[0], operands[1])
++	     : gen_sunpkd831_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; <zs>unpkd831, little-endian.  vec_merge mask 2 takes result element 1
++;; from the first arm and element 0 from the second:
++;;   result[1] = extend (op1 byte 3), result[0] = extend (op1 byte 1).
++;; (`extend' and <zs> are iterators defined outside this part of the file.)
++(define_insn "<zs>unpkd831_imp"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 3)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 1)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd831\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd831, little-endian, written with the vec_merge arms swapped.
++;; Mask 1 takes result element 0 from the first arm and element 1 from
++;; the second, so the value is the same as <zs>unpkd831_imp:
++;;   result[0] = extend (op1 byte 1), result[1] = extend (op1 byte 3).
++(define_insn "<zs>unpkd831_imp_inv"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 3)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<zs>unpkd831\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd831, big-endian element numbering.  Mask 1 takes result
++;; element 0 from the first arm and element 1 from the second:
++;;   result[0] = extend (op1 byte 0), result[1] = extend (op1 byte 2).
++(define_insn "<zs>unpkd831_imp_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 0)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 2)]))))
++	  (const_int 1)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd831\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; <zs>unpkd831, big-endian, written with the vec_merge arms swapped.
++;; Mask 2 takes result element 1 from the first arm and element 0 from
++;; the second, so the value is the same as <zs>unpkd831_imp_be:
++;;   result[1] = extend (op1 byte 2), result[0] = extend (op1 byte 0).
++(define_insn "<zs>unpkd831_imp_inv_be"
++  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_operand:V4QI 1 "register_operand"             " r")
++		(parallel [(const_int 2)]))))
++	  (vec_duplicate:V2HI
++	    (extend:HI
++	      (vec_select:QI
++		(match_dup 1)
++		(parallel [(const_int 0)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "<zs>unpkd831\t%0, %1"
++  [(set_attr "type"    "dpack")
++   (set_attr "length"  "4")])
++
++;; Expander for zunpkd810: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "zunpkd810"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_zunpkd810_imp_be (operands[0], operands[1])
++	     : gen_zunpkd810_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; Expander for zunpkd820: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "zunpkd820"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_zunpkd820_imp_be (operands[0], operands[1])
++	     : gen_zunpkd820_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; Expander for zunpkd830: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "zunpkd830"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_zunpkd830_imp_be (operands[0], operands[1])
++	     : gen_zunpkd830_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; Expander for zunpkd831: forwards operands 0 and 1 to the insn generator
++;; that matches the target endianness and emits nothing else.
++(define_expand "zunpkd831"
++  [(match_operand:V2HI 0 "register_operand")
++   (match_operand:V4QI 1 "register_operand")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The _be pattern uses big-endian element numbers.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_zunpkd831_imp_be (operands[0], operands[1])
++	     : gen_zunpkd831_imp (operands[0], operands[1]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; Expander for smbb: emits mulhisi3v on operands 0-2, selecting the same
++;; element from both V2HI sources -- element 1 on big-endian targets and
++;; element 0 otherwise.
++(define_expand "smbb"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* One element index serves both sources.  */
++  rtx elt = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
++			    elt, elt));
++  DONE;
++})
++
++;; Expander for smbt: emits mulhisi3v on operands 0-2 with different
++;; element indices for the two V2HI sources -- (1, 0) on big-endian
++;; targets and (0, 1) otherwise.
++(define_expand "smbt"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Element index for operand 1, then for operand 2.  */
++  rtx elt1 = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  rtx elt2 = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
++			    elt1, elt2));
++  DONE;
++})
++
++;; Expander for smtt: emits mulhisi3v on operands 0-2, selecting the same
++;; element from both V2HI sources -- element 0 on big-endian targets and
++;; element 1 otherwise.
++(define_expand "smtt"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* One element index serves both sources.  */
++  rtx elt = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
++			    elt, elt));
++  DONE;
++})
++
++;; Multiply one sign-extended HImode element of operand 1 by one
++;; sign-extended HImode element of operand 2, giving an SImode product.
++;; Operands 3 and 4 are the element indices; the four alternatives cover
++;; the index pairs (0,0), (0,1), (1,1) and (1,0).  The mnemonic, and for
++;; the mixed pairs the source order, depends on the target endianness.
++(define_insn "mulhisi3v"
++  [(set (match_operand:SI 0 "register_operand"                         "=   r,    r,    r,    r")
++	(mult:SI
++	  (sign_extend:SI
++	     (vec_select:HI
++	       (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
++	       (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
++	  (sign_extend:SI (vec_select:HI
++	       (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
++	       (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")])))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Row 0: little-endian templates, row 1: big-endian templates; the
++     column is the matched alternative.  */
++  static const char *const templates[2][4] = {
++    { "smbb\t%0, %1, %2",
++      "smbt\t%0, %1, %2",
++      "smtt\t%0, %1, %2",
++      "smbt\t%0, %2, %1" },
++    { "smtt\t%0, %1, %2",
++      "smbt\t%0, %2, %1",
++      "smbb\t%0, %1, %2",
++      "smbt\t%0, %1, %2" }
++  };
++
++  return templates[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++;; Expander for kmabb: emits kma_internal with operand 1 as the value
++;; added to the product (passed last) and operands 2 and 3 as the V2HI
++;; sources.  Both sources use the same element -- element 1 on big-endian
++;; targets and element 0 otherwise.
++(define_expand "kmabb"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* One element index serves both sources.  */
++  rtx elt = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
++			       elt, elt, operands[1]));
++  DONE;
++})
++
++;; Expander for kmabt: emits kma_internal with operand 1 as the value
++;; added to the product (passed last) and operands 2 and 3 as the V2HI
++;; sources.  The element indices are (1, 0) on big-endian targets and
++;; (0, 1) otherwise.
++(define_expand "kmabt"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Element index for operand 2, then for operand 3.  */
++  rtx elt2 = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  rtx elt3 = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
++			       elt2, elt3, operands[1]));
++  DONE;
++})
++
++;; Expander for kmatt: emits kma_internal with operand 1 as the value
++;; added to the product (passed last) and operands 2 and 3 as the V2HI
++;; sources.  Both sources use the same element -- element 0 on big-endian
++;; targets and element 1 otherwise.
++(define_expand "kmatt"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* One element index serves both sources.  */
++  rtx elt = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
++			       elt, elt, operands[1]));
++  DONE;
++})
++
++;; Saturating multiply-accumulate: ss_plus of operand 5 and the SImode
++;; product of one sign-extended HImode element from each of operands 1
++;; and 2.  Operand 5 is tied to the destination ("0").  Operands 3 and 4
++;; are the element indices; the four alternatives cover the index pairs
++;; (0,0), (0,1), (1,1) and (1,0).  The mnemonic, and for the mixed pairs
++;; the source order, depends on the target endianness.
++(define_insn "kma_internal"
++  [(set (match_operand:SI 0 "register_operand"                          "=    r,    r,    r,    r")
++	(ss_plus:SI
++	  (mult:SI
++	    (sign_extend:SI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
++	        (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
++	    (sign_extend:SI
++	      (vec_select:HI
++	        (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
++	        (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")]))))
++	  (match_operand:SI 5 "register_operand"                        "     0,    0,    0,    0")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Row 0: little-endian templates, row 1: big-endian templates; the
++     column is the matched alternative.  */
++  static const char *const templates[2][4] = {
++    { "kmabb\t%0, %1, %2",
++      "kmabt\t%0, %1, %2",
++      "kmatt\t%0, %1, %2",
++      "kmabt\t%0, %2, %1" },
++    { "kmatt\t%0, %1, %2",
++      "kmabt\t%0, %2, %1",
++      "kmabb\t%0, %1, %2",
++      "kmabt\t%0, %1, %2" }
++  };
++
++  return templates[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"    "dmac")
++   (set_attr "length"   "4")])
++
++;; Expander for smds: forwards operands 0-2 to the smds_be or smds_le
++;; expander according to the target endianness.
++(define_expand "smds"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The two variants differ only in which element pair is subtracted.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_smds_be (operands[0], operands[1], operands[2])
++	     : gen_smds_le (operands[0], operands[1], operands[2]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; Pattern-only expander (empty preparation block) used by smds on
++;; little-endian targets.  With sx = sign_extend to SImode it emits
++;;   op0 = sx (op1[1]) * sx (op2[1]) - sx (op1[0]) * sx (op2[0]).
++;; No insn of this name appears in this part of the file; presumably a
++;; matching define_insn elsewhere recognizes the emitted RTL.
++(define_expand "smds_le"
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" " r")
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" " r")
++			      (parallel [(const_int 1)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 0)]))))))]
++  "NDS32_EXT_DSP_P ()"
++{
++})
++
++;; Pattern-only expander (empty preparation block) used by smds on
++;; big-endian targets.  With sx = sign_extend to SImode it emits
++;;   op0 = sx (op1[0]) * sx (op2[0]) - sx (op1[1]) * sx (op2[1]).
++;; No insn of this name appears in this part of the file; presumably a
++;; matching define_insn elsewhere recognizes the emitted RTL.
++(define_expand "smds_be"
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" " r")
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" " r")
++			      (parallel [(const_int 0)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 1)]))))))]
++  "NDS32_EXT_DSP_P ()"
++{
++})
++
++;; Expander for smdrs: forwards operands 0-2 to the smdrs_be or smdrs_le
++;; expander according to the target endianness.
++(define_expand "smdrs"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The two variants differ only in which element pair is subtracted.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_smdrs_be (operands[0], operands[1], operands[2])
++	     : gen_smdrs_le (operands[0], operands[1], operands[2]));
++  emit_insn (pat);
++  DONE;
++})
++
++;; Pattern-only expander (empty preparation block) used by smdrs on
++;; little-endian targets.  With sx = sign_extend to SImode it emits
++;;   op0 = sx (op1[0]) * sx (op2[0]) - sx (op1[1]) * sx (op2[1]).
++;; No insn of this name appears in this part of the file; presumably a
++;; matching define_insn elsewhere recognizes the emitted RTL.
++(define_expand "smdrs_le"
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" " r")
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" " r")
++			      (parallel [(const_int 0)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 1)]))))))]
++  "NDS32_EXT_DSP_P ()"
++{
++})
++
++;; Pattern-only expander (empty preparation block) used by smdrs on
++;; big-endian targets.  With sx = sign_extend to SImode it emits
++;;   op0 = sx (op1[1]) * sx (op2[1]) - sx (op1[0]) * sx (op2[0]).
++;; No insn of this name appears in this part of the file; presumably a
++;; matching define_insn elsewhere recognizes the emitted RTL.
++(define_expand "smdrs_be"
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" " r")
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" " r")
++			      (parallel [(const_int 1)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 0)]))))))]
++  "NDS32_EXT_DSP_P ()"
++{
++})
++
++;; Expander for smxdsv: forwards operands 0-2 to the smxdsv_be or
++;; smxdsv_le expander according to the target endianness.
++(define_expand "smxdsv"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:V2HI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The two variants differ only in which crossed product is subtracted.  */
++  rtx pat = (TARGET_BIG_ENDIAN
++	     ? gen_smxdsv_be (operands[0], operands[1], operands[2])
++	     : gen_smxdsv_le (operands[0], operands[1], operands[2]));
++  emit_insn (pat);
++  DONE;
++})
++
++
++;; Pattern-only expander (empty preparation block) used by smxdsv on
++;; little-endian targets.  With sx = sign_extend to SImode it emits
++;;   op0 = sx (op1[1]) * sx (op2[0]) - sx (op1[0]) * sx (op2[1]).
++;; No insn of this name appears in this part of the file; presumably a
++;; matching define_insn elsewhere recognizes the emitted RTL.
++(define_expand "smxdsv_le"
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" " r")
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" " r")
++			      (parallel [(const_int 0)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 1)]))))))]
++  "NDS32_EXT_DSP_P ()"
++{
++})
++
++(define_expand "smxdsv_be"  ;; Expander for op0 = op1[0] * op2[1] - op1[1] * op2[0], half-words sign-extended to SI.
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(minus:SI
++	  (mult:SI  ;; minuend: element 0 of operand 1 times element 1 of operand 2
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" " r")
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" " r")
++			      (parallel [(const_int 1)]))))
++	  (mult:SI  ;; subtrahend: element 1 of operand 1 times element 0 of operand 2
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 0)]))))))]
++  "NDS32_EXT_DSP_P ()"  ;; enabled only when NDS32_EXT_DSP_P () is true
++{  /* No preparation statements.  */
++})
++
++(define_insn "smal1"  ;; op0 = op1 + sign_extend:DI (op2[0] * op2[1]); the product is formed in SI.
++  [(set (match_operand:DI 0 "register_operand"             "=r")
++	(plus:DI (match_operand:DI 1 "register_operand"    " r")
++	  (sign_extend:DI
++	    (mult:SI
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_operand:V2HI 2 "register_operand" " r")
++		  (parallel [(const_int 0)])))
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_dup 2)
++		  (parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal2"  ;; op0 = op1 + op2[0] * op2[1]; each half-word is sign-extended directly to DI.
++  [(set (match_operand:DI 0 "register_operand"           "=r")
++	(plus:DI (match_operand:DI 1 "register_operand"  " r")
++	  (mult:DI
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)])))
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))))))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal3"  ;; op0 = op1 + sign_extend:DI (op2[1] * op2[0]); the product is formed in SI.
++  [(set (match_operand:DI 0 "register_operand"             "=r")
++	(plus:DI (match_operand:DI 1 "register_operand"    " r")
++	  (sign_extend:DI
++	    (mult:SI
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_operand:V2HI 2 "register_operand" " r")
++		  (parallel [(const_int 1)])))
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_dup 2)
++		  (parallel [(const_int 0)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal4"  ;; op0 = op1 + op2[1] * op2[0]; each half-word is sign-extended directly to DI.
++  [(set (match_operand:DI 0 "register_operand"           "=r")
++	(plus:DI (match_operand:DI 1 "register_operand"  " r")
++	  (mult:DI
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)])))
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))))))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal5"  ;; op0 = sign_extend:DI (op2[0] * op2[1]) + op1; the DI addend is the second plus operand.
++  [(set (match_operand:DI 0 "register_operand"             "=r")
++	(plus:DI
++	  (sign_extend:DI
++	    (mult:SI
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_operand:V2HI 2 "register_operand" " r")
++		  (parallel [(const_int 0)])))
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_dup 2)
++		  (parallel [(const_int 1)])))))
++	  (match_operand:DI 1 "register_operand"           " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal6"  ;; op0 = op2[0] * op2[1] + op1; half-words sign-extended directly to DI, addend second.
++  [(set (match_operand:DI 0 "register_operand"           "=r")
++	(plus:DI
++	  (mult:DI
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 0)])))
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 1)]))))
++	  (match_operand:DI 1 "register_operand"         " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal7"  ;; op0 = sign_extend:DI (op2[1] * op2[0]) + op1; the DI addend is the second plus operand.
++  [(set (match_operand:DI 0 "register_operand"             "=r")
++	(plus:DI
++	  (sign_extend:DI
++	    (mult:SI
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_operand:V2HI 2 "register_operand" " r")
++		  (parallel [(const_int 1)])))
++	      (sign_extend:SI
++		(vec_select:HI
++		  (match_dup 2)
++		  (parallel [(const_int 0)])))))
++	  (match_operand:DI 1 "register_operand"           " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"    "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smal8"  ;; op0 = op2[1] * op2[0] + op1; half-words sign-extended directly to DI, addend second.
++  [(set (match_operand:DI 0 "register_operand"           "=r")
++	(plus:DI
++	  (mult:DI
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand" " r")
++		(parallel [(const_int 1)])))
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_dup 2)
++		(parallel [(const_int 0)]))))
++	  (match_operand:DI 1 "register_operand"         " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "smal\t%0, %1, %2"  ;; smal1..smal8 are alternative RTL spellings that all print this same template
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++;; SI -> DI sign extension, provided so that the smal patterns can match.
++(define_insn_and_split "extendsidi2"
++  [(set (match_operand:DI 0 "register_operand" "")
++	(sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
++  "NDS32_EXT_DSP_P ()"
++  "#"  ;; no assembler template of its own; the insn is always split
++  "NDS32_EXT_DSP_P ()"
++  [(const_int 0)]
++{
++  /* Obtain the low-part and high-part pieces of the DI destination.  */
++  rtx dst_lo = nds32_di_low_part_subreg (operands[0]);
++  rtx dst_hi = nds32_di_high_part_subreg (operands[0]);
++
++  /* Low part = source; high part = low part shifted right by 31.  */
++  emit_move_insn (dst_lo, operands[1]);
++  emit_insn (gen_ashrsi3 (dst_hi, dst_lo, GEN_INT (31)));
++  DONE;
++}
++  [(set_attr "type"   "alu")
++   (set_attr "length"   "4")])
++
++;; SI -> DI zero extension, provided so that usmar64/usmsr64 can match.
++(define_insn_and_split "zero_extendsidi2"
++  [(set (match_operand:DI 0 "register_operand" "")
++	(zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
++  "NDS32_EXT_DSP_P ()"
++  "#"  ;; no assembler template of its own; the insn is always split
++  "NDS32_EXT_DSP_P ()"
++  [(const_int 0)]
++{
++  /* Obtain the low-part and high-part pieces of the DI destination.  */
++  rtx dst_lo = nds32_di_low_part_subreg (operands[0]);
++  rtx dst_hi = nds32_di_high_part_subreg (operands[0]);
++
++  /* Low part receives the source; high part is set to zero.  */
++  emit_move_insn (dst_lo, operands[1]);
++  emit_move_insn (dst_hi, const0_rtx);
++  DONE;
++}
++  [(set_attr "type"   "alu")
++   (set_attr "length"   "4")])
++
++(define_insn_and_split "extendhidi2"  ;; HI -> DI sign extension, split into two SI operations.
++  [(set (match_operand:DI 0 "register_operand" "")
++	(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))]
++  "NDS32_EXT_DSP_P ()"
++  "#"  ;; no assembler template of its own; the insn is always split
++  "NDS32_EXT_DSP_P ()"
++  [(const_int 0)]
++{
++  /* Obtain the low-part and high-part pieces of the DI destination.  */
++  rtx dst_lo = nds32_di_low_part_subreg (operands[0]);
++  rtx dst_hi = nds32_di_high_part_subreg (operands[0]);
++
++  /* Low part = extendhisi2 of the source; high part = low part
++     shifted right by 31.  */
++  emit_insn (gen_extendhisi2 (dst_lo, operands[1]));
++  emit_insn (gen_ashrsi3 (dst_hi, dst_lo, GEN_INT (31)));
++  DONE;
++}
++  [(set_attr "type"   "alu")
++   (set_attr "length"   "4")])
++
++(define_insn "extendqihi2"  ;; QI -> HI sign extension between registers.
++  [(set (match_operand:HI 0 "register_operand"                 "=r")
++	(sign_extend:HI (match_operand:QI 1 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"  ;; enabled only when NDS32_EXT_DSP_P () is true
++  "sunpkd820\t%0, %1"  ;; printed as the sunpkd820 instruction
++  [(set_attr "type"       "dpack")
++   (set_attr "length"     "4")])
++
++(define_insn "smulsi3_highpart"  ;; op0 = upper 32 bits of the 64-bit signed product op1 * op2.
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(truncate:SI
++	  (lshiftrt:DI  ;; shift the DI product right by 32, then truncate to SI
++	    (mult:DI
++	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
++	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
++	    (const_int 32))))]
++  "NDS32_EXT_DSP_P ()"
++  "smmul\t%0, %1, %2"
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_insn "smmul_round"  ;; Like a 32x32 high-part multiply, but the DI product is wrapped in UNSPEC_ROUND.
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (unspec:DI [(mult:DI  ;; NOTE(review): UNSPEC_ROUND presumably models the rounding of the .u form - confirm
++		  	  (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
++			  (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))]
++		       UNSPEC_ROUND)
++	    (const_int 32))))]
++  "NDS32_EXT_DSP_P ()"
++  "smmul.u\t%0, %1, %2"
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_insn "kmmac"  ;; op0 = ss_plus (op1, upper 32 bits of the signed product op2 * op3).
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(ss_plus:SI (match_operand:SI 1 "register_operand"             " 0")  ;; "0": accumulator shares op0's register
++	  (truncate:SI
++	    (lshiftrt:DI
++	      (mult:DI
++		(sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
++		(sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
++	      (const_int 32)))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmmac\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmmac_round"  ;; op0 = ss_plus (op1, upper 32 bits of UNSPEC_ROUND (op2 * op3)).
++  [(set (match_operand:SI 0 "register_operand"                                     "=r")
++	(ss_plus:SI (match_operand:SI 1 "register_operand"                         " 0")  ;; "0": accumulator shares op0's register
++	  (truncate:SI
++	    (lshiftrt:DI
++	      (unspec:DI [(mult:DI  ;; NOTE(review): UNSPEC_ROUND presumably models the rounding of the .u form - confirm
++			    (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
++			    (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
++			 UNSPEC_ROUND)
++	      (const_int 32)))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmmac.u\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmmsb"  ;; op0 = ss_minus (op1, upper 32 bits of the signed product op2 * op3).
++  [(set (match_operand:SI 0 "register_operand"                         "=r")
++	(ss_minus:SI (match_operand:SI 1 "register_operand"            " 0")  ;; "0": minuend shares op0's register
++	  (truncate:SI
++	    (lshiftrt:DI
++	      (mult:DI
++		(sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
++		(sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
++	      (const_int 32)))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmmsb\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmmsb_round"  ;; op0 = ss_minus (op1, upper 32 bits of UNSPEC_ROUND (op2 * op3)).
++  [(set (match_operand:SI 0 "register_operand"                                     "=r")
++	(ss_minus:SI (match_operand:SI 1 "register_operand"                        " 0")  ;; "0": minuend shares op0's register
++	  (truncate:SI
++	    (lshiftrt:DI
++	      (unspec:DI [(mult:DI  ;; NOTE(review): UNSPEC_ROUND presumably models the rounding of the .u form - confirm
++			    (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
++			    (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
++			 UNSPEC_ROUND)
++	      (const_int 32)))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmmsb.u\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kwmmul"  ;; op0 = upper 32 bits of ss_mult (2 * op1, 2 * op2), operands sign-extended to DI.
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (ss_mult:DI  ;; saturating DI multiply of the two doubled inputs
++	      (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
++	      (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))
++	    (const_int 32))))]
++  "NDS32_EXT_DSP_P ()"
++  "kwmmul\t%0, %1, %2"
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_insn "kwmmul_round"  ;; Same doubled saturating product as kwmmul, wrapped in UNSPEC_ROUND before the shift.
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (unspec:DI [  ;; NOTE(review): UNSPEC_ROUND presumably models the rounding of the .u form - confirm
++	      (ss_mult:DI
++		(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
++		(mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))]
++	      UNSPEC_ROUND)
++	    (const_int 32))))]
++  "NDS32_EXT_DSP_P ()"
++  "kwmmul.u\t%0, %1, %2"
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_expand "smmwb"  ;; Emits smulhisi3_highpart_1 with an endian-dependent half-word selector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The selector is 1 when TARGET_BIG_ENDIAN is set and 0 otherwise.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1],
++				       operands[2], sel));
++  DONE;
++})
++
++(define_expand "smmwt"  ;; Emits smulhisi3_highpart_1 with an endian-dependent half-word selector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The selector is 0 when TARGET_BIG_ENDIAN is set and 1 otherwise.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1],
++				       operands[2], sel));
++  DONE;
++})
++
++(define_insn "smulhisi3_highpart_1"  ;; op0 = truncate ((sext op1 * sext op2[op3]) >> 16); op1 is SI, op2 is V2HI.
++  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (mult:DI
++	      (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
++	      (sign_extend:DI
++	        (vec_select:HI
++		  (match_operand:V2HI 2 "register_operand"               "    r,    r")
++		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
++	    (const_int 16))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Rows are indexed by endianness, columns by which_alternative
++     (alternative 0 carries constraint Iv00, alternative 1 Iv01).  */
++  static const char *const pats[2][2] = {
++    /* !TARGET_BIG_ENDIAN */
++    { "smmwb\t%0, %1, %2",
++      "smmwt\t%0, %1, %2" },
++    /* TARGET_BIG_ENDIAN */
++    { "smmwt\t%0, %1, %2",
++      "smmwb\t%0, %1, %2" }
++  };
++
++  return pats[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_insn "smulhisi3_highpart_2"  ;; Commuted form: op0 = truncate ((sext op1[op3] * sext op2) >> 16); op1 is V2HI, op2 is SI.
++  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (mult:DI
++	      (sign_extend:DI
++	        (vec_select:HI
++		  (match_operand:V2HI 1 "register_operand"               "    r,    r")
++		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))
++	      (sign_extend:DI (match_operand:SI 2 "register_operand"     "    r,    r")))
++	    (const_int 16))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* Alternative 0 has selector Iv00, alternative 1 has Iv01.  */
++    {
++      /* smulhisi3_highpart_1 prints the SI word first and the V2HI vector
++	 second; here those are operands 2 and 1, hence %2, %1.  */
++      const char *pats[] = { "smmwt\t%0, %2, %1",
++			     "smmwb\t%0, %2, %1" };
++      return pats[which_alternative];
++    }
++  else
++    {
++      const char *pats[] = { "smmwb\t%0, %2, %1",
++			     "smmwt\t%0, %2, %1" };
++      return pats[which_alternative]; }
++}
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_expand "smmwb_round"  ;; Emits smmw_round_internal with an endian-dependent half-word selector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The selector is 1 when TARGET_BIG_ENDIAN is set and 0 otherwise.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  emit_insn (gen_smmw_round_internal (operands[0], operands[1],
++				      operands[2], sel));
++  DONE;
++})
++
++(define_expand "smmwt_round"  ;; Emits smmw_round_internal with an endian-dependent half-word selector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* The selector is 0 when TARGET_BIG_ENDIAN is set and 1 otherwise.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_smmw_round_internal (operands[0], operands[1],
++				      operands[2], sel));
++  DONE;
++})
++
++(define_insn "smmw_round_internal"  ;; op0 = truncate (UNSPEC_ROUND (sext op1 * sext op2[op3]) >> 16).
++  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (unspec:DI
++	      [(mult:DI
++		 (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
++		 (sign_extend:DI
++		   (vec_select:HI
++		     (match_operand:V2HI 2 "register_operand"               "    r,    r")
++		     (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
++	      UNSPEC_ROUND)
++	    (const_int 16))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Rows are indexed by endianness, columns by which_alternative
++     (alternative 0 carries constraint Iv00, alternative 1 Iv01).  */
++  static const char *const pats[2][2] = {
++    /* !TARGET_BIG_ENDIAN */
++    { "smmwb.u\t%0, %1, %2",
++      "smmwt.u\t%0, %1, %2" },
++    /* TARGET_BIG_ENDIAN */
++    { "smmwt.u\t%0, %1, %2",
++      "smmwb.u\t%0, %1, %2" }
++  };
++
++  return pats[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmul")
++   (set_attr "length"   "4")])
++
++(define_expand "kmmawb"  ;; Emits kmmaw_internal; operand 1 is the accumulator, 2 the SI word, 3 the V2HI vector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Selector is 1 if TARGET_BIG_ENDIAN, else 0; operands[1] is passed last.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
++  emit_insn (gen_kmmaw_internal (operands[0], operands[2],
++				 operands[3], sel, operands[1]));
++  DONE;
++})
++
++(define_expand "kmmawt"  ;; Emits kmmaw_internal; operand 1 is the accumulator, 2 the SI word, 3 the V2HI vector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Selector is 0 if TARGET_BIG_ENDIAN, else 1; operands[1] is passed last.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_kmmaw_internal (operands[0], operands[2],
++				 operands[3], sel, operands[1]));
++  DONE;
++})
++
++(define_insn "kmmaw_internal"  ;; op0 = ss_plus (op4, truncate ((sext op1 * sext op2[op3]) >> 16)); op4 is tied to op0.
++  [(set (match_operand:SI 0 "register_operand"                               "=   r,    r")
++	(ss_plus:SI
++	  (match_operand:SI 4 "register_operand"                             "    0,    0")
++	  (truncate:SI
++	    (lshiftrt:DI
++	      (mult:DI
++		(sign_extend:DI (match_operand:SI 1 "register_operand"       "    r,    r"))
++		  (sign_extend:DI
++		    (vec_select:HI
++		      (match_operand:V2HI 2 "register_operand"               "    r,    r")
++		      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
++	      (const_int 16)))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Rows are indexed by endianness, columns by which_alternative
++     (alternative 0 carries constraint Iv00, alternative 1 Iv01).  */
++  static const char *const pats[2][2] = {
++    /* !TARGET_BIG_ENDIAN */
++    { "kmmawb\t%0, %1, %2",
++      "kmmawt\t%0, %1, %2" },
++    /* TARGET_BIG_ENDIAN */
++    { "kmmawt\t%0, %1, %2",
++      "kmmawb\t%0, %1, %2" }
++  };
++
++  return pats[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_expand "kmmawb_round"  ;; Emits kmmaw_round_internal; operand 1 is the accumulator, 2 the SI word, 3 the V2HI vector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)  /* Selector is 1 on big-endian, 0 otherwise; operands[1] is passed last.  */
++    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
++  else
++    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
++  DONE;
++}
++  [(set_attr "type"     "dmac")  ;; same type as kmmawt_round and kmmaw_round_internal
++   (set_attr "length"   "4")])
++
++(define_expand "kmmawt_round"  ;; Emits kmmaw_round_internal; operand 1 is the accumulator, 2 the SI word, 3 the V2HI vector.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Selector is 0 if TARGET_BIG_ENDIAN, else 1; operands[1] is passed last.  */
++  rtx sel = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
++  emit_insn (gen_kmmaw_round_internal (operands[0], operands[2],
++				       operands[3], sel, operands[1]));
++  DONE;
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++
++(define_insn "kmmaw_round_internal"  ;; op0 = ss_plus (op4, truncate (UNSPEC_ROUND (sext op1 * sext op2[op3]) >> 16)).
++  [(set (match_operand:SI 0 "register_operand"                                "=   r,    r")
++	(ss_plus:SI
++	  (match_operand:SI 4 "register_operand"                              "    0,    0")
++	  (truncate:SI
++	    (lshiftrt:DI
++	      (unspec:DI
++		[(mult:DI
++		   (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
++		   (sign_extend:DI
++		     (vec_select:HI
++		       (match_operand:V2HI 2 "register_operand"               "    r,    r")
++		       (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
++		UNSPEC_ROUND)
++	      (const_int 16)))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Rows are indexed by endianness, columns by which_alternative
++     (alternative 0 carries constraint Iv00, alternative 1 Iv01).  */
++  static const char *const pats[2][2] = {
++    /* !TARGET_BIG_ENDIAN */
++    { "kmmawb.u\t%0, %1, %2",
++      "kmmawt.u\t%0, %1, %2" },
++    /* TARGET_BIG_ENDIAN */
++    { "kmmawt.u\t%0, %1, %2",
++      "kmmawb.u\t%0, %1, %2" }
++  };
++
++  return pats[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_expand "smalbb"  ;; Emits smaddhidi with the same endian-dependent index for both V2HI inputs.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Both half-word indices are 1 when TARGET_BIG_ENDIAN is set and
++     0 otherwise.  operands[1], the DI accumulator, is passed as the
++     fourth argument of gen_smaddhidi.  */
++  int idx = TARGET_BIG_ENDIAN ? 1 : 0;
++
++  emit_insn (gen_smaddhidi (operands[0], operands[2],
++			    operands[3], operands[1],
++			    GEN_INT (idx), GEN_INT (idx)));
++  DONE;
++})
++
++(define_expand "smalbt"  ;; Emits smaddhidi with opposite endian-dependent indices for the two V2HI inputs.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Index pair is (1, 0) when TARGET_BIG_ENDIAN is set and (0, 1)
++     otherwise.  operands[1], the DI accumulator, is passed fourth.  */
++  int idx_a = TARGET_BIG_ENDIAN ? 1 : 0;
++  int idx_b = 1 - idx_a;
++
++  emit_insn (gen_smaddhidi (operands[0], operands[2],
++			    operands[3], operands[1],
++			    GEN_INT (idx_a), GEN_INT (idx_b)));
++  DONE;
++})
++
++(define_expand "smaltt"  ;; Emits smaddhidi with the same endian-dependent index for both V2HI inputs.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" "")
++   (match_operand:V2HI 3 "register_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Both half-word indices are 0 when TARGET_BIG_ENDIAN is set and
++     1 otherwise.  operands[1], the DI accumulator, is passed as the
++     fourth argument of gen_smaddhidi.  */
++  int idx = TARGET_BIG_ENDIAN ? 0 : 1;
++
++  emit_insn (gen_smaddhidi (operands[0], operands[2],
++			    operands[3], operands[1],
++			    GEN_INT (idx), GEN_INT (idx)));
++  DONE;
++})
++
++(define_insn "smaddhidi"  ;; op0 = op3 + sext:DI (op1[op4]) * sext:DI (op2[op5]); op3 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
++	(plus:DI
++	  (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")
++	  (mult:DI
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
++		(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
++		(parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Rows are indexed by endianness, columns by which_alternative.
++     Some smalbt entries print the two sources as %2, %1.  */
++  static const char *const pats[2][4] = {
++    /* !TARGET_BIG_ENDIAN */
++    { "smalbb\t%0, %1, %2",
++      "smalbt\t%0, %1, %2",
++      "smaltt\t%0, %1, %2",
++      "smalbt\t%0, %2, %1" },
++    /* TARGET_BIG_ENDIAN */
++    { "smaltt\t%0, %1, %2",
++      "smalbt\t%0, %2, %1",
++      "smalbb\t%0, %1, %2",
++      "smalbt\t%0, %1, %2" }
++  };
++
++  return pats[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smaddhidi2"  ;; op0 = sext:DI (op1[op4]) * sext:DI (op2[op5]) + op3; addend is the second plus operand.
++  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
++	(plus:DI
++	  (mult:DI
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
++		(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
++	    (sign_extend:DI
++	      (vec_select:HI
++		(match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
++		(parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))
++	  (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Rows are indexed by endianness, columns by which_alternative.
++     Some smalbt entries print the two sources as %2, %1.  */
++  static const char *const pats[2][4] = {
++    /* !TARGET_BIG_ENDIAN */
++    { "smalbb\t%0, %1, %2",
++      "smalbt\t%0, %1, %2",
++      "smaltt\t%0, %1, %2",
++      "smalbt\t%0, %2, %1" },
++    /* TARGET_BIG_ENDIAN */
++    { "smaltt\t%0, %1, %2",
++      "smalbt\t%0, %2, %1",
++      "smalbb\t%0, %1, %2",
++      "smalbt\t%0, %1, %2" }
++  };
++
++  return pats[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_expand "smalda1"  ;; Endian-neutral entry point: forwards to smalda1_be or smalda1_le.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" " r")
++   (match_operand:V2HI 3 "register_operand" " r")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Choose the variant by TARGET_BIG_ENDIAN and emit it.  */
++  emit_insn (TARGET_BIG_ENDIAN
++	     ? gen_smalda1_be (operands[0], operands[1], operands[2], operands[3])
++	     : gen_smalda1_le (operands[0], operands[1], operands[2], operands[3]));
++  DONE;
++})
++
++(define_expand "smalds1"  ;; Endian-neutral entry point: forwards to smalds1_be or smalds1_le.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" " r")
++   (match_operand:V2HI 3 "register_operand" " r")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Choose the variant by TARGET_BIG_ENDIAN and emit it.  */
++  emit_insn (TARGET_BIG_ENDIAN
++	     ? gen_smalds1_be (operands[0], operands[1], operands[2], operands[3])
++	     : gen_smalds1_le (operands[0], operands[1], operands[2], operands[3]));
++  DONE;
++})
++
++(define_insn "smalda1_le"  ;; op0 = op1 + sext:DI (op2[1] * op3[1] + op2[0] * op3[0]); op1 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (plus:SI  ;; sum of the two SI products of sign-extended half-words
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 1)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 0)]))))))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"  ;; variant used when TARGET_BIG_ENDIAN is not set
++  "smalda\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"    "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smalds1_le"  ;; op0 = op1 + sext:DI (op2[1] * op3[1] - op2[0] * op3[0]); op1 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (minus:SI  ;; first product minus second product, both on sign-extended half-words
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 1)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 0)]))))))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"  ;; variant used when TARGET_BIG_ENDIAN is not set
++  "smalds\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smalda1_be"  ;; op0 = op1 + sext:DI (op2[0] * op3[0] + op2[1] * op3[1]); op1 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (plus:SI  ;; sum of the two SI products of sign-extended half-words
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 0)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 1)]))))))))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"  ;; variant used when TARGET_BIG_ENDIAN is set
++  "smalda\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smalds1_be"  ;; op0 = op1 + sext:DI (op2[0] * op3[0] - op2[1] * op3[1]); op1 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (minus:SI  ;; first product minus second product, both on sign-extended half-words
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 0)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 1)]))))))))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"  ;; variant used when TARGET_BIG_ENDIAN is set
++  "smalds\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_expand "smaldrs3"  ;; Endian-neutral entry point: forwards to smaldrs3_be or smaldrs3_le.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" " r")
++   (match_operand:V2HI 3 "register_operand" " r")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Choose the variant by TARGET_BIG_ENDIAN and emit it.  */
++  emit_insn (TARGET_BIG_ENDIAN
++	     ? gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3])
++	     : gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3]));
++  DONE;
++})
++
++(define_insn "smaldrs3_le"  ;; op0 = op1 + sext:DI (op2[0] * op3[0] - op2[1] * op3[1]); op1 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (minus:SI  ;; first product minus second product, both on sign-extended half-words
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 0)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 1)]))))))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"  ;; variant used when TARGET_BIG_ENDIAN is not set
++  "smaldrs\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smaldrs3_be"  ;; op0 = op1 + sext:DI (op2[1] * op3[1] - op2[0] * op3[0]); op1 is tied to op0.
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (minus:SI  ;; first product minus second product, both on sign-extended half-words
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 1)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 0)]))))))))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"  ;; variant used when TARGET_BIG_ENDIAN is set
++  "smaldrs\t%0, %2, %3"  ;; operand 1 is not printed because it is tied to %0
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_expand "smalxda1"  ;; Endian-neutral entry point: forwards to smalxda1_be or smalxda1_le.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" " r")
++   (match_operand:V2HI 3 "register_operand" " r")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Choose the variant by TARGET_BIG_ENDIAN and emit it.  */
++  emit_insn (TARGET_BIG_ENDIAN
++	     ? gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3])
++	     : gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3]));
++  DONE;
++})
++
++(define_expand "smalxds1"  ;; Endian-neutral entry point: forwards to smalxds1_be or smalxds1_le.
++  [(match_operand:DI 0 "register_operand" "")
++   (match_operand:DI 1 "register_operand" "")
++   (match_operand:V2HI 2 "register_operand" " r")
++   (match_operand:V2HI 3 "register_operand" " r")]
++  "NDS32_EXT_DSP_P ()"
++{
++  /* Choose the variant by TARGET_BIG_ENDIAN and emit it.  */
++  emit_insn (TARGET_BIG_ENDIAN
++	     ? gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3])
++	     : gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3]));
++  DONE;
++})
++
++(define_insn "smalxd<add_sub>1_le"
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (plus_minus:SI
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 0)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 1)]))))))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "smalxd<add_sub>\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++
++(define_insn "smalxd<add_sub>1_be"
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"                           " 0")
++	  (sign_extend:DI
++	    (plus_minus:SI
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 0)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 1)]))))
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 2)
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_dup 3)
++				  (parallel [(const_int 0)]))))))))]
++  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
++  "smalxd<add_sub>\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smslda1"
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(minus:DI
++	  (minus:DI
++	    (match_operand:DI 1 "register_operand"                           " 0")
++	    (sign_extend:DI
++	      (mult:SI
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 2 "register_operand" " r")
++				  (parallel [(const_int 1)])))
++		(sign_extend:SI (vec_select:HI
++				  (match_operand:V2HI 3 "register_operand" " r")
++				  (parallel [(const_int 1)]))))))
++	  (sign_extend:DI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 0)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "smslda\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "smslxda1"
++  [(set (match_operand:DI 0 "register_operand"                             "=r")
++	(minus:DI
++	  (minus:DI
++	    (match_operand:DI 1 "register_operand"                           " 0")
++	      (sign_extend:DI
++		(mult:SI
++		  (sign_extend:SI (vec_select:HI
++				    (match_operand:V2HI 2 "register_operand" " r")
++				    (parallel [(const_int 1)])))
++		  (sign_extend:SI (vec_select:HI
++				    (match_operand:V2HI 3 "register_operand" " r")
++				    (parallel [(const_int 0)]))))))
++	  (sign_extend:DI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "smslxda\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++;; mada for synthesizing smalda
++(define_insn_and_split "mada1"
++  [(set (match_operand:SI 0 "register_operand"                          "=r")
++	(plus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" "r")
++			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" "r")
++			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx result0 = gen_reg_rtx (SImode);
++  rtx result1 = gen_reg_rtx (SImode);
++  emit_insn (gen_mulhisi3v (result0, operands[1], operands[2],
++			    operands[3], operands[4]));
++  emit_insn (gen_mulhisi3v (result1, operands[1], operands[2],
++			    operands[5], operands[6]));
++  emit_insn (gen_addsi3 (operands[0], result0, result1));
++  DONE;
++})
++
++(define_insn_and_split "mada2"
++  [(set (match_operand:SI 0 "register_operand"                          "=r")
++	(plus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" "r")
++			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" "r")
++			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx result0 = gen_reg_rtx (SImode);
++  rtx result1 = gen_reg_rtx (SImode);
++  emit_insn (gen_mulhisi3v (result0, operands[1], operands[2],
++			    operands[3], operands[4]));
++  emit_insn (gen_mulhisi3v (result1, operands[1], operands[2],
++			    operands[6], operands[5]));
++  emit_insn (gen_addsi3 (operands[0], result0, result1));
++  DONE;
++})
++
++;; sms for synthesizing smalds
++(define_insn_and_split "sms1"
++  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand"               "    r")
++			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand"               "    r")
++			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
++  "NDS32_EXT_DSP_P ()
++   && (!reload_completed
++       || !nds32_need_split_sms_p (operands[3], operands[4],
++				   operands[5], operands[6]))"
++
++{
++  return nds32_output_sms (operands[3], operands[4],
++			   operands[5], operands[6]);
++}
++  "NDS32_EXT_DSP_P ()
++   && !reload_completed
++   && nds32_need_split_sms_p (operands[3], operands[4],
++			      operands[5], operands[6])"
++  [(const_int 1)]
++{
++  nds32_split_sms (operands[0], operands[1], operands[2],
++		   operands[3], operands[4],
++		   operands[5], operands[6]);
++  DONE;
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn_and_split "sms2"
++  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
++	(minus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand"               "    r")
++			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand"               "    r")
++			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
++  "NDS32_EXT_DSP_P ()
++   && (!reload_completed
++       || !nds32_need_split_sms_p (operands[3], operands[4],
++				   operands[6], operands[5]))"
++{
++  return nds32_output_sms (operands[3], operands[4],
++			   operands[6], operands[5]);
++}
++  "NDS32_EXT_DSP_P ()
++   && !reload_completed
++   && nds32_need_split_sms_p (operands[3], operands[4],
++			      operands[6], operands[5])"
++  [(const_int 1)]
++{
++  nds32_split_sms (operands[0], operands[1], operands[2],
++		   operands[3], operands[4],
++		   operands[6], operands[5]);
++  DONE;
++}
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmda"
++  [(set (match_operand:SI 0 "register_operand"                          "=r")
++	(ss_plus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" "r")
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" "r")
++			      (parallel [(const_int 1)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 0)]))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmda\t%0, %1, %2"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmxda"
++  [(set (match_operand:SI 0 "register_operand"                          "=r")
++	(ss_plus:SI
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 1 "register_operand" "r")
++			      (parallel [(const_int 1)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_operand:V2HI 2 "register_operand" "r")
++			      (parallel [(const_int 0)]))))
++	  (mult:SI
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 1)
++			      (parallel [(const_int 0)])))
++	    (sign_extend:SI (vec_select:HI
++			      (match_dup 2)
++			      (parallel [(const_int 1)]))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmxda\t%0, %1, %2"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmada"
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_plus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_plus:SI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 1)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 0)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmada\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmada2"
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_plus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_plus:SI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 0)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmada\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmaxda"
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_plus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_plus:SI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 0)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmaxda\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmads"
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_plus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_minus:SI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 1)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 0)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmads\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmadrs"
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_plus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_minus:SI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 0)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmadrs\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmaxds"
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_plus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_minus:SI
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 0)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmaxds\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmsda"	; op0 = op1 - (op2[1]*op3[1] + op2[0]*op3[0]), saturating
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_minus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_plus:SI	; sum of both products is subtracted from op1 (cf. smslda1, which subtracts both)
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 1)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 0)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmsda\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmsxda"	; op0 = op1 - (op2[1]*op3[0] + op2[0]*op3[1]), saturating
++  [(set (match_operand:SI 0 "register_operand"                           "=r")
++	(ss_minus:SI
++	  (match_operand:SI 1 "register_operand"                         " 0")
++	  (ss_plus:SI	; sum of both crossed products is subtracted from op1 (cf. smslxda1)
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 2 "register_operand" " r")
++				(parallel [(const_int 1)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_operand:V2HI 3 "register_operand" " r")
++				(parallel [(const_int 0)]))))
++	    (mult:SI
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 2)
++				(parallel [(const_int 0)])))
++	      (sign_extend:SI (vec_select:HI
++				(match_dup 3)
++				(parallel [(const_int 1)])))))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmsxda\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++;; smax[8|16] and umax[8|16]
++(define_insn "<opcode><mode>3"
++  [(set (match_operand:VQIHI 0 "register_operand"               "=r")
++	(sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
++		     (match_operand:VQIHI 2 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "<opcode><bits>\t%0, %1, %2"
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++;; smin[8|16] and umin[8|16]
++(define_insn "<opcode><mode>3"
++  [(set (match_operand:VQIHI 0 "register_operand"              "=r")
++	(sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
++		     (match_operand:VQIHI 2 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "<opcode><bits>\t%0, %1, %2"
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn "<opcode><mode>3_bb"
++  [(set (match_operand:<VELT> 0 "register_operand"                    "=r")
++	(sumin_max:<VELT> (vec_select:<VELT>
++			    (match_operand:VQIHI 1 "register_operand" " r")
++			    (parallel [(const_int 0)]))
++			  (vec_select:<VELT>
++			    (match_operand:VQIHI 2 "register_operand" " r")
++			    (parallel [(const_int 0)]))))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "<opcode><bits>\t%0, %1, %2"
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn_and_split "<opcode><mode>3_tt"	; sumin_max of element 1 of op1 and element 1 of op2
++  [(set (match_operand:<VELT> 0 "register_operand"                    "=r")
++	(sumin_max:<VELT> (vec_select:<VELT>
++			    (match_operand:VQIHI 1 "register_operand" " r")
++			    (parallel [(const_int 1)]))
++			  (vec_select:<VELT>
++			    (match_operand:VQIHI 2 "register_operand" " r")
++			    (parallel [(const_int 1)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"	; NOTE(review): unlike the insn condition, no !TARGET_BIG_ENDIAN test here -- confirm intended
++  [(const_int 0)]
++{
++  rtx tmp = gen_reg_rtx (<MODE>mode);
++  emit_insn (gen_<opcode><mode>3 (tmp, operands[1], operands[2]));	/* whole-vector operation into a fresh pseudo */
++  emit_insn (gen_rotr<mode>_1 (tmp, tmp));
++  emit_move_insn (operands[0], simplify_gen_subreg (<VELT>mode, tmp, <MODE>mode, 0));	/* result is the subreg at byte 0 of tmp */
++  DONE;
++}
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn_and_split "<opcode>v4qi3_22"	; sumin_max of element 2 of op1 and element 2 of op2
++  [(set (match_operand:QI 0 "register_operand"                   "=r")
++	(sumin_max:QI (vec_select:QI
++			(match_operand:V4QI 1 "register_operand" " r")
++			(parallel [(const_int 2)]))
++		      (vec_select:QI
++			(match_operand:V4QI 2 "register_operand" " r")
++			(parallel [(const_int 2)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"	; NOTE(review): unlike the insn condition, no !TARGET_BIG_ENDIAN test here -- confirm intended
++  [(const_int 0)]
++{
++  rtx tmp = gen_reg_rtx (V4QImode);
++  emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2]));	/* whole-vector operation into a fresh pseudo */
++  emit_insn (gen_rotrv4qi_2 (tmp, tmp));
++  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));	/* result is the subreg at byte 0 of tmp */
++  DONE;
++}
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn_and_split "<opcode>v4qi3_33"	; sumin_max of element 3 of op1 and element 3 of op2
++  [(set (match_operand:QI 0 "register_operand"                   "=r")
++	(sumin_max:QI (vec_select:QI
++			(match_operand:V4QI 1 "register_operand" " r")
++			(parallel [(const_int 3)]))
++		      (vec_select:QI
++			(match_operand:V4QI 2 "register_operand" " r")
++			(parallel [(const_int 3)]))))]
++  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"	; NOTE(review): unlike the insn condition, no !TARGET_BIG_ENDIAN test here -- confirm intended
++  [(const_int 0)]
++{
++  rtx tmp = gen_reg_rtx (V4QImode);
++  emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2]));	/* whole-vector operation into a fresh pseudo */
++  emit_insn (gen_rotrv4qi_3 (tmp, tmp));
++  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));	/* result is the subreg at byte 0 of tmp */
++  DONE;
++}
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn_and_split "<opcode>v2hi3_bbtt"	; per-element sumin_max written as a vec_merge of the two lanes
++  [(set (match_operand:V2HI 0 "register_operand"                         "=r")
++	(vec_merge:V2HI
++	  (vec_duplicate:V2HI
++	    (sumin_max:HI (vec_select:HI
++			    (match_operand:V2HI 1 "register_operand" " r")
++			    (parallel [(const_int 1)]))
++			  (vec_select:HI
++			    (match_operand:V2HI 2 "register_operand" " r")
++			    (parallel [(const_int 1)]))))
++	  (vec_duplicate:V2HI
++	    (sumin_max:HI (vec_select:HI
++			    (match_dup 1)	; plain match_dup: the operand's mode comes from its match_operand
++			    (parallel [(const_int 0)]))
++			  (vec_select:HI
++			    (match_dup 2)	; was tagged :HI although operand 2 is V2HI
++			    (parallel [(const_int 0)]))))
++	  (const_int 2)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "#"
++  "NDS32_EXT_DSP_P ()"
++  [(const_int 0)]
++{
++  emit_insn (gen_<opcode>v2hi3 (operands[0], operands[1], operands[2]));	/* replaced by the plain V2HI pattern */
++  DONE;
++}
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_expand "abs<mode>2"
++  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
++	(ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv"
++{
++})
++
++(define_insn "kabs<mode>2"
++  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
++	(ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
++  "NDS32_EXT_DSP_P ()"
++  "kabs<bits>\t%0, %1"
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn "<su>mar64_1"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (mult:DI
++	    (extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>mar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "<su>mar64_2"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(plus:DI
++	  (mult:DI
++	    (extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))
++	  (match_operand:DI 1 "register_operand"     " 0")))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>mar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "<su>mar64_3"	; op0 = op1 + extend:DI (op2 * op3), with the product formed in SImode
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(plus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (extend:DI
++	    (mult:SI	; NOTE(review): SImode product is extended afterwards -- confirm <su>mar64 gives the same value when op2*op3 does not fit in 32 bits
++	      (match_operand:SI 2 "register_operand" " r")
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>mar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "<su>mar64_4"	; same as <su>mar64_3 with the plus operands in the other order
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(plus:DI
++	  (extend:DI
++	  (mult:SI	; NOTE(review): SImode product is extended afterwards -- confirm <su>mar64 gives the same value when op2*op3 does not fit in 32 bits
++	      (match_operand:SI 2 "register_operand" " r")
++	      (match_operand:SI 3 "register_operand" " r")))
++	  (match_operand:DI 1 "register_operand"     " 0")))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>mar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "<su>msr64"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(minus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (mult:DI
++	    (extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>msr64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "<su>msr64_2"	; op0 = op1 - extend:DI (op2 * op3), with the product formed in SImode
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(minus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (extend:DI
++	    (mult:SI	; NOTE(review): SImode product is extended afterwards -- confirm <su>msr64 gives the same value when op2*op3 does not fit in 32 bits
++	      (match_operand:SI 2 "register_operand" " r")
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "<su>msr64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++;; kmar64, kmsr64, ukmar64 and ukmsr64
++(define_insn "kmar64_1"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(ss_plus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (mult:DI
++	    (sign_extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (sign_extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmar64_2"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(ss_plus:DI
++	  (mult:DI
++	    (sign_extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (sign_extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))
++	  (match_operand:DI 1 "register_operand"     " 0")))]
++  "NDS32_EXT_DSP_P ()"
++  "kmar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "kmsr64"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(ss_minus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (mult:DI
++	    (sign_extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (sign_extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "kmsr64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "ukmar64_1"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(us_plus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (mult:DI
++	    (zero_extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (zero_extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "ukmar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "ukmar64_2"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(us_plus:DI
++	  (mult:DI
++	    (zero_extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (zero_extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))
++	  (match_operand:DI 1 "register_operand"     " 0")))]
++  "NDS32_EXT_DSP_P ()"
++  "ukmar64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "ukmsr64"
++  [(set (match_operand:DI 0 "register_operand"       "=r")
++	(us_minus:DI
++	  (match_operand:DI 1 "register_operand"     " 0")
++	  (mult:DI
++	    (zero_extend:DI
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (zero_extend:DI
++	      (match_operand:SI 3 "register_operand" " r")))))]
++  "NDS32_EXT_DSP_P ()"
++  "ukmsr64\t%0, %2, %3"
++  [(set_attr "type"     "dmac")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick1"
++  [(set (match_operand:SI 0 "register_operand"       "=r")
++	  (ior:SI
++	    (and:SI
++	      (match_operand:SI 1 "register_operand" " r")
++	      (match_operand:SI 3 "register_operand" " r"))
++	    (and:SI
++	      (match_operand:SI 2 "register_operand" " r")
++	      (not:SI (match_dup 3)))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %1, %2, %3"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick2"
++  [(set (match_operand:SI 0 "register_operand"       "=r")
++	  (ior:SI
++	    (and:SI
++	      (match_operand:SI 1 "register_operand" " r")
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (and:SI
++	      (not:SI (match_dup 2))
++	      (match_operand:SI 3 "register_operand" " r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %1, %3, %2"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick3"
++  [(set (match_operand:SI 0 "register_operand"       "=r")
++	  (ior:SI
++	    (and:SI
++	      (match_operand:SI 1 "register_operand" " r")
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (and:SI
++	      (match_operand:SI 3 "register_operand" " r")
++	      (not:SI (match_dup 1)))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %2, %3, %1"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick4"
++  [(set (match_operand:SI 0 "register_operand"       "=r")
++	  (ior:SI
++	    (and:SI
++	      (match_operand:SI 1 "register_operand" " r")
++	      (match_operand:SI 2 "register_operand" " r"))
++	    (and:SI
++	      (not:SI (match_dup 1))
++	      (match_operand:SI 3 "register_operand" " r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %2, %3, %1"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick5"
++  [(set (match_operand:SI 0 "register_operand"               "=r")
++	  (ior:SI
++	    (and:SI
++	      (match_operand:SI 1 "register_operand"         " r")
++	      (not:SI (match_operand:SI 2 "register_operand" " r")))
++	    (and:SI
++	      (match_operand:SI 3 "register_operand"         " r")
++	      (match_dup 2))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %3, %1, %2"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick6"
++  [(set (match_operand:SI 0 "register_operand"               "=r")
++	  (ior:SI
++	    (and:SI
++	      (not:SI (match_operand:SI 1 "register_operand" " r"))
++	      (match_operand:SI 2 "register_operand"         " r"))
++	    (and:SI
++	      (match_operand:SI 3 "register_operand" " r")
++	      (match_dup 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %3, %2, %1"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick7"
++  [(set (match_operand:SI 0 "register_operand"               "=r")
++	  (ior:SI
++	    (and:SI
++	      (match_operand:SI 1 "register_operand"         " r")
++	      (not:SI (match_operand:SI 2 "register_operand" " r")))
++	    (and:SI
++	      (match_dup 2)
++	      (match_operand:SI 3 "register_operand"         " r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %3, %1, %2"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "bpick8"
++  [(set (match_operand:SI 0 "register_operand"               "=r")
++	  (ior:SI
++	    (and:SI
++	      (not:SI (match_operand:SI 1 "register_operand" " r"))
++	      (match_operand:SI 2 "register_operand"         " r"))
++	    (and:SI
++	      (match_dup 1)
++	      (match_operand:SI 3 "register_operand"         " r"))))]
++  "NDS32_EXT_DSP_P ()"
++  "bpick\t%0, %3, %2, %1"
++  [(set_attr "type"     "dbpick")
++   (set_attr "length"   "4")])
++
++(define_insn "sraiu"
++  [(set (match_operand:SI 0 "register_operand"                              "=   r, r")
++	(unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand"     "    r, r")
++				 (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))]
++		    UNSPEC_ROUND))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   srai.u\t%0, %1, %2
++   sra.u\t%0, %1, %2"
++  [(set_attr "type"   "daluround")
++   (set_attr "length" "4")])
++
++(define_insn "kssl"
++  [(set (match_operand:SI 0 "register_operand"                   "=   r, r")
++	(ss_ashift:SI (match_operand:SI 1 "register_operand"     "    r, r")
++		      (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   kslli\t%0, %1, %2
++   ksll\t%0, %1, %2"
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")])
++
++(define_insn "kslraw_round"
++  [(set (match_operand:SI 0 "register_operand"                  "=r")
++	(if_then_else:SI
++	  (lt:SI (match_operand:SI 2 "register_operand"        " r")
++		 (const_int 0))
++	  (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r")
++				   (neg:SI (match_dup 2)))]
++		     UNSPEC_ROUND)
++	  (ss_ashift:SI (match_dup 1)
++			(match_dup 2))))]
++  "NDS32_EXT_DSP_P ()"
++  "kslraw.u\t%0, %1, %2"
++  [(set_attr "type"    "daluround")
++   (set_attr "length"  "4")])
++
++(define_insn_and_split "<shift>di3"
++  [(set (match_operand:DI 0 "register_operand" "")
++	(shift_rotate:DI (match_operand:DI 1 "register_operand" "")
++			 (match_operand:SI 2 "nds32_rimm6u_operand" "")))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 0)]
++{
++  if (REGNO (operands[0]) == REGNO (operands[1]))
++    {
++      rtx tmp = gen_reg_rtx (DImode);
++      nds32_split_<code>di3 (tmp, operands[1], operands[2]);
++      emit_move_insn (operands[0], tmp);
++    }
++  else
++    nds32_split_<code>di3 (operands[0], operands[1], operands[2]);
++  DONE;
++})
++
++(define_insn "sclip32"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))]
++  "NDS32_EXT_DSP_P ()"
++  "sclip32\t%0, %1, %2"
++  [(set_attr "type"   "dclip")
++   (set_attr "length" "4")]
++)
++
++(define_insn "uclip32"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))]
++  "NDS32_EXT_DSP_P ()"
++  "uclip32\t%0, %1, %2"
++  [(set_attr "type"   "dclip")
++   (set_attr "length" "4")]
++)
++
++(define_insn "bitrev"
++  [(set (match_operand:SI 0 "register_operand"                 "=r,    r")
++	(unspec:SI [(match_operand:SI 1 "register_operand"     " r,    r")
++		    (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")]
++		   UNSPEC_BITREV))]
++  ""
++  "@
++   bitrev\t%0, %1, %2
++   bitrevi\t%0, %1, %2"
++  [(set_attr "type"   "dalu")
++   (set_attr "length" "4")]
++)
++
++;; wext, wexti
++(define_insn "<su>wext"
++  [(set (match_operand:SI 0 "register_operand"                "=r,   r")
++	(truncate:SI
++	  (shiftrt:DI
++	    (match_operand:DI 1 "register_operand"            " r,   r")
++	    (match_operand:SI 2 "nds32_rimm5u_operand"        " r,Iu05"))))]
++  "NDS32_EXT_DSP_P ()"
++  "@
++   wext\t%0, %1, %2
++   wexti\t%0, %1, %2"
++  [(set_attr "type"     "dwext")
++   (set_attr "length"   "4")])
++
++;; 32-bit add/sub instruction: raddw and rsubw.
++(define_insn "r<opcode>si3"
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(truncate:SI
++	  (ashiftrt:DI
++	    (plus_minus:DI
++	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
++	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "r<opcode>w\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
++
++;; 32-bit add/sub instruction: uraddw and ursubw.
++(define_insn "ur<opcode>si3"
++  [(set (match_operand:SI 0 "register_operand"                       "=r")
++	(truncate:SI
++	  (lshiftrt:DI
++	    (plus_minus:DI
++	      (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
++	      (zero_extend:DI (match_operand:SI 2 "register_operand" " r")))
++	    (const_int 1))))]
++  "NDS32_EXT_DSP_P ()"
++  "ur<opcode>w\t%0, %1, %2"
++  [(set_attr "type"    "dalu")
++   (set_attr "length"  "4")])
+diff --git a/gcc/config/nds32/nds32-e8.md b/gcc/config/nds32/nds32-e8.md
+new file mode 100644
+index 0000000..1f24b5c
+--- /dev/null
++++ b/gcc/config/nds32/nds32-e8.md
+@@ -0,0 +1,329 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define E8 pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_e8_machine")
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; IF - Instruction Fetch
++;; II - Instruction Issue / Address Generation
++;; EX - Instruction Execution
++;; EXD - Pseudo Stage / Load Data Completion
++
++(define_cpu_unit "e8_ii" "nds32_e8_machine")
++(define_cpu_unit "e8_ex" "nds32_e8_machine")
++
++(define_insn_reservation "nds_e8_unknown" 1
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_misc" 1
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_alu" 1
++  (and (eq_attr "type" "alu")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_load" 1
++  (and (match_test "nds32::load_single_p (insn)")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_store" 1
++  (and (match_test "nds32::store_single_p (insn)")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_1" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_2" 1
++  (and (ior (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::load_double_p (insn)"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ii+e8_ex, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_3" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*2, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_4" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*3, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_5" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*4, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_6" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*5, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_7" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*6, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_8" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*7, e8_ex")
++
++(define_insn_reservation "nds_e8_load_multiple_12" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*11, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_2" 1
++  (and (ior (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::store_double_p (insn)"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ii+e8_ex, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_3" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*2, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*3, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*4, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*5, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*6, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*7, e8_ex")
++
++(define_insn_reservation "nds_e8_store_multiple_12" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*11, e8_ex")
++
++(define_insn_reservation "nds_e8_mul_fast" 1
++  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "e8")))
++  "e8_ii, e8_ex")
++
++(define_insn_reservation "nds_e8_mul_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "e8")))
++  "e8_ii, e8_ex*16")
++
++(define_insn_reservation "nds_e8_mac_fast" 1
++  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "e8")))
++  "e8_ii, e8_ii+e8_ex, e8_ex")
++
++(define_insn_reservation "nds_e8_mac_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "e8")))
++  "e8_ii, (e8_ii+e8_ex)*16, e8_ex")
++
++(define_insn_reservation "nds_e8_div" 1
++  (and (eq_attr "type" "div")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, (e8_ii+e8_ex)*36, e8_ex")
++
++(define_insn_reservation "nds_e8_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "e8"))
++  "e8_ii, e8_ex")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD
++;;     Load data from the memory and produce the loaded data. The result is
++;;     ready at EXD.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at EXD.
++;;   ADDR_OUT
++;;     Most load/store instructions can produce an address output if updating
++;;     the base register is required. The result is ready at EX, which is
++;;     produced by ALU.
++;;   ALU, MOVD44, MUL, MAC
++;;     The result is ready at EX.
++;;   DIV_Rs
++;;     A division instruction saves the quotient result to Rt and saves the
++;;     remainder result to Rs. The instruction is separated into two micro-
++;;     operations. The first micro-operation writes to Rt, and the second
++;;     one writes to Rs. Each of the results is ready at EX.
++;;
++;; Consumers (RHS)
++;;   ALU, MUL, DIV
++;;     Require operands at EX.
++;;   ADDR_IN_MOP(N)
++;;      N denotes the address input is required by the N-th micro-operation.
++;;      Such operand is required at II.
++;;   ST
++;;     A store instruction requires its data at EX.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at EX.
++;;   BR_COND
++;;     If a branch instruction is conditional, its input data is required at EX.
++
++;; LD -> ADDR_IN_MOP(1)
++(define_bypass 2
++  "nds_e8_load"
++  "nds_e8_branch,\
++   nds_e8_load, nds_e8_store,\
++   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
++   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
++   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
++   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
++   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
++   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
++  "nds32_e8_load_to_ii_p"
++)
++
++;; LD -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1)
++(define_bypass 2
++  "nds_e8_load"
++  "nds_e8_alu,
++   nds_e8_mul_fast, nds_e8_mul_slow,\
++   nds_e8_mac_fast, nds_e8_mac_slow,\
++   nds_e8_div,\
++   nds_e8_branch,\
++   nds_e8_store,\
++   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
++   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
++   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
++  "nds32_e8_load_to_ex_p"
++)
++
++;; ALU, MOVD44, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1)
++(define_bypass 2
++  "nds_e8_alu,
++   nds_e8_mul_fast, nds_e8_mul_slow,\
++   nds_e8_mac_fast, nds_e8_mac_slow,\
++   nds_e8_div,\
++   nds_e8_load, nds_e8_store,\
++   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
++   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
++   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
++   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
++   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
++   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
++  "nds_e8_branch,\
++   nds_e8_load, nds_e8_store,\
++   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
++   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
++   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
++   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
++   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
++   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
++  "nds32_e8_ex_to_ii_p"
++)
++
++;; LMW(N, N) -> ADDR_IN_MOP(1)
++(define_bypass 2
++  "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
++   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
++   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12"
++  "nds_e8_branch,\
++   nds_e8_load, nds_e8_store,\
++   nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
++   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
++   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\
++   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
++   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
++   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
++  "nds32_e8_last_load_to_ii_p"
++)
++
++;; LMW(N, N) -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1)
++(define_bypass 2
++  "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\
++   nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\
++   nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12"
++  "nds_e8_alu,
++   nds_e8_mul_fast, nds_e8_mul_slow,\
++   nds_e8_mac_fast, nds_e8_mac_slow,\
++   nds_e8_div,\
++   nds_e8_branch,\
++   nds_e8_store,\
++   nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\
++   nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\
++   nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12"
++  "nds32_e8_last_load_to_ex_p"
++)
+diff --git a/gcc/config/nds32/nds32-elf.opt b/gcc/config/nds32/nds32-elf.opt
+new file mode 100644
+index 0000000..afe6aad
+--- /dev/null
++++ b/gcc/config/nds32/nds32-elf.opt
+@@ -0,0 +1,16 @@
++mcmodel=
++Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM)
++Specify the address generation strategy for code model.
++
++Enum
++Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
++Known cmodel types (for use with the -mcmodel= option):
++
++EnumValue
++Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
++
++EnumValue
++Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
++
++EnumValue
++Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
+diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c
+index f8b2738..6525915 100644
+--- a/gcc/config/nds32/nds32-fp-as-gp.c
++++ b/gcc/config/nds32/nds32-fp-as-gp.c
+@@ -1,4 +1,4 @@
+-/* The fp-as-gp pass of Andes NDS32 cpu for GNU compiler
++/* fp-as-gp pass of Andes NDS32 cpu for GNU compiler
+    Copyright (C) 2012-2016 Free Software Foundation, Inc.
+    Contributed by Andes Technology Corporation.
+ 
+@@ -24,19 +24,280 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "ira.h"
++#include "ira-int.h"
++#include "tree-pass.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
++/* Helper: return true if the current function should contain a prologue.  */
++static bool
++nds32_have_prologue_p (void)
++{
++  int i;
++
++  for (i = 0; i < 28; i++)
++    if (NDS32_REQUIRED_CALLEE_SAVED_P (i))
++      return true;
++
++  return (flag_pic
++	  || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
++	  || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM));
++}
++
++static int
++nds32_get_symbol_count (void)
++{
++  int symbol_count = 0;
++  rtx_insn *insn;
++  basic_block bb;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++	{
++	  /* Count the insns whose addressing mode is a symbol.  */
++	  if (single_set (insn) && nds32_symbol_load_store_p (insn))
++	    {
++	      rtx pattern = PATTERN (insn);
++	      rtx mem;
++	      gcc_assert (GET_CODE (pattern) == SET);
++	      if (GET_CODE (SET_SRC (pattern)) == REG )
++		mem = SET_DEST (pattern);
++	      else
++		mem = SET_SRC (pattern);
++
++	      /* We have only lwi37 and swi37 for fp-as-gp optimization,
++		 so don't count anything other than SImode.
++		 MEM for QImode and HImode will be wrapped by ZERO_EXTEND
++		 or SIGN_EXTEND.  */
++	      if (GET_CODE (mem) == MEM)
++		symbol_count++;
++	    }
++	}
++    }
++
++  return symbol_count;
++}
++
+ /* Function to determine whether it is worth to do fp_as_gp optimization.
+-   Return 0: It is NOT worth to do fp_as_gp optimization.
+-   Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization.
++   Return false: It is NOT worth to do fp_as_gp optimization.
++   Return true: It is APPROXIMATELY worth to do fp_as_gp optimization.
+    Note that if it is worth to do fp_as_gp optimization,
+    we MUST set FP_REGNUM ever live in this function.  */
+-int
++static bool
+ nds32_fp_as_gp_check_available (void)
+ {
+-  /* By default we return 0.  */
+-  return 0;
++  basic_block bb;
++  basic_block exit_bb;
++  edge_iterator ei;
++  edge e;
++  bool first_exit_blocks_p;
++
++  /* If there exists ANY of following conditions,
++     we DO NOT perform fp_as_gp optimization:
++       1. TARGET_FORBID_FP_AS_GP is set
++	  regardless of the TARGET_FORCE_FP_AS_GP.
++       2. User explicitly uses 'naked'/'no_prologue' attribute.
++	  We use nds32_naked_function_p() to help such checking.
++       3. Not optimize for size.
++       4. Need frame pointer.
++       5. If $fp is already required to be saved,
++	  it means $fp is already chosen by the register allocator.
++	  Thus we had better not use it for fp_as_gp optimization.
++       6. This function is a vararg function.
++	  DO NOT apply fp_as_gp optimization on this function
++	  because it may change and break stack frame.
++       7. The epilogue is empty.
++	  This happens when the function uses exit()
++	  or its attribute is no_return.
++	  In that case, compiler will not expand epilogue
++	  so that we have no chance to output .omit_fp_end directive.  */
++  if (TARGET_FORBID_FP_AS_GP
++      || nds32_naked_function_p (current_function_decl)
++      || !optimize_size
++      || frame_pointer_needed
++      || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
++      || (cfun->stdarg == 1)
++      || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL))
++    return false;
++
++  /* Disable fp_as_gp if there is any infinite loop, since $fp may be
++     reused inside infinite loops by register renaming.
++     To detect infinite loops, we check that exit_bb post-dominates all
++     other basic blocks, which holds when there is no infinite loop.  */
++  first_exit_blocks_p = true;
++  exit_bb = NULL;
++
++  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
++    {
++      /* Do not perform fp_as_gp optimization with more than one exit block.  */
++      if (!first_exit_blocks_p)
++	return false;
++
++      exit_bb = e->src;
++      first_exit_blocks_p = false;
++    }
++
++  /* No exit_bb found?  Just give up on fp_as_gp!  */
++  if (!exit_bb)
++    return false;
++
++  /* Each bb is post-dominated by exit_bb if there is no infinite loop!  */
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      if (!dominated_by_p (CDI_POST_DOMINATORS,
++			   bb,
++			   exit_bb))
++	return false;
++    }
++
++  /* Now we can check the possibility of using fp_as_gp optimization.  */
++  if (TARGET_FORCE_FP_AS_GP)
++    {
++      /* User explicitly issues -mforce-fp-as-gp option.  */
++      return true;
++    }
++  else
++    {
++      /* In the following we are going to evaluate whether
++	 it is worth to do fp_as_gp optimization.  */
++      bool good_gain = false;
++      int symbol_count;
++
++      int threshold;
++
++      /* We check whether a prologue is already required.
++	 Note that $gp will be saved in prologue for PIC code generation.
++	 After that, we can set threshold by the existence of prologue.
++	 Each fp-implied instruction will gain 2-byte code size
++	 from gp-aware instruction, so we have following heuristics.  */
++      if (flag_pic
++	  || nds32_have_prologue_p ())
++	{
++	  /* Have-prologue:
++	       Compiler already intends to generate prologue content,
++	       so the fp_as_gp optimization will only insert
++	       'la $fp,_FP_BASE_' instruction, which will be
++	       converted into 4-byte instruction at link time.
++	       The threshold is "3" symbol accesses, 2 + 2 + 2 > 4.  */
++	  threshold = 3;
++	}
++      else
++	{
++	  /* No-prologue:
++	       Compiler originally does not generate prologue content,
++	       so the fp_as_gp optimization will NOT ONLY insert
++	       'la $fp,_FP_BASE_' instruction, but also causes
++	       push/pop instructions.
++	       If we are using v3push (push25/pop25),
++	       the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2;
++	       If we are using normal push (smw/lmw),
++	       the threshold is "5+2" symbol accesses, 7*2 > 4 + 4 + 4.  */
++	  threshold = 5 + (TARGET_V3PUSH ? 0 : 2);
++	}
++
++      symbol_count = nds32_get_symbol_count ();
++
++      if (symbol_count >= threshold)
++	good_gain = true;
++
++      /* Enable fp_as_gp optimization when potential gain is good enough.  */
++      return good_gain;
++    }
++}
++
++static unsigned int
++nds32_fp_as_gp (void)
++{
++  bool fp_as_gp_p;
++  calculate_dominance_info (CDI_POST_DOMINATORS);
++  fp_as_gp_p = nds32_fp_as_gp_check_available ();
++
++  /* This is a hack on IRA to enable/disable a hard register per function.
++     We *MUST* review this approach after migrating to gcc 4.9!  */
++  if (fp_as_gp_p) {
++    SET_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
++    df_set_regs_ever_live (FP_REGNUM, 1);
++  } else {
++    CLEAR_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
++  }
++
++  cfun->machine->fp_as_gp_p = fp_as_gp_p;
++
++  free_dominance_info (CDI_POST_DOMINATORS);
++  return 1;
++}
++
++const pass_data pass_data_nds32_fp_as_gp =
++{
++  RTL_PASS,				/* type */
++  "fp_as_gp",				/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  0					/* todo_flags_finish */
++};
++
++class pass_nds32_fp_as_gp : public rtl_opt_pass
++{
++public:
++  pass_nds32_fp_as_gp (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  bool gate (function *)
++  {
++    return !TARGET_LINUX_ABI
++	   && TARGET_16_BIT
++	   && optimize_size;
++  }
++  unsigned int execute (function *) { return nds32_fp_as_gp (); }
++};
++
++rtl_opt_pass *
++make_pass_nds32_fp_as_gp (gcc::context *ctxt)
++{
++  return new pass_nds32_fp_as_gp (ctxt);
+ }
+ 
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-fpu.md b/gcc/config/nds32/nds32-fpu.md
+new file mode 100644
+index 0000000..11eabd5
+--- /dev/null
++++ b/gcc/config/nds32/nds32-fpu.md
+@@ -0,0 +1,503 @@
++;; Machine description of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++;;SFmode moves
++
++(define_expand "movsf"
++  [(set (match_operand:SF 0 "general_operand" "")
++	(match_operand:SF 1 "general_operand" ""))]
++  ""
++{
++  /* Need to force register if mem <- !reg.  */
++  if (MEM_P (operands[0]) && !REG_P (operands[1]))
++    operands[1] = force_reg (SFmode, operands[1]);
++  if (CONST_DOUBLE_P (operands[1])
++      && !satisfies_constraint_Cs20 (operands[1]))
++    {
++      const REAL_VALUE_TYPE *r;
++      unsigned long l;
++
++      r = CONST_DOUBLE_REAL_VALUE (operands[1]);
++      REAL_VALUE_TO_TARGET_SINGLE (*r, l);
++
++      emit_move_insn (operands[0], gen_rtx_HIGH (SFmode, operands[1]));
++
++      if ((l & 0xFFF) != 0)
++	emit_insn (gen_movsf_lo (operands[0], operands[0], operands[1]));
++      DONE;
++    }
++})
++
++(define_insn "movsf_lo"
++  [(set (match_operand:SF 0 "register_operand" "=r")
++	(lo_sum:SF (match_operand:SF 1 "register_operand" "r")
++		   (match_operand:SF 2 "immediate_operand" "i")))]
++  ""
++  "ori\t%0, %1, lo12(%2)"
++  [(set_attr "type"   "alu")
++   (set_attr "length"   "4")]
++)
++
++(define_insn "*movsf"
++  [(set (match_operand:SF 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r, f, *f, *r, f, Q,   r,   r,    r")
++	(match_operand:SF 1 "general_operand"      " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, f, *r, *f, Q, f,Cs05,Cs20, Chig"))]
++  "(register_operand(operands[0], SFmode)
++    || register_operand(operands[1], SFmode))"
++{
++  switch (which_alternative)
++    {
++    case 0:
++      return "mov55\t%0, %1";
++    case 1:
++      return "ori\t%0, %1, 0";
++    case 2:
++    case 3:
++    case 4:
++    case 5:
++      return nds32_output_16bit_store (operands, 4);
++    case 6:
++      return nds32_output_32bit_store (operands, 4);
++    case 7:
++    case 8:
++    case 9:
++    case 10:
++      return nds32_output_16bit_load (operands, 4);
++    case 11:
++      return nds32_output_32bit_load (operands, 4);
++    case 12:
++      if (TARGET_FPU_SINGLE)
++	return "fcpyss\t%0, %1, %1";
++      else
++	return "#";
++    case 13:
++      return "fmtsr\t%1, %0";
++    case 14:
++      return "fmfsr\t%0, %1";
++    case 15:
++      return nds32_output_float_load (operands);
++    case 16:
++      return nds32_output_float_store (operands);
++    case 17:
++      return "movi55\t%0, %1";
++    case 18:
++      return "movi\t%0, %1";
++    case 19:
++      return "sethi\t%0, %1";
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,fcpy,fmtsr,fmfsr,fload,fstore,alu,alu,alu")
++   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,   4,    4,    4,    4,     4,  2,  4,  4")
++   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1,  v1, fpu,  fpu,  fpu,  fpu,   fpu, v1, v1, v1")])
++
++;; Conditional Move Instructions
++
++(define_expand "mov<mode>cc"
++  [(set (match_operand:ANYF 0 "register_operand" "")
++	(if_then_else:ANYF (match_operand 1 "nds32_float_comparison_operator" "")
++			   (match_operand:ANYF 2 "register_operand" "")
++			   (match_operand:ANYF 3 "register_operand" "")))]
++  ""
++{
++  if (nds32_cond_move_p (operands[1]))
++    {
++      /* Operands[1] condition code is UNORDERED or ORDERED, and
++	 sub-operands[1] MODE isn't SFmode or DFmode, return FAIL
++	 for gcc, because we don't use slt compare instruction
++	 to generate UNORDERED and ORDERED condition.  */
++      FAIL;
++    }
++  else
++    nds32_expand_float_movcc (operands);
++})
++
++(define_insn "fcmov<mode>_eq"
++  [(set (match_operand:ANYF 0 "register_operand" "=f, f")
++	(if_then_else:ANYF (eq (match_operand:SI 1 "register_operand" "f, f")
++			       (const_int 0))
++			   (match_operand:ANYF 2 "register_operand" "f, 0")
++			   (match_operand:ANYF 3 "register_operand" "0, f")))]
++  ""
++  "@
++   fcmovz<size>\t%0,%2,%1
++   fcmovn<size>\t%0,%3,%1"
++  [(set_attr "type"  "fcmov")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fcmov<mode>_ne"
++  [(set (match_operand:ANYF 0 "register_operand" "=f, f")
++	(if_then_else:ANYF (ne (match_operand:SI 1 "register_operand" "f, f")
++			       (const_int 0))
++			   (match_operand:ANYF 2 "register_operand" "f, 0")
++			   (match_operand:ANYF 3 "register_operand" "0, f")))]
++  ""
++  "@
++   fcmovn<size>\t%0,%2,%1
++   fcmovz<size>\t%0,%3,%1"
++  [(set_attr "type"  "fcmov")
++   (set_attr "length" "4")]
++)
++
++;; Arithmetic instructions.
++
++(define_insn "add<mode>3"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(plus:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		   (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++  "fadd<size>\t %0, %1, %2"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "sub<mode>3"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(minus:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		    (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++  "fsub<size>\t %0, %1, %2"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++;; Multiplication insns.
++
++(define_insn "mul<mode>3"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		   (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++  "fmul<size>\t %0, %1, %2"
++  [(set_attr "type"   "fmul<size>")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fma<mode>4"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(fma:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		  (match_operand:ANYF 2 "register_operand" "f")
++		  (match_operand:ANYF 3 "register_operand" "0")))]
++  "TARGET_EXT_FPU_FMA"
++  "fmadd<size>\t%0, %1, %2"
++  [(set_attr "type"   "fmac<size>")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fnma<mode>4"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
++		  (match_operand:ANYF 2 "register_operand" "f")
++		  (match_operand:ANYF 3 "register_operand" "0")))]
++  "TARGET_EXT_FPU_FMA"
++  "fmsub<size>\t%0, %1, %2"
++  [(set_attr "type"   "fmac<size>")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fms<mode>4"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(fma:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		  (match_operand:ANYF 2 "register_operand" "f")
++		  (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))]
++  "TARGET_EXT_FPU_FMA"
++  "fnmsub<size>\t%0, %1, %2"
++  [(set_attr "type"   "fmac<size>")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fnms<mode>4"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
++		  (match_operand:ANYF 2 "register_operand" "f")
++		  (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))]
++  "TARGET_EXT_FPU_FMA"
++  "fnmadd<size>\t%0, %1, %2"
++  [(set_attr "type"   "fmac<size>")
++   (set_attr "length" "4")]
++)
++
++;; Div Instructions.
++
++(define_insn "div<mode>3"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(div:ANYF (match_operand:ANYF 1 "register_operand" "f")
++		  (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++  "fdiv<size>\t %0, %1, %2"
++  [(set_attr "type"   "fdiv<size>")
++   (set_attr "length" "4")]
++)
++
++(define_insn "sqrt<mode>2"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))]
++  ""
++  "fsqrt<size>\t %0, %1"
++  [(set_attr "type"   "fsqrt<size>")
++   (set_attr "length" "4")]
++)
++
++;; Conditional Branch patterns
++
++(define_expand "cstore<mode>4"
++  [(set (match_operand:SI 0 "register_operand" "")
++	(match_operator:SI 1 "nds32_float_comparison_operator"
++	 [(match_operand:ANYF 2 "register_operand" "")
++	  (match_operand:ANYF 3 "register_operand" "")]))]
++  ""
++{
++  nds32_expand_float_cstore (operands);
++  DONE;
++})
++
++(define_expand "cbranch<mode>4"
++  [(set (pc)
++	(if_then_else (match_operator 0 "nds32_float_comparison_operator"
++		       [(match_operand:ANYF 1 "register_operand" "")
++			(match_operand:ANYF 2 "register_operand" "")])
++		      (label_ref (match_operand 3 "" ""))
++		      (pc)))]
++  ""
++{
++  nds32_expand_float_cbranch (operands);
++  DONE;
++})
++
++;; Copysign Instructions.
++
++(define_insn "copysignsf3"
++  [(set (match_operand:SF 0 "register_operand" "=f")
++	(unspec:SF [(match_operand:SF 1 "register_operand" "f")
++		    (match_operand:SF 2 "register_operand" "f")]
++		     UNSPEC_COPYSIGN))]
++  "TARGET_FPU_SINGLE"
++  "fcpyss\t%0,%1,%2"
++  [(set_attr "type"   "fcpy")
++   (set_attr "length" "4")]
++)
++
++(define_insn "copysigndf3"
++  [(set (match_operand:DF 0 "register_operand" "=f")
++	(unspec:DF [(match_operand:DF 1 "register_operand" "f")
++		    (match_operand:DF 2 "register_operand" "f")]
++		     UNSPEC_COPYSIGN))]
++  "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE"
++  "fcpysd\t%0,%1,%2"
++  [(set_attr "type"   "fcpy")
++   (set_attr "length" "4")]
++)
++
++(define_insn "*ncopysign<mode>3"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(neg:ANYF (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
++				(match_operand:ANYF 2 "register_operand" "f")]
++				UNSPEC_COPYSIGN)))]
++  ""
++  "fcpyns<size>\t%0,%1,%2"
++  [(set_attr "type"   "fcpy")
++   (set_attr "length" "4")]
++)
++
++;; Absolute Instructions
++
++(define_insn "abssf2"
++  [(set (match_operand:SF 0 "register_operand" "=f, r")
++	(abs:SF (match_operand:SF 1 "register_operand" "f, r")))]
++  "TARGET_FPU_SINGLE || TARGET_EXT_PERF"
++  "@
++   fabss\t%0, %1
++   bclr\t%0, %1, 31"
++  [(set_attr "type"    "fabs,alu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "fpu,pe1")]
++)
++
++(define_insn "absdf2"
++  [(set (match_operand:DF 0 "register_operand" "=f")
++	(abs:DF (match_operand:DF 1 "register_operand" "f")))]
++  "TARGET_FPU_DOUBLE"
++  "fabsd\t%0, %1"
++  [(set_attr "type"   "fabs")
++   (set_attr "length" "4")]
++)
++
++;; Negation Instructions
++
++(define_insn "*negsf2"
++  [(set (match_operand:SF 0 "register_operand" "=f, r")
++	(neg:SF (match_operand:SF 1 "register_operand" "f, r")))]
++  "TARGET_FPU_SINGLE || TARGET_EXT_PERF"
++  "@
++   fcpynss\t%0, %1, %1
++   btgl\t%0, %1, 31"
++  [(set_attr "type"    "fcpy,alu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "fpu,pe1")]
++)
++
++(define_insn "*negdf2"
++  [(set (match_operand:DF 0 "register_operand" "=f")
++	(neg:DF (match_operand:DF 1 "register_operand" "f")))]
++  "TARGET_FPU_DOUBLE"
++  "fcpynsd\t%0, %1, %1"
++  [(set_attr "type"   "fcpy")
++   (set_attr "length" "4")]
++)
++
++;; Data Format Conversion Instructions
++
++(define_insn "floatunssi<mode>2"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(unsigned_float:ANYF (match_operand:SI 1 "register_operand" "f")))]
++  ""
++  "fui2<size>\t %0, %1"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "floatsi<mode>2"
++  [(set (match_operand:ANYF 0 "register_operand" "=f")
++	(float:ANYF (match_operand:SI 1 "register_operand" "f")))]
++  ""
++  "fsi2<size>\t %0, %1"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fixuns_trunc<mode>si2"
++  [(set (match_operand:SI 0 "register_operand" "=f")
++	(unsigned_fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))]
++  ""
++  "f<size>2ui.z\t %0, %1"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "fix_trunc<mode>si2"
++  [(set (match_operand:SI 0 "register_operand" "=f")
++	(fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))]
++  ""
++  "f<size>2si.z\t %0, %1"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "extendsfdf2"
++  [(set (match_operand:DF 0 "register_operand" "=f")
++	(float_extend:DF (match_operand:SF 1 "register_operand" "f")))]
++  "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE"
++  "fs2d\t%0, %1"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "truncdfsf2"
++  [(set (match_operand:SF 0 "register_operand" "=f")
++	(float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
++  "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE"
++  "fd2s\t%0, %1"
++  [(set_attr "type"   "falu")
++   (set_attr "length" "4")]
++)
++
++;; Compare Instructions
++
++(define_insn "cmp<mode>_eq"
++  [(set (match_operand:SI 0 "register_operand" "=f")
++	(eq:SI (match_operand:ANYF 1 "register_operand" "f")
++	       (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++  {
++    if (NDS32_EXT_FPU_DOT_E)
++      return "fcmpeq<size>.e %0, %1, %2";
++    else
++      return "fcmpeq<size>\t%0, %1, %2";
++  }
++  [(set_attr "type"   "fcmp")
++   (set_attr "length" "4")]
++)
++
++(define_insn "cmp<mode>_lt"
++  [(set (match_operand:SI 0 "register_operand" "=f")
++	(lt:SI (match_operand:ANYF 1 "register_operand" "f")
++	       (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++{
++  if (NDS32_EXT_FPU_DOT_E)
++    return "fcmplt<size>.e %0, %1, %2";
++  else
++    return "fcmplt<size>\t%0, %1, %2";
++}
++  [(set_attr "type"   "fcmp")
++   (set_attr "length" "4")]
++)
++
++(define_insn "cmp<mode>_le"
++  [(set (match_operand:SI 0 "register_operand" "=f")
++	(le:SI (match_operand:ANYF 1 "register_operand" "f")
++	       (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++{
++  if (NDS32_EXT_FPU_DOT_E)
++    return "fcmple<size>.e %0, %1, %2";
++  else
++    return "fcmple<size>\t%0, %1, %2";
++}
++  [(set_attr "type"   "fcmp")
++   (set_attr "length" "4")]
++)
++
++(define_insn "cmp<mode>_un"
++  [(set (match_operand:SI 0 "register_operand" "=f")
++	(unordered:SI (match_operand:ANYF 1 "register_operand" "f")
++		      (match_operand:ANYF 2 "register_operand" "f")))]
++  ""
++{
++  if (NDS32_EXT_FPU_DOT_E)
++    return "fcmpun<size>.e %0, %1, %2";
++  else
++    return "fcmpun<size>\t%0, %1, %2";
++}
++  [(set_attr "type"   "fcmp")
++   (set_attr "length" "4")]
++)
++
++(define_split
++  [(set (match_operand:SF 0 "register_operand" "")
++	(match_operand:SF 1 "register_operand" ""))]
++  "!TARGET_FPU_SINGLE
++   && NDS32_IS_FPR_REGNUM (REGNO (operands[0]))
++   && NDS32_IS_FPR_REGNUM (REGNO (operands[1]))"
++  [(set (match_dup 2) (match_dup 1))
++   (set (match_dup 0) (match_dup 2))]
++{
++  operands[2] = gen_rtx_REG (SFmode, TA_REGNUM);
++})
++
++(define_split
++  [(set (match_operand:SF 0 "register_operand" "")
++	(match_operand:SF 1 "const_double_operand" ""))]
++  "!satisfies_constraint_Cs20 (operands[1])
++   && !satisfies_constraint_Chig (operands[1])"
++  [(set (match_dup 0) (high:SF (match_dup 1)))
++   (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))])
++;; ----------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/nds32-gcse.c b/gcc/config/nds32/nds32-gcse.c
+new file mode 100644
+index 0000000..301981d
+--- /dev/null
++++ b/gcc/config/nds32/nds32-gcse.c
+@@ -0,0 +1,670 @@
++/* Global CSE pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* ------------------------------------------------------------------------ */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++#include "dbgcnt.h"
++#include "df.h"
++#include "reload.h"
++
++/* ------------------------------------------------------------------------ */
++
++struct expr
++{
++  /* The expression (the pattern rtx passed to insert_expr_in_table).  */
++  rtx expr;
++
++  /* Hash of EXPR as computed by hash_expr, cached for the hash table.  */
++  hashval_t hash;
++
++  struct occr *antic_occr;	/* Head of the list of occurrences.  */
++  /* Number of times insert_expr_in_table recorded this expression.  */
++  unsigned int count;
++};
++
++struct occr
++{
++  /* Next occurrence of this expression.  */
++  struct occr *next;
++  /* The insn that computes the expression.  */
++  rtx_insn *insn;
++  /* Nonzero once the hoisting code has deleted INSN for this occurrence.  */
++  char deleted_p;
++};
++
++struct reg_avail_info
++{
++  basic_block last_bb;	/* Block for which the LUIDs below are valid.  */
++  int first_set;	/* First set LUID, or first_use + 1 if used first.  */
++  int first_use;	/* First use LUID, or first_set + 1 if set first.  */
++};
++
++/* Hash table traits for struct expr entries, keyed on their cached hash.  */
++
++struct expr_hasher : nofree_ptr_hash <expr>
++{
++  static inline hashval_t hash (const expr *);
++  static inline bool equal (const expr *, const expr *);
++};
++
++/* Hash callback for expr_table.
++   Return the hash value of entry EXP.  Nothing is recomputed here: the
++   value was cached in EXP->hash when the entry was created.  */
++
++inline hashval_t
++expr_hasher::hash (const expr *exp)
++{
++  return exp->hash;
++}
++
++/* Equality callback for expr_table.
++   Return true if the expressions of EXP1 and EXP2 are equivalent.  */
++
++inline bool
++expr_hasher::equal (const expr *exp1, const expr *exp2)
++{
++  bool same = exp_equiv_p (exp1->expr, exp2->expr, 0, true) != 0;
++
++  gcc_assert (!same || exp1->hash == exp2->hash);
++  return same;
++}
++
++static hashval_t
++hash_expr (rtx x, int *do_not_record_p)
++{
++  machine_mode mode = GET_MODE (x);
++  *do_not_record_p = 0;
++  return hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
++}
++
++
++/* Helpers for memory allocation/freeing.  */
++static void alloc_mem (void);
++static void free_mem (void);
++static void compute_hash_table (void);
++/* Scan the SET pattern of INSN and add it to the hash table if it is a
++   supported constant load into a tracked register.  */
++static void hash_scan_set (rtx_insn *);
++static void insert_expr_in_table (rtx, rtx_insn *);
++static void dump_hash_table (FILE *);
++
++static struct obstack expr_obstack;	/* Backs expr and occr objects.  */
++/* The table itself.  */
++static hash_table <expr_hasher> *expr_table;
++static struct reg_avail_info *reg_avail_info;	/* Indexed by regno.  */
++static sbitmap *hoist_vbein;	/* Per block: expr occurs in the block.  */
++static sbitmap *hoist_vbeout;	/* Per block: occurs in every successor.  */
++
++/* Set up the expression hash table and the obstack that holds its
++   entries.  */
++
++static void
++alloc_mem (void)
++{
++  /* Size the table from the insn count: a quarter of the insns (a gcse.c
++     relic that still seems reasonable), but never fewer than 13 slots so
++     that the table is not too small.  */
++  int n_slots = MAX (get_max_insn_count () / 4, 13);
++
++  expr_table = new hash_table<expr_hasher> (n_slots);
++
++  /* Entries live on an obstack because it can be rolled back to an
++     earlier point cheaply and is very fast to free.  */
++  gcc_obstack_init (&expr_obstack);
++}
++
++/* Release everything set up by alloc_mem.  */
++
++static void
++free_mem (void)
++{
++  /* Freeing from NULL empties the whole obstack.  */
++  obstack_free (&expr_obstack, NULL);
++  delete expr_table;
++  expr_table = NULL;
++}
++
++
++/* Dump all expressions and occurrences that are currently in the
++   expression hash table to FILE.  */
++
++/* Traversal callback for expr_table: print the entry in *SLOT to FILE
++   (its rtx, its cached hash code and the insn of every occurrence).
++   Always returns 1.  */
++int
++nds32_dump_expr_hash_table_entry (expr **slot, FILE *file)
++{
++  struct expr *entry = *slot;
++  struct occr *it;
++
++  fprintf (file, "expr: ");
++  print_rtl (file, entry->expr);
++  fprintf (file,"\nhashcode: %u\n", entry->hash);
++  fprintf (file,"list of occurrences:\n");
++  /* Walk the occurrence list from its head.  */
++  for (it = entry->antic_occr; it != NULL; it = it->next)
++    {
++      print_rtl_single (file, it->insn);
++      fprintf (file, "\n");
++    }
++  fprintf (file, "\n");
++  return 1;
++}
++
++static void
++dump_hash_table (FILE *file)
++{
++  long n_elts = (long) expr_table->elements ();
++
++  fprintf (file, "\n\nexpression hash table\n");
++  fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
++	   (long) expr_table->size (), n_elts, expr_table->collisions ());
++  if (n_elts > 0)
++    {
++      fprintf (file, "\n\ntable entries:\n");
++      expr_table->traverse <FILE *, nds32_dump_expr_hash_table_entry> (file);
++    }
++  fprintf (file, "\n");
++}
++
++/* Insert expression X in INSN in the hash TABLE.
++   If it is already present, record it as the last occurrence in INSN's
++   basic block.  */
++
++static void
++insert_expr_in_table (rtx x, rtx_insn *insn)
++{
++  int do_not_record_p;
++  hashval_t hash;
++  struct expr *cur_expr, **slot;
++  struct occr *antic_occr, *last_occr = NULL;
++
++  hash = hash_expr (x, &do_not_record_p);
++
++  /* Do not insert expression in the table if it contains volatile operands,
++     or if hash_expr determines the expression is something we don't want
++     to or can't handle.  */
++  if (do_not_record_p)
++    return;
++
++  /* We anticipate that redundant expressions are rare, so for convenience
++     allocate a new hash table element here already and set all its fields
++     (obstack memory is not zeroed).  If the expression is already in the
++     table the allocation is rolled back below; obstack_free is fast.  */
++  cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
++					    sizeof (struct expr));
++  cur_expr->expr = x;
++  cur_expr->hash = hash;
++  cur_expr->antic_occr = NULL;
++  cur_expr->count = 0;
++
++  slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT);
++
++  if (! (*slot))
++    /* The expression isn't found, so insert it.  */
++    *slot = cur_expr;
++  else
++    {
++      /* The expression is already in the table, so roll back the
++	 obstack and use the existing table entry.  */
++      obstack_free (&expr_obstack, cur_expr);
++      cur_expr = *slot;
++    }
++
++  /* Search for another occurrence in the same basic block.  */
++  antic_occr = cur_expr->antic_occr;
++  cur_expr->count++;
++  while (antic_occr
++	 && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
++    {
++      /* If an occurrence isn't found, save a pointer to the end of
++	 the list.  */
++      last_occr = antic_occr;
++      antic_occr = antic_occr->next;
++    }
++
++  if (antic_occr)
++    /* Found another instance of the expression in the same basic block.
++       Prefer this occurrence to the currently recorded one.  We want
++       the last one in the block and the block is scanned from start
++       to end.  */
++    antic_occr->insn = insn;
++  else
++    {
++      /* First occurrence of this expression in this basic block.  */
++      antic_occr = (struct occr *) obstack_alloc (&expr_obstack,
++						  sizeof (struct occr));
++
++      /* First occurrence of this expression in any block?  */
++      if (cur_expr->antic_occr == NULL)
++	cur_expr->antic_occr = antic_occr;
++      else
++	last_occr->next = antic_occr;
++
++      antic_occr->insn = insn;
++      antic_occr->next = NULL;
++      antic_occr->deleted_p = 0;
++    }
++}
++
++/* Add INSN to the expression table if its SET pattern has a supported
++   format.  The caller guarantees that the pattern is a SET.  */
++
++static void
++hash_scan_set (rtx_insn *insn)
++{
++  rtx pat = PATTERN (insn);
++  rtx src = SET_SRC (pat);
++  rtx dest = SET_DEST (pat);
++  struct reg_avail_info *info;
++
++  /* Don't mess with jumps and nops.  */
++  if (JUMP_P (insn) || set_noop_p (pat))
++    return;
++
++  /* TODO: support more format.  */
++
++  /* Only destinations among the tracked registers are considered.  */
++  if (!REG_P (dest) || REGNO (dest) > SP_REGNUM)
++    return;
++
++  /* Only immediate sources are supported currently (this is the
++     bugzilla case).  */
++  if (!CONST_INT_P (src) && !CONST_DOUBLE_P (src))
++    return;
++
++  /* Only consider locally anticipatable instructions currently: INSN must
++     be the first set of the register in its block, with no earlier use.  */
++  info = &reg_avail_info[REGNO (dest)];
++  if (BLOCK_FOR_INSN (insn) == info->last_bb
++      && info->first_set == DF_INSN_LUID (insn)
++      && info->first_use >= info->first_set)
++    insert_expr_in_table (pat, insn);
++}
++
++/* Record register first use information for REGNO in INSN.
++
++   Only the first event seen for a register in a block is recorded;
++   first_use is later used to compute "anticipatability".
++
++   last_bb names the block the recorded LUIDs are valid for, so stale
++   entries from other blocks are cheap to detect.  */
++
++static void
++record_first_reg_use_info (rtx_insn *insn, int regno)
++{
++  struct reg_avail_info *entry = &reg_avail_info[regno];
++  basic_block bb = BLOCK_FOR_INSN (insn);
++
++  if (entry->last_bb == bb)
++    return;
++
++  entry->last_bb = bb;
++  entry->first_use = DF_INSN_LUID (insn);
++  /* One past the use: marks the register as used before it is set.  */
++  entry->first_set = entry->first_use + 1;
++}
++
++/* Called from compute_hash_table via note_uses to handle one used
++   expression of an insn, recursing into its operands.  DATA is really
++   the instruction in which the use is taking place.  */
++
++static void
++record_first_use_info (rtx *dest, void *data)
++{
++  rtx_insn *insn = static_cast<rtx_insn*> (data);
++  int i, j;
++  enum rtx_code code;
++  const char *fmt;
++  rtx x = *dest;
++
++  if (x == 0)
++    return;
++
++  code = GET_CODE (x);
++  if (REG_P (x) && REGNO (x) <= SP_REGNUM)
++    {
++      record_first_reg_use_info (insn, REGNO (x));
++      /* DF and DI mode may use two registers; stay inside the table.  */
++      if (GET_MODE_SIZE (GET_MODE (x)) == 8 && REGNO (x) < SP_REGNUM)
++	record_first_reg_use_info (insn, REGNO (x) + 1);
++    }
++
++  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
++    {
++      if (fmt[i] == 'e')
++	record_first_use_info (&XEXP (x, i), data);
++      else if (fmt[i] == 'E')
++	for (j = 0; j < XVECLEN (x, i); j++)
++	  record_first_use_info (&XVECEXP (x, i, j), data);
++    }
++}
++
++/* Record register first/block set information for REGNO in INSN.
++
++   Only the first event seen for a register in a block is recorded;
++   first_set is later used to compute "anticipatability".
++
++   last_bb names the block the recorded LUIDs are valid for, so stale
++   entries from other blocks are cheap to detect.  */
++
++static void
++record_first_reg_set_info (rtx_insn *insn, int regno)
++{
++  struct reg_avail_info *entry = &reg_avail_info[regno];
++  basic_block bb = BLOCK_FOR_INSN (insn);
++
++  if (entry->last_bb == bb)
++    return;
++
++  entry->last_bb = bb;
++  entry->first_set = DF_INSN_LUID (insn);
++  /* One past the set: marks the register as set before it is used.  */
++  entry->first_use = entry->first_set + 1;
++}
++
++/* Called from compute_hash_table via note_stores to handle one SET or
++   CLOBBER in an insn.  DATA is really the instruction in which the SET is
++   taking place.  Only registers inside reg_avail_info are recorded.  */
++
++static void
++record_first_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
++{
++  rtx_insn *last_set_insn = static_cast<rtx_insn *> (data);
++
++  if (GET_CODE (dest) == SUBREG)
++    dest = SUBREG_REG (dest);
++
++  if (REG_P (dest) && REGNO (dest) <= SP_REGNUM)
++    {
++      record_first_reg_set_info (last_set_insn, REGNO (dest));
++      if (GET_MODE_SIZE (GET_MODE (dest)) == 8 && REGNO (dest) < SP_REGNUM)
++	record_first_reg_set_info (last_set_insn, REGNO (dest) + 1);
++    }
++}
++
++/* Build hash table for supported format instructions.
++   Only consider if the instruction is anticipatable in the basic block here.
++   We postpone the def-use check until hoisting.  */
++
++static void
++compute_hash_table (void)
++{
++  basic_block bb;
++  int i;
++
++  /* We only take care hard registers.  The table is zero-initialized so
++     that every last_bb starts out as NULL.  */
++  reg_avail_info = XCNEWVEC (struct reg_avail_info, SP_REGNUM + 1);
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      rtx_insn *insn;
++
++      /* Do not hoist instruction from block which has more
++	 than one predecessor.  */
++      if (EDGE_COUNT (bb->preds) > 1)
++	continue;
++
++      FOR_BB_INSNS (bb, insn)
++	{
++	  if (!NONDEBUG_INSN_P (insn))
++	    continue;
++
++	  /* Construct a caller save register barrier.  We cannot hoist the
++	     instruction over a function call which sets caller save
++	     registers.  */
++	  if (CALL_P (insn))
++	    {
++	      for (i = 0; i <= SP_REGNUM; i++)
++		if (call_used_regs[i])
++		  record_first_reg_use_info (insn, i);
++	    }
++
++	  note_uses (&PATTERN (insn), record_first_use_info, insn);
++	  note_stores (PATTERN (insn), record_first_set_info, insn);
++	}
++
++      /* Build the hash table.  */
++      FOR_BB_INSNS (bb, insn)
++	if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
++	  hash_scan_set (insn);
++    }
++
++  /* The table is only needed while scanning; release it.  */
++  free (reg_avail_info);
++  reg_avail_info = NULL;
++}
++
++/* Hoist instructions in this slot if possible.  Called for each entry
++   of expr_table; always returns 1.  */
++int
++nds32_find_gcse_expr_table (expr **slot, void *data ATTRIBUTE_UNUSED)
++{
++  struct expr *exprs = *slot;
++  struct occr *occr;
++  rtx_insn *insn = NULL, *last_insn;
++  basic_block bb;
++  edge e;
++  unsigned ix;
++  unsigned emit_done, cover, regno;
++  df_ref use;
++  enum machine_mode mode;
++
++  if (exprs->count < 2)
++    return 1;
++
++  bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
++  bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
++
++  /* Set the bit for this slot.  */
++  occr = exprs->antic_occr;
++  while (occr)
++    {
++      insn = occr->insn;
++      bb = BLOCK_FOR_INSN (insn);
++      if (!occr->deleted_p)
++	bitmap_set_bit (hoist_vbein[bb->index], 0);
++      occr = occr->next;
++    }
++
++  /* Try to hoist code for each basic block.  */
++  FOR_EACH_BB_REVERSE_FN (bb, cfun)
++    {
++      if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
++	bitmap_intersection_of_succs (hoist_vbeout[bb->index], hoist_vbein, bb);
++
++      if (bitmap_bit_p (hoist_vbeout[bb->index], 0)
++	  && EDGE_COUNT (bb->succs) > 1)
++	{
++	  emit_done = 0;
++	  cover = FALSE;
++	  for (e = NULL, ix = 0; ix < EDGE_COUNT (bb->succs); ix++)
++	    {
++	      e = EDGE_SUCC (bb, ix);
++	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
++		continue;
++	      occr = exprs->antic_occr;
++	      while (occr)
++		{
++		  insn = occr->insn;
++		  if (!occr->deleted_p && e->dest == BLOCK_FOR_INSN (insn))
++		    break;
++		  occr = occr->next;
++		}
++
++	      /* Each successor in the intersection has a live occurrence.  */
++	      gcc_assert (occr != NULL);
++
++	      if (!emit_done)
++		{
++		  last_insn = BB_END (bb);
++		  /* Check the defined register is not used by the last
++		     instruction of the previous block.  */
++		  regno = REGNO (SET_DEST (PATTERN (insn)));
++		  mode = GET_MODE (SET_DEST (PATTERN (insn)));
++		  FOR_EACH_INSN_USE (use, last_insn)
++		    {
++		      if (DF_REF_REGNO (use) == regno
++			  || regno_clobbered_p (regno, last_insn, mode, 2))
++			{
++			  cover = TRUE;
++			  break;
++			}
++		    }
++
++		  /* TODO: support more format.  */
++		  if (cover)
++		    break;
++		  else if (JUMP_P (last_insn))
++		    {
++		      emit_insn_before_noloc (PATTERN (insn), last_insn, bb);
++		      emit_done = TRUE;
++		    }
++		  else
++		    break;
++		}
++
++	      if (emit_done)
++		{
++		  delete_insn (insn);
++		  occr->deleted_p = TRUE;
++		}
++	    }
++	}
++    }
++  return 1;
++}
++
++static int
++hoist_code (void)
++{
++  int n_blocks = last_basic_block_for_fn (cfun);
++
++  /* One single-bit set per basic block for each of the two vectors.  */
++  hoist_vbein = sbitmap_vector_alloc (n_blocks, 1);
++  hoist_vbeout = sbitmap_vector_alloc (n_blocks, 1);
++  expr_table->traverse <void *, nds32_find_gcse_expr_table> (NULL);
++  sbitmap_vector_free (hoist_vbeout);
++  sbitmap_vector_free (hoist_vbein);
++  return 0;
++}
++
++
++static unsigned int
++nds32_gcse_opt (void)
++{
++  /* Bail out when at most one block exists besides the fixed ones.  */
++  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
++    return 0;
++  /* Allocate memory for this pass: the expression hash table and the
++     obstack backing its entries.  */
++  alloc_mem ();
++
++  df_chain_add_problem (DF_DU_CHAIN);
++  df_insn_rescan_all ();
++  df_analyze ();
++
++  compute_hash_table ();
++
++  if (dump_file)
++    dump_hash_table (dump_file);
++
++  hoist_code ();
++
++  df_insn_rescan_all ();
++  free_mem ();
++  return 0;
++}
++
++const pass_data pass_data_nds32_gcse_opt =
++{
++  RTL_PASS,				/* type */
++  "gcse_opt",				/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  0,					/* todo_flags_finish */
++};
++
++class pass_nds32_gcse_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_gcse_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_gcse_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gated on TARGET_GCSE_OPT, runs nds32_gcse_opt.  */
++  bool gate (function *) { return TARGET_GCSE_OPT; }
++  unsigned int execute (function *) { return nds32_gcse_opt (); }
++};
++
++rtl_opt_pass *
++make_pass_nds32_gcse_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_gcse_opt (ctxt);
++}
++
++/* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-graywolf.md b/gcc/config/nds32/nds32-graywolf.md
+new file mode 100644
+index 0000000..f9ddbd8
+--- /dev/null
++++ b/gcc/config/nds32/nds32-graywolf.md
+@@ -0,0 +1,471 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++;; ------------------------------------------------------------------------
++;; Define Graywolf pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_graywolf_machine")
++
++(define_cpu_unit "gw_ii_0" "nds32_graywolf_machine")
++(define_cpu_unit "gw_ii_1" "nds32_graywolf_machine")
++(define_cpu_unit "gw_ex_p0" "nds32_graywolf_machine")
++(define_cpu_unit "gw_mm_p0" "nds32_graywolf_machine")
++(define_cpu_unit "gw_wb_p0" "nds32_graywolf_machine")
++(define_cpu_unit "gw_ex_p1" "nds32_graywolf_machine")
++(define_cpu_unit "gw_mm_p1" "nds32_graywolf_machine")
++(define_cpu_unit "gw_wb_p1" "nds32_graywolf_machine")
++(define_cpu_unit "gw_iq_p2" "nds32_graywolf_machine")
++(define_cpu_unit "gw_rf_p2" "nds32_graywolf_machine")
++(define_cpu_unit "gw_e1_p2" "nds32_graywolf_machine")
++(define_cpu_unit "gw_e2_p2" "nds32_graywolf_machine")
++(define_cpu_unit "gw_e3_p2" "nds32_graywolf_machine")
++(define_cpu_unit "gw_e4_p2" "nds32_graywolf_machine")
++
++(define_reservation "gw_ii" "gw_ii_0 | gw_ii_1")
++(define_reservation "gw_ex" "gw_ex_p0 | gw_ex_p1")
++(define_reservation "gw_mm" "gw_mm_p0 | gw_mm_p1")
++(define_reservation "gw_wb" "gw_wb_p0 | gw_wb_p1")
++
++(define_reservation "gw_ii_all" "gw_ii_0 + gw_ii_1")
++
++(define_insn_reservation "nds_gw_unknown" 1
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_misc" 1
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_mmu" 1
++  (and (eq_attr "type" "mmu")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_alu" 1
++  (and (and (eq_attr "type" "alu")
++            (match_test "!nds32::movd44_insn_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_movd44" 1
++  (and (and (eq_attr "type" "alu")
++            (match_test "nds32::movd44_insn_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_alu_shift" 1
++  (and (eq_attr "type" "alu_shift")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex*2, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_pbsad" 1
++  (and (eq_attr "type" "pbsad")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex*3, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_pbsada" 1
++  (and (eq_attr "type" "pbsada")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex*3, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_load" 1
++  (and (and (eq_attr "type" "load")
++            (match_test "!nds32::post_update_insn_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_2w" 1
++  (and (and (eq_attr "type" "load")
++            (match_test "nds32::post_update_insn_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store" 1
++  (and (and (eq_attr "type" "store")
++            (match_test "!nds32::store_offset_reg_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_3r" 1
++  (and (and (eq_attr "type" "store")
++            (match_test "nds32::store_offset_reg_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_1" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_2" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "2"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_3" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_4" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_5" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_6" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_7" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_8" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_load_multiple_12" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_2" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "2"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_3" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_store_multiple_12" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
++
++(define_insn_reservation "nds_gw_mul_fast1" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1")
++       (and (eq_attr "type" "mul")
++       (eq_attr "pipeline_model" "graywolf")))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_mul_fast2" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2")
++       (and (eq_attr "type" "mul")
++       (eq_attr "pipeline_model" "graywolf")))
++  "gw_ii_0, gw_ex_p0*2, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_mul_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++       (eq_attr "pipeline_model" "graywolf")))
++  "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_mac_fast1" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1")
++       (and (eq_attr "type" "mac")
++       (eq_attr "pipeline_model" "graywolf")))
++  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_mac_fast2" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2")
++       (and (eq_attr "type" "mac")
++       (eq_attr "pipeline_model" "graywolf")))
++  "gw_ii_all, gw_ex_p0*2, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_mac_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++       (eq_attr "pipeline_model" "graywolf")))
++  "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_div" 1
++  (and (and (eq_attr "type" "div")
++            (match_test "!nds32::divmod_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_div_2w" 1
++  (and (and (eq_attr "type" "div")
++            (match_test "nds32::divmod_p (insn)"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_alu" 1
++  (and (eq_attr "type" "dalu")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ex, gw_mm, gw_wb")
++
++(define_insn_reservation "nds_gw_dsp_alu64" 1
++  (and (eq_attr "type" "dalu64")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_alu_round" 1
++  (and (eq_attr "type" "daluround")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_cmp" 1
++  (and (eq_attr "type" "dcmp")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_clip" 1
++  (and (eq_attr "type" "dclip")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_mul" 1
++  (and (eq_attr "type" "dmul")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_mac" 1
++  (and (eq_attr "type" "dmac")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_insb" 1
++  (and (eq_attr "type" "dinsb")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_pack" 1
++  (and (eq_attr "type" "dpack")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_bpick" 1
++  (and (eq_attr "type" "dbpick")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_dsp_wext" 1
++  (and (eq_attr "type" "dwext")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
++
++(define_insn_reservation "nds_gw_fpu_alu" 4
++  (and (eq_attr "type" "falu")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_muls" 4
++  (and (eq_attr "type" "fmuls")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_muld" 4
++  (and (eq_attr "type" "fmuld")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_macs" 4
++  (and (eq_attr "type" "fmacs")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*3, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_macd" 4
++  (and (eq_attr "type" "fmacd")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*4, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_divs" 4
++  (and (ior (eq_attr "type" "fdivs")
++	    (eq_attr "type" "fsqrts"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*14, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_divd" 4
++  (and (ior (eq_attr "type" "fdivd")
++	    (eq_attr "type" "fsqrtd"))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*28, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_fast_alu" 2
++  (and (ior (eq_attr "type" "fcmp")
++	    (ior (eq_attr "type" "fabs")
++		 (ior (eq_attr "type" "fcpy")
++		      (eq_attr "type" "fcmov"))))
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_fmtsr" 1
++  (and (eq_attr "type" "fmtsr")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_fmtdr" 1
++  (and (eq_attr "type" "fmtdr")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_fmfsr" 1
++  (and (eq_attr "type" "fmfsr")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_fmfdr" 1
++  (and (eq_attr "type" "fmfdr")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_load" 3
++  (and (eq_attr "type" "fload")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++(define_insn_reservation "nds_gw_fpu_store" 1
++  (and (eq_attr "type" "fstore")
++       (eq_attr "pipeline_model" "graywolf"))
++  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
++
++;; FPU_ADDR_OUT -> FPU_ADDR_IN
++;; Main pipeline rules don't need this because their default latency is 1.
++(define_bypass 1
++  "nds_gw_fpu_load, nds_gw_fpu_store"
++  "nds_gw_fpu_load, nds_gw_fpu_store"
++  "nds32_gw_ex_to_ex_p"
++)
++
++;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
++;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
++(define_bypass 2
++  "nds_gw_load, nds_gw_load_2w,\
++   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
++   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
++   nds_gw_div, nds_gw_div_2w,\
++   nds_gw_dsp_alu64, nds_gw_dsp_mul, nds_gw_dsp_mac,\
++   nds_gw_dsp_alu_round, nds_gw_dsp_bpick, nds_gw_dsp_wext"
++  "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\
++   nds_gw_pbsad, nds_gw_pbsada,\
++   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
++   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
++   nds_gw_branch,\
++   nds_gw_div, nds_gw_div_2w,\
++   nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\
++   nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
++   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
++   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\
++   nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\
++   nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\
++   nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\
++   nds_gw_mmu,\
++   nds_gw_dsp_alu, nds_gw_dsp_alu_round,\
++   nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\
++   nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\
++   nds_gw_dsp_wext, nds_gw_dsp_bpick"
++  "nds32_gw_mm_to_ex_p"
++)
++
++;; LMW(N, N)
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU
++;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
++(define_bypass 2
++  "nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
++   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
++   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12"
++  "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\
++   nds_gw_pbsad, nds_gw_pbsada,\
++   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
++   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
++   nds_gw_branch,\
++   nds_gw_div, nds_gw_div_2w,\
++   nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\
++   nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
++   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
++   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\
++   nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\
++   nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\
++   nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\
++   nds_gw_mmu,\
++   nds_gw_dsp_alu, nds_gw_dsp_alu_round,\
++   nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\
++   nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\
++   nds_gw_dsp_wext, nds_gw_dsp_bpick"
++  "nds32_gw_last_load_to_ex_p"
++)
+diff --git a/gcc/config/nds32/nds32-intrinsic.c b/gcc/config/nds32/nds32-intrinsic.c
+index fabf262..7547fb1 100644
+--- a/gcc/config/nds32/nds32-intrinsic.c
++++ b/gcc/config/nds32/nds32-intrinsic.c
+@@ -24,210 +24,1867 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
+-#include "rtl.h"
+ #include "tree.h"
+-#include "optabs.h"		/* For GEN_FCN.  */
+-#include "diagnostic-core.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
+ #include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
+ #include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
+ #include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* Function to expand builtin function for
+-   '[(unspec_volatile [(reg)])]'.  */
++/* Expand argument number INDEX of the call expression EXP with
++   expand_normal and return the resulting rtx.  */
++static rtx
++nds32_read_argument (tree exp, unsigned int index)
++{
++  return expand_normal (CALL_EXPR_ARG (exp, index));
++}
++
++/* Return a legitimate rtx for instruction ICODE's return value.  Use TARGET
++   if it's not null, has the right mode, and satisfies operand 0's
++   predicate; otherwise return a new register of operand 0's mode.  */
++static rtx
++nds32_legitimize_target (enum insn_code icode, rtx target)
++{
++  enum machine_mode mode = insn_data[icode].operand[0].mode;
++
++  if (! target
++      || GET_MODE (target) != mode
++      || ! (*insn_data[icode].operand[0].predicate) (target, mode))
++    return gen_reg_rtx (mode);  /* TARGET is unusable as operand 0.  */
++  else
++    return target;
++}
++
++/* Given that ARG is being passed as operand OPNUM to instruction ICODE,
++   return ARG itself if it satisfies the operand's predicate.  A CONST_INT
++   for a vector-mode operand is split into a CONST_VECTOR held in a register;
++   anything else is converted into a new register of the operand's mode.  */
+ static rtx
+-nds32_expand_builtin_null_ftype_reg (enum insn_code icode,
+-				     tree exp, rtx target)
++nds32_legitimize_argument (enum insn_code icode, int opnum, rtx arg)
++{
++  enum machine_mode mode = insn_data[icode].operand[opnum].mode;
++
++  if ((*insn_data[icode].operand[opnum].predicate) (arg, mode))
++    return arg;
++  else if (VECTOR_MODE_P (mode) && CONST_INT_P (arg))
++    {
++      /* Split the CONST_INT into the NUNITS elements of a CONST_VECTOR.  */
++      int nunits = GET_MODE_NUNITS (mode);
++      int i, shift = 0;
++      rtvec v = rtvec_alloc (nunits);
++      int val = INTVAL (arg);  /* NOTE(review): confirm narrowing to int.  */
++      enum machine_mode val_mode = (mode == V4QImode) ? QImode : HImode;
++      int shift_acc = (val_mode == QImode) ? 8 : 16;
++      int mask = (val_mode == QImode) ? 0xff : 0xffff;
++      int tmp_val = val;
++
++      if (TARGET_BIG_ENDIAN)  /* Lowest bits fill the last element.  */
++	for (i = 0; i < nunits; i++)
++	  {
++	    tmp_val = (val >> shift) & mask;
++	    RTVEC_ELT (v, nunits - i - 1) = gen_int_mode (tmp_val, val_mode);
++	    shift += shift_acc;
++	  }
++      else  /* Lowest bits fill element 0.  */
++	for (i = 0; i < nunits; i++)
++	  {
++	    tmp_val = (val >> shift) & mask;
++	    RTVEC_ELT (v, i) = gen_int_mode (tmp_val, val_mode);
++	    shift += shift_acc;
++	  }
++
++      return copy_to_mode_reg (mode, gen_rtx_CONST_VECTOR (mode, v));
++    }
++  else
++    {
++      rtx tmp_rtx = gen_reg_rtx (mode);
++      convert_move (tmp_rtx, arg, false);
++      return tmp_rtx;
++    }
++}
++
++/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE,
++   i.e. it is a CONST_INT accepted by that operand's predicate.  Otherwise
++   report an error mentioning the built-in's NAME and return false.  */
++static int
++nds32_check_constant_argument (enum insn_code icode, int opnum, rtx opval,
++			       const char *name)
+ {
+-  /* Mapping:
+-       ops[0] <--> value0 <--> arg0 */
+-  struct expand_operand ops[1];
+-  tree arg0;
+-  rtx value0;
++  if (GET_CODE (opval) != CONST_INT)
++    {
++      error ("invalid argument to built-in function %s", name);
++      return false;
++    }
++  if (! (*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
++    {
++      error ("constant argument out of range for %s", name);
++
++      return false;
++    }
++  return true;
++}
+ 
+-  /* Grab the incoming arguments and extract its rtx.  */
+-  arg0 = CALL_EXPR_ARG (exp, 0);
+-  value0 = expand_normal (arg0);
++/* Expand builtins that take no arguments and only produce TARGET.  */
++static rtx
++nds32_expand_noarg_builtin (enum insn_code icode, rtx target)
++{
++  rtx pat;
+ 
+-  /* Create operands.  */
+-  create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0)));
++  target = nds32_legitimize_target (icode, target);
+ 
+-  /* Emit new instruction.  */
+-  if (!maybe_expand_insn (icode, 1, ops))
+-    error ("invalid argument to built-in function");
++  /* Emit and return the new instruction. */
++  pat = GEN_FCN (icode) (target);
++  if (! pat)
++    return NULL_RTX;
+ 
++  emit_insn (pat);
+   return target;
+ }
+ 
+-/* Function to expand builtin function for
+-   '[(set (reg) (unspec_volatile [(imm)]))]'.  */
++/* Expand one-operand builtins; if RETURN_P, operand 0 is the result.  */
+ static rtx
+-nds32_expand_builtin_reg_ftype_imm (enum insn_code icode,
+-				    tree exp, rtx target)
++nds32_expand_unop_builtin (enum insn_code icode, tree exp, rtx target,
++			   bool return_p)
+ {
+-  /* Mapping:
+-       ops[0] <--> target <--> exp
+-       ops[1] <--> value0 <--> arg0 */
+-  struct expand_operand ops[2];
+-  tree arg0;
+-  rtx value0;
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  int op0_num = return_p ? 1 : 0;
++
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
+ 
+-  /* Grab the incoming arguments and extract its rtx.  */
+-  arg0 = CALL_EXPR_ARG (exp, 0);
+-  value0 = expand_normal (arg0);
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+ 
+-  /* Create operands.  */
+-  create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (exp)));
+-  create_input_operand (&ops[1], value0, TYPE_MODE (TREE_TYPE (arg0)));
++  /* Emit and return the new instruction. */
++  if (return_p)
++    pat = GEN_FCN (icode) (target, op0);
++  else
++    pat = GEN_FCN (icode) (op0);
+ 
+-  /* Emit new instruction.  */
+-  if (!maybe_expand_insn (icode, 2, ops))
+-    error ("invalid argument to built-in function");
++  if (! pat)
++    return NULL_RTX;
+ 
++  emit_insn (pat);
+   return target;
+ }
+ 
+-/* Function to expand builtin function for
+-   '[(unspec_volatile [(reg) (imm)])]' pattern.  */
++/* Expand builtins that take one operand, which must be an immediate.  */
+ static rtx
+-nds32_expand_builtin_null_ftype_reg_imm (enum insn_code icode,
+-					 tree exp, rtx target)
+-{
+-  /* Mapping:
+-       ops[0] <--> value0 <--> arg0
+-       ops[1] <--> value1 <--> arg1 */
+-  struct expand_operand ops[2];
+-  tree arg0, arg1;
+-  rtx value0, value1;
+-
+-  /* Grab the incoming arguments and extract its rtx.  */
+-  arg0 = CALL_EXPR_ARG (exp, 0);
+-  arg1 = CALL_EXPR_ARG (exp, 1);
+-  value0 = expand_normal (arg0);
+-  value1 = expand_normal (arg1);
+-
+-  /* Create operands.  */
+-  create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0)));
+-  create_input_operand (&ops[1], value1, TYPE_MODE (TREE_TYPE (arg1)));
+-
+-  /* Emit new instruction.  */
+-  if (!maybe_expand_insn (icode, 2, ops))
+-    error ("invalid argument to built-in function");
++nds32_expand_unopimm_builtin (enum insn_code icode, tree exp, rtx target,
++			      bool return_p, const char *name)
++{
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  int op0_num = return_p ? 1 : 0;
++
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
++
++  if (!nds32_check_constant_argument (icode, op0_num, op0, name))
++    return NULL_RTX;
++
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
+ 
++  /* Emit and return the new instruction. */
++  if (return_p)
++    pat = GEN_FCN (icode) (target, op0);
++  else
++    pat = GEN_FCN (icode) (op0);
++
++  if (! pat)
++    return NULL_RTX;
++
++  emit_insn (pat);
+   return target;
+ }
+ 
+-/* ------------------------------------------------------------------------ */
++/* Expand two-operand builtins; if RETURN_P, operand 0 is the result.  */
++static rtx
++nds32_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
++			    bool return_p)
++{
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  int op0_num = return_p ? 1 : 0;
++  int op1_num = return_p ? 2 : 1;
+ 
+-void
+-nds32_init_builtins_impl (void)
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
++
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
++  op1 = nds32_legitimize_argument (icode, op1_num, op1);
++
++  /* Emit and return the new instruction. */
++  if (return_p)
++    pat = GEN_FCN (icode) (target, op0, op1);
++  else
++    pat = GEN_FCN (icode) (op0, op1);
++
++  if (! pat)
++    return NULL_RTX;
++
++  emit_insn (pat);
++  return target;
++}
++
++/* Expand builtins that take two operands and the second is immediate.  */
++static rtx
++nds32_expand_binopimm_builtin (enum insn_code icode, tree exp, rtx target,
++			       bool return_p, const char *name)
+ {
+-  tree pointer_type_node  = build_pointer_type (integer_type_node);
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  int op0_num = return_p ? 1 : 0;
++  int op1_num = return_p ? 2 : 1;
+ 
+-  tree void_ftype_void    = build_function_type (void_type_node,
+-						 void_list_node);
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
+ 
+-  tree void_ftype_pint    = build_function_type_list (void_type_node,
+-						      pointer_type_node,
+-						      NULL_TREE);
++  if (!nds32_check_constant_argument (icode, op1_num, op1, name))
++    return NULL_RTX;
+ 
+-  tree int_ftype_int      = build_function_type_list (integer_type_node,
+-						      integer_type_node,
+-						      NULL_TREE);
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
++  op1 = nds32_legitimize_argument (icode, op1_num, op1);
+ 
+-  tree void_ftype_int_int = build_function_type_list (void_type_node,
+-						      integer_type_node,
+-						      integer_type_node,
+-						      NULL_TREE);
++  /* Emit and return the new instruction. */
++  if (return_p)
++    pat = GEN_FCN (icode) (target, op0, op1);
++  else
++    pat = GEN_FCN (icode) (op0, op1);
+ 
+-  /* Cache.  */
+-  add_builtin_function ("__builtin_nds32_isync",  void_ftype_pint,
+-			NDS32_BUILTIN_ISYNC,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-  add_builtin_function ("__builtin_nds32_isb",  void_ftype_void,
+-			NDS32_BUILTIN_ISB,
+-			BUILT_IN_MD, NULL, NULL_TREE);
++  if (! pat)
++    return NULL_RTX;
+ 
+-  /* Register Transfer.  */
+-  add_builtin_function ("__builtin_nds32_mfsr",  int_ftype_int,
+-			NDS32_BUILTIN_MFSR,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-  add_builtin_function ("__builtin_nds32_mfusr", int_ftype_int,
+-			NDS32_BUILTIN_MFUSR,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-  add_builtin_function ("__builtin_nds32_mtsr",  void_ftype_int_int,
+-			NDS32_BUILTIN_MTSR,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-  add_builtin_function ("__builtin_nds32_mtusr", void_ftype_int_int,
+-			NDS32_BUILTIN_MTUSR,
+-			BUILT_IN_MD, NULL, NULL_TREE);
++  emit_insn (pat);
++  return target;
++}
+ 
+-  /* Interrupt.  */
+-  add_builtin_function ("__builtin_nds32_setgie_en",  void_ftype_void,
+-			NDS32_BUILTIN_SETGIE_EN,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-  add_builtin_function ("__builtin_nds32_setgie_dis", void_ftype_void,
+-			NDS32_BUILTIN_SETGIE_DIS,
+-			BUILT_IN_MD, NULL, NULL_TREE);
++/* Expand three-operand builtins; if RETURN_P, operand 0 is the result.  */
++static rtx
++nds32_expand_triop_builtin (enum insn_code icode, tree exp, rtx target,
++			    bool return_p)
++{
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  rtx op2 = nds32_read_argument (exp, 2);
++  int op0_num = return_p ? 1 : 0;  /* Operand 0 is TARGET if RETURN_P.  */
++  int op1_num = return_p ? 2 : 1;
++  int op2_num = return_p ? 3 : 2;
++
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
++
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
++  op1 = nds32_legitimize_argument (icode, op1_num, op1);
++  op2 = nds32_legitimize_argument (icode, op2_num, op2);
++
++  /* Generate the insn; emit it and return TARGET unless generation fails.  */
++  if (return_p)
++    pat = GEN_FCN (icode) (target, op0, op1, op2);
++  else
++    pat = GEN_FCN (icode) (op0, op1, op2);
++
++  if (! pat)
++    return NULL_RTX;
++
++  emit_insn (pat);
++  return target;
++}
++
++/* Expand builtins that take three operands; the third must be an immediate.  */
++static rtx
++nds32_expand_triopimm_builtin (enum insn_code icode, tree exp, rtx target,
++			       bool return_p, const char *name)
++{
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  rtx op2 = nds32_read_argument (exp, 2);
++  int op0_num = return_p ? 1 : 0;  /* Operand 0 is TARGET if RETURN_P.  */
++  int op1_num = return_p ? 2 : 1;
++  int op2_num = return_p ? 3 : 2;
++
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
++
++  if (!nds32_check_constant_argument (icode, op2_num, op2, name))
++    return NULL_RTX;  /* The check has already reported the error.  */
++
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
++  op1 = nds32_legitimize_argument (icode, op1_num, op1);
++  op2 = nds32_legitimize_argument (icode, op2_num, op2);
++
++  /* Generate the insn; emit it and return TARGET unless generation fails.  */
++  if (return_p)
++    pat = GEN_FCN (icode) (target, op0, op1, op2);
++  else
++    pat = GEN_FCN (icode) (op0, op1, op2);
++
++  if (! pat)
++    return NULL_RTX;
++
++  emit_insn (pat);
++  return target;
++}
++
++/* Expand load builtins: one address argument, result placed in TARGET.  */
++static rtx
++nds32_expand_builtin_load (enum insn_code icode, tree exp, rtx target)
++{
++  /* Load address format is [$ra + $rb],
++     but the builtin supplies only one address argument,
++     so we need another temp register as $rb.
++     Generating assembly code:
++       movi $temp, 0
++       llw  $rt, [$ra + $temp] */
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode);
++
++  target = nds32_legitimize_target (icode, target);
++  op0 = nds32_legitimize_argument (icode, 1, op0);
++
++  /* Build the pattern first so nothing is emitted if generation fails.  */
++  pat = GEN_FCN (icode) (target, op0, addr_helper);
++  if (!pat)
++    return NULL_RTX;
++
++  emit_move_insn (addr_helper, GEN_INT (0));  /* movi $temp, 0  */
++  emit_insn (pat);
++  return target;
++}
++
++/* Expand store builtins: two arguments; TARGET is returned unmodified.  */
++static rtx
++nds32_expand_builtin_store (enum insn_code icode, tree exp, rtx target)
++{
++  /* Store address format is [$ra + $rb],
++     but the builtin supplies only one address argument,
++     so we need another temp register as $rb.
++     Generating assembly code:
++       movi $temp, 0
++       store  $rt, [$ra + $temp] */
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode);
++
++  op0 = nds32_legitimize_argument (icode, 0, op0);
++  op1 = nds32_legitimize_argument (icode, 2, op1);
++
++  /* ADDR_HELPER is insn operand 1, between the two builtin arguments.  */
++  pat = GEN_FCN (icode) (op0, addr_helper, op1);
++  if (! pat)
++    return NULL_RTX;
++
++  emit_move_insn (addr_helper, GEN_INT (0));  /* movi $temp, 0  */
++  emit_insn (pat);
++  return target;
++}
++
++/* Expand cctl builtins; the first argument must pass the constant check.  */
++static rtx
++nds32_expand_cctl_builtin (enum insn_code icode, tree exp, rtx target,
++			   bool return_p, const char *name)
++{
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  int op0_num = return_p ? 1 : 0;  /* Shifted by one when TARGET is used.  */
++  int op1_num = return_p ? 2 : 1;
++
++  if (return_p)
++    target = nds32_legitimize_target (icode, target);
++
++  if (!nds32_check_constant_argument (icode, op0_num, op0, name))
++    return NULL_RTX;  /* The first argument failed the constant check.  */
++
++  op0 = nds32_legitimize_argument (icode, op0_num, op0);
++  op1 = nds32_legitimize_argument (icode, op1_num, op1);
++
++  /* Generate the pattern; cctl_idx_write is handled before RETURN_P.  */
++  if (icode == CODE_FOR_cctl_idx_write)
++    {
++      /* cctl_idx_write takes three arguments,
++	 so create operand2 for cctl_idx_write pattern.  */
++      rtx op2 = nds32_read_argument (exp, 2);
++      op2 = nds32_legitimize_argument (icode, 2, op2);
++      pat = GEN_FCN (icode) (op0, op1, op2);
++    }
++  else if (return_p)
++    pat = GEN_FCN (icode) (target, op0, op1);
++  else
++    pat = GEN_FCN (icode) (op0, op1);
++
++  if (! pat)  /* The generator produced nothing; report failure.  */
++    return NULL_RTX;
++
++  emit_insn (pat);
++  return target;
++}
++
++/* Expand scw builtins: two arguments, result returned in TARGET.  */
++static rtx
++nds32_expand_scw_builtin (enum insn_code icode, tree exp, rtx target)
++{
++  /* SCW address format is [$ra + $rb], but only $ra is supplied,
++     so we need another temp register as $rb.
++     Generating assembly code:
++	movi $temp, 0
++	scw  $rt, [$ra + $temp] */
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++  rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode);
++
++  target = nds32_legitimize_target (icode, target);
++  op0 = nds32_legitimize_argument (icode, 1, op0);
++  op1 = nds32_legitimize_argument (icode, 2, op1);
++
++  /* TARGET is both the first and the last operand of the pattern.  */
++  pat = GEN_FCN (icode) (target, op0, addr_helper, target);
++
++  if (!pat)
++    return NULL_RTX;
++
++  emit_move_insn (addr_helper, GEN_INT (0));  /* movi $temp, 0  */
++  emit_move_insn (target, op1);  /* Preload TARGET with the 2nd argument.  */
++  emit_insn (pat);
++  return target;
+ }
+ 
++/* Expand set int priority builtins; TARGET is returned unmodified.  */
++static rtx
++nds32_expand_priority_builtin (enum insn_code icode, tree exp, rtx target,
++			       const char *name)
++{
++  rtx pat;
++  rtx op0 = nds32_read_argument (exp, 0);
++  rtx op1 = nds32_read_argument (exp, 1);
++
++  /* The set_int_priority intrinsic takes two immediate arguments,
++     so check whether both arguments are immediates.  */
++
++  if (!nds32_check_constant_argument (icode, 0, op0, name))
++    return NULL_RTX;
++
++  if (!nds32_check_constant_argument (icode, 1, op1, name))
++    return NULL_RTX;
++
++  op0 = nds32_legitimize_argument (icode, 0, op0);
++  op1 = nds32_legitimize_argument (icode, 1, op1);
++
++  /* Generate the pattern; there is no result operand.  */
++  pat = GEN_FCN (icode) (op0, op1);
++
++  if (! pat)  /* The generator produced nothing; report failure.  */
++    return NULL_RTX;
++
++  emit_insn (pat);
++  return target;
++}
++
++struct builtin_description
++{
++  const enum insn_code icode;  /* CODE_FOR_* insn code of the builtin.  */
++  const char *name;  /* Name string, prefixed with "__nds32__".  */
++  enum nds32_builtins code;  /* NDS32_BUILTIN_* function code.  */
++  bool return_p;  /* True for NDS32_BUILTIN, false for NO_TARGET entries.  */
++};
++
++#define NDS32_BUILTIN(code, string, builtin) \
++  { CODE_FOR_##code, "__nds32__" string, \
++    NDS32_BUILTIN_##builtin, true },
++
++#define NDS32_NO_TARGET_BUILTIN(code, string, builtin) \
++  { CODE_FOR_##code, "__nds32__" string, \
++    NDS32_BUILTIN_##builtin, false },
++
++/* Intrinsics that take no argument; NDS32_BUILTIN entries return a value.  */
++static struct builtin_description bdesc_noarg[] =
++{
++  NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG)
++  NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR)
++  NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV)
++  NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP)
++  NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS)
++  NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int",
++		GET_ALL_PENDING_INT)
++  NDS32_BUILTIN(unspec_unaligned_feature, "unaligned_feature",
++		UNALIGNED_FEATURE)
++  NDS32_NO_TARGET_BUILTIN(unspec_enable_unaligned, "enable_unaligned",
++			  ENABLE_UNALIGNED)
++  NDS32_NO_TARGET_BUILTIN(unspec_disable_unaligned, "disable_unaligned",
++			  DISABLE_UNALIGNED)
++};
++
++/* Intrinsics that take just one argument; v_* names reuse the same icode.  */
++static struct builtin_description bdesc_1arg[] =
++{
++  NDS32_BUILTIN(unspec_ssabssi2, "abs", ABS)
++  NDS32_BUILTIN(clzsi2, "clz", CLZ)
++  NDS32_BUILTIN(unspec_clo, "clo", CLO)
++  NDS32_BUILTIN(unspec_wsbh, "wsbh", WSBH)
++  NDS32_BUILTIN(unspec_tlbop_pb, "tlbop_pb",TLBOP_PB)
++  NDS32_BUILTIN(unaligned_load_hw, "unaligned_load_hw", UALOAD_HW)
++  NDS32_BUILTIN(unaligned_loadsi, "unaligned_load_w", UALOAD_W)
++  NDS32_BUILTIN(unaligned_loaddi, "unaligned_load_dw", UALOAD_DW)
++  NDS32_NO_TARGET_BUILTIN(unspec_volatile_isync, "isync", ISYNC)
++  NDS32_NO_TARGET_BUILTIN(unspec_fmtcsr, "fmtcsr", FMTCSR)
++  NDS32_NO_TARGET_BUILTIN(unspec_jr_itoff, "jr_itoff", JR_ITOFF)
++  NDS32_NO_TARGET_BUILTIN(unspec_jr_toff, "jr_toff", JR_TOFF)
++  NDS32_NO_TARGET_BUILTIN(unspec_jral_ton, "jral_ton", JRAL_TON)
++  NDS32_NO_TARGET_BUILTIN(unspec_ret_toff, "ret_toff", RET_TOFF)
++  NDS32_NO_TARGET_BUILTIN(unspec_jral_iton, "jral_iton",JRAL_ITON)
++  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_trd, "tlbop_trd", TLBOP_TRD)
++  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_twr, "tlbop_twr", TLBOP_TWR)
++  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwr, "tlbop_rwr", TLBOP_RWR)
++  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwlk, "tlbop_rwlk", TLBOP_RWLK)
++  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_unlk, "tlbop_unlk", TLBOP_UNLK)
++  NDS32_NO_TARGET_BUILTIN(unspec_tlbop_inv, "tlbop_inv", TLBOP_INV)
++  NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF)
++  NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp,
++			  "set_current_sp", SET_CURRENT_SP)
++  NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16)
++  NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16)
++  NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8)
++  NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8)
++  NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810)
++  NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810)
++  NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820)
++  NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820)
++  NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830)
++  NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830)
++  NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831)
++  NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831)
++  NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810)
++  NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810)
++  NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820)
++  NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820)
++  NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830)
++  NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830)
++  NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831)
++  NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831)
++  NDS32_BUILTIN(unspec_kabs, "kabs", KABS)
++  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16)
++  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16)
++  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8)
++  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8)
++};
++
++/* Intrinsics that take just one argument, and the argument is immediate.  */
++static struct builtin_description bdesc_1argimm[] =
++{
++  NDS32_BUILTIN(unspec_volatile_mfsr, "mfsr", MFSR)
++  NDS32_BUILTIN(unspec_volatile_mfusr, "mfusr", MFUSR)
++  NDS32_BUILTIN(unspec_get_pending_int, "get_pending_int", GET_PENDING_INT)
++  NDS32_BUILTIN(unspec_get_int_priority, "get_int_priority", GET_INT_PRIORITY)
++  NDS32_NO_TARGET_BUILTIN(unspec_trap, "trap", TRAP)
++  NDS32_NO_TARGET_BUILTIN(unspec_break, "break", BREAK)
++  NDS32_NO_TARGET_BUILTIN(unspec_syscall, "syscall", SYSCALL)
++  NDS32_NO_TARGET_BUILTIN(unspec_enable_int, "enable_int", ENABLE_INT)
++  NDS32_NO_TARGET_BUILTIN(unspec_disable_int, "disable_int", DISABLE_INT)
++  NDS32_NO_TARGET_BUILTIN(unspec_clr_pending_hwint, "clr_pending_hwint",
++			  CLR_PENDING_HWINT)
++  NDS32_NO_TARGET_BUILTIN(unspec_set_trig_level, "set_trig_level",
++			  SET_TRIG_LEVEL)
++  NDS32_NO_TARGET_BUILTIN(unspec_set_trig_edge, "set_trig_edge",
++			  SET_TRIG_EDGE)
++  NDS32_BUILTIN(unspec_get_trig_type, "get_trig_type", GET_TRIG_TYPE)
++};
++
++/* Intrinsics that take two arguments.  */
++static struct builtin_description bdesc_2arg[] =
++{
++  NDS32_BUILTIN(unspec_fcpynss, "fcpynss", FCPYNSS)
++  NDS32_BUILTIN(unspec_fcpyss, "fcpyss", FCPYSS)
++  NDS32_BUILTIN(unspec_fcpynsd, "fcpynsd", FCPYNSD)
++  NDS32_BUILTIN(unspec_fcpysd, "fcpysd", FCPYSD)
++  NDS32_BUILTIN(unspec_ave, "ave", AVE)
++  NDS32_BUILTIN(unspec_pbsad, "pbsad", PBSAD)
++  NDS32_BUILTIN(unspec_ffb, "ffb", FFB)
++  NDS32_BUILTIN(unspec_ffmism, "ffmism", FFMISM)
++  NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM)
++  NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW)
++  NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH)
++  NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW)
++  NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH)
++  NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB)
++  NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB)
++  NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT)
++  NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT)
++  NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB)
++  NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB)
++  NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT)
++  NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT)
++  NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB)
++  NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB)
++  NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT)
++  NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT)
++  NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB)
++  NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB)
++  NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT)
++  NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT)
++  NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW)
++  NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U)
++  NDS32_BUILTIN(rotrsi3, "rotr", ROTR)
++  NDS32_BUILTIN(unspec_sva, "sva", SVA)
++  NDS32_BUILTIN(unspec_svs, "svs", SVS)
++  NDS32_NO_TARGET_BUILTIN(mtsr_isb, "mtsr_isb", MTSR_ISB)
++  NDS32_NO_TARGET_BUILTIN(mtsr_dsb, "mtsr_dsb", MTSR_DSB)
++  NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtsr, "mtsr", MTSR)
++  NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtusr, "mtusr", MTUSR)
++  NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW)
++  NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_w", UASTORE_W)
++  NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_dw", UASTORE_DW)
++  NDS32_BUILTIN(addv2hi3, "add16", ADD16)
++  NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16)
++  NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16)
++  NDS32_BUILTIN(raddv2hi3, "radd16", RADD16)
++  NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16)
++  NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16)
++  NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16)
++  NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16)
++  NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16)
++  NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16)
++  NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16)
++  NDS32_BUILTIN(subv2hi3, "sub16", SUB16)
++  NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16)
++  NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16)
++  NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16)
++  NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16)
++  NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16)
++  NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16)
++  NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16)
++  NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16)
++  NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16)
++  NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16)
++  NDS32_BUILTIN(cras16_1, "cras16", CRAS16)
++  NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16)
++  NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16)
++  NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16)
++  NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16)
++  NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16)
++  NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16)
++  NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16)
++  NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16)
++  NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16)
++  NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16)
++  NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16)
++  NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16)
++  NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16)
++  NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16)
++  NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16)
++  NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16)
++  NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16)
++  NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16)
++  NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16)
++  NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16)
++  NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16)
++  NDS32_BUILTIN(addv4qi3, "add8", ADD8)
++  NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8)
++  NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8)
++  NDS32_BUILTIN(raddv4qi3, "radd8", RADD8)
++  NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8)
++  NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8)
++  NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8)
++  NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8)
++  NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8)
++  NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8)
++  NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8)
++  NDS32_BUILTIN(subv4qi3, "sub8", SUB8)
++  NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8)
++  NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8)
++  NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8)
++  NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8)
++  NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8)
++  NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8)
++  NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8)
++  NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8)
++  NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8)
++  NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8)
++  NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16)
++  NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16)
++  NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U)
++  NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U)
++  NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16)
++  NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16)
++  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
++  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
++  NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16)
++  NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16)
++  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
++  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
++  NDS32_BUILTIN(kslra16, "kslra16", KSLRA16)
++  NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16)
++  NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U)
++  NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U)
++  NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16)
++  NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16)
++  NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16)
++  NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16)
++  NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16)
++  NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16)
++  NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16)
++  NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16)
++  NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16)
++  NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16) /* NOTE(review): lt icode for le?  */
++  NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16) /* NOTE(review): likewise.  */
++  NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8)
++  NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8)
++  NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8)
++  NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8)
++  NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8)
++  NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8)
++  NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8)
++  NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8)
++  NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8)
++  NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8) /* NOTE(review): lt icode for le?  */
++  NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8) /* NOTE(review): likewise.  */
++  NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16)
++  NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16)
++  NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16)
++  NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16)
++  NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16)
++  NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16)
++  NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16)
++  NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16)
++  NDS32_BUILTIN(khm16, "khm16", KHM16)
++  NDS32_BUILTIN(khm16, "v_khm16", V_KHM16)
++  NDS32_BUILTIN(khmx16, "khmx16", KHMX16)
++  NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16)
++  NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8)
++  NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8)
++  NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8)
++  NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8)
++  NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8)
++  NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8)
++  NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8)
++  NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8)
++  NDS32_BUILTIN(raddsi3, "raddw", RADDW)
++  NDS32_BUILTIN(uraddsi3, "uraddw", URADDW)
++  NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW)
++  NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW)
++  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
++  NDS32_BUILTIN(kssl, "ksll", KSLL)
++  NDS32_BUILTIN(pkbb, "pkbb16", PKBB16)
++  NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16)
++  NDS32_BUILTIN(pkbt, "pkbt16", PKBT16)
++  NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16)
++  NDS32_BUILTIN(pktb, "pktb16", PKTB16)
++  NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16)
++  NDS32_BUILTIN(pktt, "pktt16", PKTT16)
++  NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16)
++  NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL)
++  NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U)
++  NDS32_BUILTIN(smmwb, "smmwb", SMMWB)
++  NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB)
++  NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U)
++  NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U)
++  NDS32_BUILTIN(smmwt, "smmwt", SMMWT)
++  NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT)
++  NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U)
++  NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U)
++  NDS32_BUILTIN(smbb, "smbb", SMBB)
++  NDS32_BUILTIN(smbb, "v_smbb", V_SMBB)
++  NDS32_BUILTIN(smbt, "smbt", SMBT)
++  NDS32_BUILTIN(smbt, "v_smbt", V_SMBT)
++  NDS32_BUILTIN(smtt, "smtt", SMTT)
++  NDS32_BUILTIN(smtt, "v_smtt", V_SMTT)
++  NDS32_BUILTIN(kmda, "kmda", KMDA)
++  NDS32_BUILTIN(kmda, "v_kmda", V_KMDA)
++  NDS32_BUILTIN(kmxda, "kmxda", KMXDA)
++  NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA)
++  NDS32_BUILTIN(smds, "smds", SMDS)
++  NDS32_BUILTIN(smds, "v_smds", V_SMDS)
++  NDS32_BUILTIN(smdrs, "smdrs", SMDRS)
++  NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS)
++  NDS32_BUILTIN(smxdsv, "smxds", SMXDS)
++  NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS)
++  NDS32_BUILTIN(smal1, "smal", SMAL)
++  NDS32_BUILTIN(smal1, "v_smal", V_SMAL)
++  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
++  NDS32_BUILTIN(wext, "wext", WEXT)
++  NDS32_BUILTIN(adddi3, "sadd64", SADD64)
++  NDS32_BUILTIN(adddi3, "uadd64", UADD64)
++  NDS32_BUILTIN(radddi3, "radd64", RADD64)
++  NDS32_BUILTIN(uradddi3, "uradd64", URADD64)
++  NDS32_BUILTIN(kadddi3, "kadd64", KADD64)
++  NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64)
++  NDS32_BUILTIN(subdi3, "ssub64", SSUB64)
++  NDS32_BUILTIN(subdi3, "usub64", USUB64)
++  NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64)
++  NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64)
++  NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64)
++  NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64)
++  NDS32_BUILTIN(smul16, "smul16", SMUL16)
++  NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16)
++  NDS32_BUILTIN(smulx16, "smulx16", SMULX16)
++  NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16)
++  NDS32_BUILTIN(umul16, "umul16", UMUL16)
++  NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16)
++  NDS32_BUILTIN(umulx16, "umulx16", UMULX16)
++  NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16)
++  NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL)
++  NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U)
++  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
++			  "put_unaligned_u16x2", UASTORE_U16)
++  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
++			  "put_unaligned_s16x2", UASTORE_S16)
++  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8)
++  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8)
++};
++
++/* Two-argument intrinsics with an immediate second argument.  */
++static struct builtin_description bdesc_2argimm[] =
++{
++  NDS32_BUILTIN(unspec_bclr, "bclr", BCLR)
++  NDS32_BUILTIN(unspec_bset, "bset", BSET)
++  NDS32_BUILTIN(unspec_btgl, "btgl", BTGL)
++  NDS32_BUILTIN(unspec_btst, "btst", BTST)
++  NDS32_BUILTIN(unspec_clip, "clip", CLIP)
++  NDS32_BUILTIN(unspec_clips, "clips", CLIPS)
++  NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ)
++  NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ)
++  NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16)  /* Logical shift, as in 2arg.  */
++  NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16)
++  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
++  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
++  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
++  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
++  NDS32_BUILTIN(sclip16, "sclip16", SCLIP16)
++  NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16)
++  NDS32_BUILTIN(uclip16, "uclip16", UCLIP16)
++  NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16)
++  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
++  NDS32_BUILTIN(kssl, "ksll", KSLL)
++  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
++  NDS32_BUILTIN(wext, "wext", WEXT)
++  NDS32_BUILTIN(uclip32, "uclip32", UCLIP32)
++  NDS32_BUILTIN(sclip32, "sclip32", SCLIP32)
++};
++
++/* Intrinsics that take three arguments; v_* names reuse the same icode.  */
++static struct builtin_description bdesc_3arg[] =
++{
++  NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA)
++  NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE)
++  NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP)
++  NDS32_BUILTIN(kmabb, "kmabb", KMABB)
++  NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB)
++  NDS32_BUILTIN(kmabt, "kmabt", KMABT)
++  NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT)
++  NDS32_BUILTIN(kmatt, "kmatt", KMATT)
++  NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT)
++  NDS32_BUILTIN(kmada, "kmada", KMADA)
++  NDS32_BUILTIN(kmada, "v_kmada", V_KMADA)
++  NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA)
++  NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA)
++  NDS32_BUILTIN(kmads, "kmads", KMADS)
++  NDS32_BUILTIN(kmads, "v_kmads", V_KMADS)
++  NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS)
++  NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS)
++  NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS)
++  NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS)
++  NDS32_BUILTIN(kmsda, "kmsda", KMSDA)
++  NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA)
++  NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA)
++  NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA)
++  NDS32_BUILTIN(bpick1, "bpick", BPICK)
++  NDS32_BUILTIN(smar64_1, "smar64", SMAR64)
++  NDS32_BUILTIN(smsr64, "smsr64", SMSR64)
++  NDS32_BUILTIN(umar64_1, "umar64", UMAR64)
++  NDS32_BUILTIN(umsr64, "umsr64", UMSR64)
++  NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64)
++  NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64)
++  NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64)
++  NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64)
++  NDS32_BUILTIN(smalbb, "smalbb", SMALBB)
++  NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB)
++  NDS32_BUILTIN(smalbt, "smalbt", SMALBT)
++  NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT)
++  NDS32_BUILTIN(smaltt, "smaltt", SMALTT)
++  NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT)
++  NDS32_BUILTIN(smalda1, "smalda", SMALDA)
++  NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA)
++  NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA)
++  NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA)
++  NDS32_BUILTIN(smalds1, "smalds", SMALDS)
++  NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS)
++  NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS)
++  NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS)
++  NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS)
++  NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS)
++  NDS32_BUILTIN(smslda1, "smslda", SMSLDA)
++  NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA)
++  NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA)
++  NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA)
++  NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB)
++  NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB)
++  NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U)
++  NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U)
++  NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT)
++  NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT)
++  NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U)
++  NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U)
++  NDS32_BUILTIN(kmmac, "kmmac", KMMAC)
++  NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U)
++  NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB)
++  NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U)
++};
++
++/* Three-argument intrinsics whose third argument must be an immediate.  */
++static struct builtin_description bdesc_3argimm[] =
++{
++  NDS32_NO_TARGET_BUILTIN(prefetch_qw, "prefetch_qw", DPREF_QW)
++  NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW)
++  NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W)
++  NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW)
++  NDS32_BUILTIN(insb, "insb", INSB)
++};
++
++/* Intrinsics that load a value (llw, lwup, lbup); all return a result.  */
++static struct builtin_description bdesc_load[] =
++{
++  NDS32_BUILTIN(unspec_volatile_llw, "llw", LLW)
++  NDS32_BUILTIN(unspec_lwup, "lwup", LWUP)
++  NDS32_BUILTIN(unspec_lbup, "lbup", LBUP)
++};
++
++/* Intrinsics that store a value (swup, sbup); entries set return_p true.  */
++static struct builtin_description bdesc_store[] =
++{
++  NDS32_BUILTIN(unspec_swup, "swup", SWUP)
++  NDS32_BUILTIN(unspec_sbup, "sbup", SBUP)
++};
++
++static struct builtin_description bdesc_cctl[] =  /* The cctl intrinsics.  */
++{
++  NDS32_BUILTIN(cctl_idx_read, "cctl_idx_read", CCTL_IDX_READ)
++  NDS32_NO_TARGET_BUILTIN(cctl_idx_write, "cctl_idx_write", CCTL_IDX_WRITE)
++  NDS32_NO_TARGET_BUILTIN(cctl_va_lck, "cctl_va_lck", CCTL_VA_LCK)
++  NDS32_NO_TARGET_BUILTIN(cctl_idx_wbinval,
++			  "cctl_idx_wbinval", CCTL_IDX_WBINVAL)
++  NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_l1,
++			  "cctl_va_wbinval_l1", CCTL_VA_WBINVAL_L1)
++  NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_la,
++			  "cctl_va_wbinval_la", CCTL_VA_WBINVAL_LA)
++};
+ 
+ rtx
+ nds32_expand_builtin_impl (tree exp,
+ 			   rtx target,
+ 			   rtx subtarget ATTRIBUTE_UNUSED,
+-			   machine_mode mode ATTRIBUTE_UNUSED,
++			   enum machine_mode mode ATTRIBUTE_UNUSED,
+ 			   int ignore ATTRIBUTE_UNUSED)
+ {
+   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
++  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
++  unsigned i;
++  struct builtin_description *d;
++
++  if (!NDS32_EXT_DSP_P ()
++      && fcode > NDS32_BUILTIN_DSP_BEGIN
++      && fcode < NDS32_BUILTIN_DSP_END)
++    error ("don't support DSP extension instructions");
++
++  switch (fcode)
++    {
++    /* FPU Register Transfer.  */
++    case NDS32_BUILTIN_FMFCFG:
++    case NDS32_BUILTIN_FMFCSR:
++    case NDS32_BUILTIN_FMTCSR:
++    case NDS32_BUILTIN_FCPYNSS:
++    case NDS32_BUILTIN_FCPYSS:
++      /* Both v3s and v3f toolchains define TARGET_FPU_SINGLE.  */
++      if (!TARGET_FPU_SINGLE)
++	{
++	  error ("this builtin function is only available "
++		 "on the v3s or v3f toolchain");
++	  return NULL_RTX;
++	}
++      break;
++
++    /* FPU Register Transfer.  */
++    case NDS32_BUILTIN_FCPYNSD:
++    case NDS32_BUILTIN_FCPYSD:
++      /* Only v3f toolchain defines TARGET_FPU_DOUBLE.  */
++      if (!TARGET_FPU_DOUBLE)
++	{
++	  error ("this builtin function is only available "
++		 "on the v3f toolchain");
++	  return NULL_RTX;
++	}
++      break;
++
++    /* Load and Store  */
++    case NDS32_BUILTIN_LLW:
++    case NDS32_BUILTIN_LWUP:
++    case NDS32_BUILTIN_LBUP:
++    case NDS32_BUILTIN_SCW:
++    case NDS32_BUILTIN_SWUP:
++    case NDS32_BUILTIN_SBUP:
++      if (TARGET_ISA_V3M)
++	{
++	  error ("this builtin function not support "
++		 "on the v3m toolchain");
++	  return NULL_RTX;
++	}
++      break;
++
++    /* Performance Extension  */
++    case NDS32_BUILTIN_ABS:
++    case NDS32_BUILTIN_AVE:
++    case NDS32_BUILTIN_BCLR:
++    case NDS32_BUILTIN_BSET:
++    case NDS32_BUILTIN_BTGL:
++    case NDS32_BUILTIN_BTST:
++    case NDS32_BUILTIN_CLIP:
++    case NDS32_BUILTIN_CLIPS:
++    case NDS32_BUILTIN_CLZ:
++    case NDS32_BUILTIN_CLO:
++      if (!TARGET_EXT_PERF)
++	{
++	  error ("don't support performance extension instructions");
++	  return NULL_RTX;
++	}
++      break;
++
++    /* Performance Extension 2  */
++    case NDS32_BUILTIN_PBSAD:
++    case NDS32_BUILTIN_PBSADA:
++    case NDS32_BUILTIN_BSE:
++    case NDS32_BUILTIN_BSP:
++      if (!TARGET_EXT_PERF2)
++	{
++	  error ("don't support performance extension "
++		 "version 2 instructions");
++	  return NULL_RTX;
++	}
++      break;
+ 
+-  int fcode = DECL_FUNCTION_CODE (fndecl);
++    /* String Extension  */
++    case NDS32_BUILTIN_FFB:
++    case NDS32_BUILTIN_FFMISM:
++    case NDS32_BUILTIN_FLMISM:
++      if (!TARGET_EXT_STRING)
++	{
++	  error ("don't support string extension instructions");
++	  return NULL_RTX;
++	}
++      break;
+ 
++    default:
++      break;
++    }
++
++  /* Since there are no result and operands, we can simply emit this rtx.  */
+   switch (fcode)
+     {
+-    /* Cache.  */
+-    case NDS32_BUILTIN_ISYNC:
+-      return nds32_expand_builtin_null_ftype_reg
+-	     (CODE_FOR_unspec_volatile_isync, exp, target);
+     case NDS32_BUILTIN_ISB:
+-      /* Since there are no result and operands for isb instruciton,
+-         we can simply emit this rtx.  */
+       emit_insn (gen_unspec_volatile_isb ());
+       return target;
+-
+-    /* Register Transfer.  */
+-    case NDS32_BUILTIN_MFSR:
+-      return nds32_expand_builtin_reg_ftype_imm
+-	     (CODE_FOR_unspec_volatile_mfsr, exp, target);
+-    case NDS32_BUILTIN_MFUSR:
+-      return nds32_expand_builtin_reg_ftype_imm
+-	     (CODE_FOR_unspec_volatile_mfusr, exp, target);
+-    case NDS32_BUILTIN_MTSR:
+-      return nds32_expand_builtin_null_ftype_reg_imm
+-	     (CODE_FOR_unspec_volatile_mtsr, exp, target);
+-    case NDS32_BUILTIN_MTUSR:
+-      return nds32_expand_builtin_null_ftype_reg_imm
+-	     (CODE_FOR_unspec_volatile_mtusr, exp, target);
+-
+-    /* Interrupt.  */
++    case NDS32_BUILTIN_DSB:
++      emit_insn (gen_unspec_dsb ());
++      return target;
++    case NDS32_BUILTIN_MSYNC_ALL:
++      emit_insn (gen_unspec_msync_all ());
++      return target;
++    case NDS32_BUILTIN_MSYNC_STORE:
++      emit_insn (gen_unspec_msync_store ());
++      return target;
+     case NDS32_BUILTIN_SETGIE_EN:
+-      /* Since there are no result and operands for setgie.e instruciton,
+-         we can simply emit this rtx.  */
+       emit_insn (gen_unspec_volatile_setgie_en ());
++      emit_insn (gen_unspec_dsb ());
+       return target;
+     case NDS32_BUILTIN_SETGIE_DIS:
+-      /* Since there are no result and operands for setgie.d instruciton,
+-         we can simply emit this rtx.  */
+       emit_insn (gen_unspec_volatile_setgie_dis ());
++      emit_insn (gen_unspec_dsb ());
++      return target;
++    case NDS32_BUILTIN_GIE_DIS:
++      emit_insn (gen_unspec_volatile_setgie_dis ());
++      emit_insn (gen_unspec_dsb ());
++      return target;
++    case NDS32_BUILTIN_GIE_EN:
++      emit_insn (gen_unspec_volatile_setgie_en ());
++      emit_insn (gen_unspec_dsb ());
++      return target;
++    case NDS32_BUILTIN_SET_PENDING_SWINT:
++      emit_insn (gen_unspec_set_pending_swint ());
++      return target;
++    case NDS32_BUILTIN_CLR_PENDING_SWINT:
++      emit_insn (gen_unspec_clr_pending_swint ());
++      return target;
++    case NDS32_BUILTIN_CCTL_L1D_INVALALL:
++      emit_insn (gen_cctl_l1d_invalall());
++      return target;
++    case NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL:
++      emit_insn (gen_cctl_l1d_wball_alvl());
++      return target;
++    case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL:
++      emit_insn (gen_cctl_l1d_wball_one_lvl());
++      return target;
++    case NDS32_BUILTIN_CLROV:
++      emit_insn (gen_unspec_volatile_clrov ());
++      return target;
++    case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT:
++      emit_insn (gen_unspec_standby_no_wake_grant ());
++      return target;
++    case NDS32_BUILTIN_STANDBY_WAKE_GRANT:
++      emit_insn (gen_unspec_standby_wake_grant ());
++      return target;
++    case NDS32_BUILTIN_STANDBY_WAKE_DONE:
++      emit_insn (gen_unspec_standby_wait_done ());
++      return target;
++    case NDS32_BUILTIN_SETEND_BIG:
++      emit_insn (gen_unspec_setend_big ());
++      return target;
++    case NDS32_BUILTIN_SETEND_LITTLE:
++      emit_insn (gen_unspec_setend_little ());
++      return target;
++    case NDS32_BUILTIN_NOP:
++      emit_insn (gen_unspec_nop ());
++      return target;
++    case NDS32_BUILTIN_SCHE_BARRIER:
++      emit_insn (gen_blockage ());
++      return target;
++    case NDS32_BUILTIN_TLBOP_FLUA:
++      emit_insn (gen_unspec_tlbop_flua ());
++      return target;
++    /* Instruction sequence protection  */
++    case NDS32_BUILTIN_SIGNATURE_BEGIN:
++      emit_insn (gen_unspec_signature_begin ());
++      return target;
++    case NDS32_BUILTIN_SIGNATURE_END:
++      emit_insn (gen_unspec_signature_end ());
++      return target;
++    case NDS32_BUILTIN_SCW:
++      return nds32_expand_scw_builtin (CODE_FOR_unspec_volatile_scw,
++				       exp, target);
++    case NDS32_BUILTIN_SET_INT_PRIORITY:
++      return nds32_expand_priority_builtin (CODE_FOR_unspec_set_int_priority,
++					    exp, target,
++					    "__nds32__set_int_priority");
++    case NDS32_BUILTIN_NO_HWLOOP:
++      emit_insn (gen_no_hwloop ());
+       return target;
+-
+     default:
+-      gcc_unreachable ();
++      break;
+     }
+ 
++  /* Expand groups of builtins.  */
++  for (i = 0, d = bdesc_noarg; i < ARRAY_SIZE (bdesc_noarg); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_noarg_builtin (d->icode, target);
++
++  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_unop_builtin (d->icode, exp, target, d->return_p);
++
++  for (i = 0, d = bdesc_1argimm; i < ARRAY_SIZE (bdesc_1argimm); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_unopimm_builtin (d->icode, exp, target,
++					   d->return_p, d->name);
++
++  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_binop_builtin (d->icode, exp, target, d->return_p);
++
++  for (i = 0, d = bdesc_2argimm; i < ARRAY_SIZE (bdesc_2argimm); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_binopimm_builtin (d->icode, exp, target,
++					    d->return_p, d->name);
++
++  for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_triop_builtin (d->icode, exp, target, d->return_p);
++
++  for (i = 0, d = bdesc_3argimm; i < ARRAY_SIZE (bdesc_3argimm); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_triopimm_builtin (d->icode, exp, target,
++					    d->return_p, d->name);
++
++  for (i = 0, d = bdesc_load; i < ARRAY_SIZE (bdesc_load); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_builtin_load (d->icode, exp, target);
++
++  for (i = 0, d = bdesc_store; i < ARRAY_SIZE (bdesc_store); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_builtin_store (d->icode, exp, target);
++
++  for (i = 0, d = bdesc_cctl; i < ARRAY_SIZE (bdesc_cctl); i++, d++)
++    if (d->code == fcode)
++      return nds32_expand_cctl_builtin (d->icode, exp, target,
++					d->return_p, d->name);
++
+   return NULL_RTX;
+ }
+ 
++static GTY(()) tree nds32_builtin_decls[NDS32_BUILTIN_COUNT];
++
++/* Return the NDS32 builtin decl for CODE; error_mark_node if out of range.  */
++tree
++nds32_builtin_decl_impl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
++{
++  if (code >= NDS32_BUILTIN_COUNT)
++    return error_mark_node;
++
++  return nds32_builtin_decls[code];
++}
++
++void
++nds32_init_builtins_impl (void)
++{
++#define ADD_NDS32_BUILTIN0(NAME, RET_TYPE, CODE)		\
++  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =			\
++  add_builtin_function ("__builtin_nds32_" NAME,		\
++			build_function_type_list (RET_TYPE##_type_node, \
++						  NULL_TREE),		\
++			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
++
++#define ADD_NDS32_BUILTIN1(NAME, RET_TYPE, ARG_TYPE, CODE)	\
++  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =			\
++  add_builtin_function ("__builtin_nds32_" NAME,		\
++			build_function_type_list (RET_TYPE##_type_node, \
++						  ARG_TYPE##_type_node, \
++						  NULL_TREE),		\
++			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
++
++#define ADD_NDS32_BUILTIN2(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, CODE)	\
++  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =				\
++  add_builtin_function ("__builtin_nds32_" NAME,			\
++			build_function_type_list (RET_TYPE##_type_node, \
++						  ARG_TYPE1##_type_node,\
++						  ARG_TYPE2##_type_node,\
++						  NULL_TREE),		\
++			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
++
++#define ADD_NDS32_BUILTIN3(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, ARG_TYPE3, CODE) \
++  nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] =				\
++  add_builtin_function ("__builtin_nds32_" NAME,			\
++			build_function_type_list (RET_TYPE##_type_node,	\
++						  ARG_TYPE1##_type_node,\
++						  ARG_TYPE2##_type_node,\
++						  ARG_TYPE3##_type_node,\
++						  NULL_TREE),		\
++			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
++
++  /* The return and argument type nodes can be looked up in tree.h.  */
++  tree ptr_char_type_node = build_pointer_type (char_type_node);
++  tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node);
++  tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node);
++  tree ptr_short_type_node = build_pointer_type (short_integer_type_node);
++  tree ptr_uint_type_node = build_pointer_type (unsigned_type_node);
++  tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node);
++  tree v4qi_type_node = build_vector_type (intQI_type_node, 4);
++  tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4);
++  tree v2hi_type_node = build_vector_type (intHI_type_node, 2);
++  tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2);
++  tree v2si_type_node = build_vector_type (intSI_type_node, 2);
++  tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2);
++
++  /* Cache.  */
++  ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC);
++  ADD_NDS32_BUILTIN0 ("isb", void, ISB);
++  ADD_NDS32_BUILTIN0 ("dsb", void, DSB);
++  ADD_NDS32_BUILTIN0 ("msync_all", void, MSYNC_ALL);
++  ADD_NDS32_BUILTIN0 ("msync_store", void, MSYNC_STORE);
++
++  /* Register Transfer.  */
++  ADD_NDS32_BUILTIN1 ("mfsr", unsigned, integer, MFSR);
++  ADD_NDS32_BUILTIN1 ("mfusr", unsigned, integer, MFUSR);
++  ADD_NDS32_BUILTIN2 ("mtsr", void, unsigned, integer, MTSR);
++  ADD_NDS32_BUILTIN2 ("mtsr_isb", void, unsigned, integer, MTSR_ISB);
++  ADD_NDS32_BUILTIN2 ("mtsr_dsb", void, unsigned, integer, MTSR_DSB);
++  ADD_NDS32_BUILTIN2 ("mtusr", void, unsigned, integer, MTUSR);
++
++  /* FPU Register Transfer.  */
++  ADD_NDS32_BUILTIN0 ("fmfcsr", unsigned, FMFCSR);
++  ADD_NDS32_BUILTIN1 ("fmtcsr", void, unsigned, FMTCSR);
++  ADD_NDS32_BUILTIN0 ("fmfcfg", unsigned, FMFCFG);
++  ADD_NDS32_BUILTIN2 ("fcpyss", float, float, float, FCPYSS);
++  ADD_NDS32_BUILTIN2 ("fcpynss", float, float, float, FCPYNSS);
++  ADD_NDS32_BUILTIN2 ("fcpysd", double, double, double, FCPYSD);
++  ADD_NDS32_BUILTIN2 ("fcpynsd", double, double, double, FCPYNSD);
++
++  /* Interrupt.  */
++  ADD_NDS32_BUILTIN0 ("setgie_en", void, SETGIE_EN);
++  ADD_NDS32_BUILTIN0 ("setgie_dis", void, SETGIE_DIS);
++  ADD_NDS32_BUILTIN0 ("gie_en", void, GIE_EN);
++  ADD_NDS32_BUILTIN0 ("gie_dis", void, GIE_DIS);
++  ADD_NDS32_BUILTIN1 ("enable_int", void, integer, ENABLE_INT);
++  ADD_NDS32_BUILTIN1 ("disable_int", void, integer, DISABLE_INT);
++  ADD_NDS32_BUILTIN0 ("set_pending_swint", void, SET_PENDING_SWINT);
++  ADD_NDS32_BUILTIN0 ("clr_pending_swint", void, CLR_PENDING_SWINT);
++  ADD_NDS32_BUILTIN0 ("get_all_pending_int", unsigned, GET_ALL_PENDING_INT);
++  ADD_NDS32_BUILTIN1 ("get_pending_int", unsigned, integer, GET_PENDING_INT);
++  ADD_NDS32_BUILTIN1 ("get_int_priority", unsigned, integer, GET_INT_PRIORITY);
++  ADD_NDS32_BUILTIN2 ("set_int_priority", void, integer, integer,
++		      SET_INT_PRIORITY);
++  ADD_NDS32_BUILTIN1 ("clr_pending_hwint", void, integer, CLR_PENDING_HWINT);
++  ADD_NDS32_BUILTIN1 ("set_trig_level", void, integer, SET_TRIG_LEVEL);
++  ADD_NDS32_BUILTIN1 ("set_trig_edge", void, integer, SET_TRIG_EDGE);
++  ADD_NDS32_BUILTIN1 ("get_trig_type", unsigned, integer, GET_TRIG_TYPE);
++
++  /* Load and Store  */
++  ADD_NDS32_BUILTIN1 ("llw", unsigned, ptr_uint, LLW);
++  ADD_NDS32_BUILTIN1 ("lwup", unsigned, ptr_uint, LWUP);
++  ADD_NDS32_BUILTIN1 ("lbup", char, ptr_uchar, LBUP);
++  ADD_NDS32_BUILTIN2 ("scw", unsigned, ptr_uint, unsigned, SCW);
++  ADD_NDS32_BUILTIN2 ("swup", void, ptr_uint, unsigned, SWUP);
++  ADD_NDS32_BUILTIN2 ("sbup", void, ptr_uchar, char, SBUP);
++
++  /* CCTL  */
++  ADD_NDS32_BUILTIN0 ("cctl_l1d_invalall", void, CCTL_L1D_INVALALL);
++  ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_alvl", void, CCTL_L1D_WBALL_ALVL);
++  ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_one_lvl", void, CCTL_L1D_WBALL_ONE_LVL);
++  ADD_NDS32_BUILTIN2 ("cctl_va_lck", void, integer, ptr_uint, CCTL_VA_LCK);
++  ADD_NDS32_BUILTIN2 ("cctl_idx_wbinval", void, integer, unsigned,
++		      CCTL_IDX_WBINVAL);
++  ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_l1", void, integer, ptr_uint,
++		      CCTL_VA_WBINVAL_L1);
++  ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_la", void, integer, ptr_uint,
++		      CCTL_VA_WBINVAL_LA);
++  ADD_NDS32_BUILTIN2 ("cctl_idx_read", unsigned, integer, unsigned,
++		      CCTL_IDX_READ);
++  ADD_NDS32_BUILTIN3 ("cctl_idx_write", void, integer, unsigned, unsigned,
++		      CCTL_IDX_WRITE);
++
++  /* PREFETCH  */
++  ADD_NDS32_BUILTIN3 ("dpref_qw", void, ptr_uchar, unsigned, integer, DPREF_QW);
++  ADD_NDS32_BUILTIN3 ("dpref_hw", void, ptr_ushort, unsigned, integer,
++		      DPREF_HW);
++  ADD_NDS32_BUILTIN3 ("dpref_w", void, ptr_uint, unsigned, integer, DPREF_W);
++  ADD_NDS32_BUILTIN3 ("dpref_dw", void, ptr_ulong, unsigned, integer, DPREF_DW);
++
++  /* Performance Extension  */
++  ADD_NDS32_BUILTIN1 ("pe_abs", integer, integer, ABS);
++  ADD_NDS32_BUILTIN2 ("pe_ave", integer, integer, integer, AVE);
++  ADD_NDS32_BUILTIN2 ("pe_bclr", unsigned, unsigned, unsigned, BCLR);
++  ADD_NDS32_BUILTIN2 ("pe_bset", unsigned, unsigned, unsigned, BSET);
++  ADD_NDS32_BUILTIN2 ("pe_btgl", unsigned, unsigned, unsigned, BTGL);
++  ADD_NDS32_BUILTIN2 ("pe_btst", unsigned, unsigned, unsigned, BTST);
++  ADD_NDS32_BUILTIN2 ("pe_clip", unsigned, integer, unsigned, CLIP);
++  ADD_NDS32_BUILTIN2 ("pe_clips", integer, integer, unsigned, CLIPS);
++  ADD_NDS32_BUILTIN1 ("pe_clz", unsigned, unsigned, CLZ);
++  ADD_NDS32_BUILTIN1 ("pe_clo", unsigned, unsigned, CLO);
++
++  /* Performance Extension 2  */
++  ADD_NDS32_BUILTIN3 ("pe2_bse", void, ptr_uint, unsigned, ptr_uint, BSE);
++  ADD_NDS32_BUILTIN3 ("pe2_bsp", void, ptr_uint, unsigned, ptr_uint, BSP);
++  ADD_NDS32_BUILTIN2 ("pe2_pbsad", unsigned, unsigned, unsigned, PBSAD);
++  ADD_NDS32_BUILTIN3 ("pe2_pbsada", unsigned, unsigned, unsigned, unsigned,
++		      PBSADA);
++
++  /* String Extension  */
++  ADD_NDS32_BUILTIN2 ("se_ffb", integer, unsigned, unsigned, FFB);
++  ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM);
++  ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM);
++
++  /* SATURATION  */
++  ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW);
++  ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW);
++  ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH);
++  ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH);
++  ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB);
++  ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB);
++  ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT);
++  ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT);
++  ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB);
++  ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB);
++  ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT);
++  ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT);
++  ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB);
++  ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB);
++  ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT);
++  ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT);
++  ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB);
++  ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB);
++  ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT);
++  ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT);
++  ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW);
++  ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U);
++  ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV);
++  ADD_NDS32_BUILTIN0 ("clrov", void, CLROV);
++
++  /* ROTR  */
++  ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR);
++
++  /* Swap  */
++  ADD_NDS32_BUILTIN1 ("wsbh", unsigned, unsigned, WSBH);
++
++  /* System  */
++  ADD_NDS32_BUILTIN2 ("svs", unsigned, integer, integer, SVS);
++  ADD_NDS32_BUILTIN2 ("sva", unsigned, integer, integer, SVA);
++  ADD_NDS32_BUILTIN1 ("jr_itoff", void, unsigned, JR_ITOFF);
++  ADD_NDS32_BUILTIN1 ("jr_toff", void, unsigned, JR_TOFF);
++  ADD_NDS32_BUILTIN1 ("jral_iton", void, unsigned, JRAL_ITON);
++  ADD_NDS32_BUILTIN1 ("jral_ton", void, unsigned, JRAL_TON);
++  ADD_NDS32_BUILTIN1 ("ret_itoff", void, unsigned, RET_ITOFF);
++  ADD_NDS32_BUILTIN1 ("ret_toff", void, unsigned, RET_TOFF);
++  ADD_NDS32_BUILTIN0 ("standby_no_wake_grant", void, STANDBY_NO_WAKE_GRANT);
++  ADD_NDS32_BUILTIN0 ("standby_wake_grant", void, STANDBY_WAKE_GRANT);
++  ADD_NDS32_BUILTIN0 ("standby_wait_done", void, STANDBY_WAKE_DONE);
++  ADD_NDS32_BUILTIN1 ("break", void, unsigned, BREAK);
++  ADD_NDS32_BUILTIN1 ("syscall", void, unsigned, SYSCALL);
++  ADD_NDS32_BUILTIN0 ("nop", void, NOP);
++  ADD_NDS32_BUILTIN0 ("get_current_sp", unsigned, GET_CURRENT_SP);
++  ADD_NDS32_BUILTIN1 ("set_current_sp", void, unsigned, SET_CURRENT_SP);
++  ADD_NDS32_BUILTIN2 ("teqz", void, unsigned, unsigned, TEQZ);
++  ADD_NDS32_BUILTIN2 ("tnez", void, unsigned, unsigned, TNEZ);
++  ADD_NDS32_BUILTIN1 ("trap", void, unsigned, TRAP);
++  ADD_NDS32_BUILTIN0 ("return_address", unsigned, RETURN_ADDRESS);
++  ADD_NDS32_BUILTIN0 ("setend_big", void, SETEND_BIG);
++  ADD_NDS32_BUILTIN0 ("setend_little", void, SETEND_LITTLE);
++
++  /* Schedule Barrier */
++  ADD_NDS32_BUILTIN0 ("schedule_barrier", void, SCHE_BARRIER);
++
++  /* TLBOP  */
++  ADD_NDS32_BUILTIN1 ("tlbop_trd", void, unsigned, TLBOP_TRD);
++  ADD_NDS32_BUILTIN1 ("tlbop_twr", void, unsigned, TLBOP_TWR);
++  ADD_NDS32_BUILTIN1 ("tlbop_rwr", void, unsigned, TLBOP_RWR);
++  ADD_NDS32_BUILTIN1 ("tlbop_rwlk", void, unsigned, TLBOP_RWLK);
++  ADD_NDS32_BUILTIN1 ("tlbop_unlk", void, unsigned, TLBOP_UNLK);
++  ADD_NDS32_BUILTIN1 ("tlbop_pb", unsigned, unsigned, TLBOP_PB);
++  ADD_NDS32_BUILTIN1 ("tlbop_inv", void, unsigned, TLBOP_INV);
++  ADD_NDS32_BUILTIN0 ("tlbop_flua", void, TLBOP_FLUA);
++
++  /* Unaligned Load/Store  */
++  ADD_NDS32_BUILTIN1 ("unaligned_load_hw", short_unsigned, ptr_ushort,
++		      UALOAD_HW);
++  ADD_NDS32_BUILTIN1 ("unaligned_load_w", unsigned, ptr_uint, UALOAD_W);
++  ADD_NDS32_BUILTIN1 ("unaligned_load_dw", long_long_unsigned, ptr_ulong,
++		      UALOAD_DW);
++  ADD_NDS32_BUILTIN2 ("unaligned_store_hw", void, ptr_ushort, short_unsigned,
++		      UASTORE_HW);
++  ADD_NDS32_BUILTIN2 ("unaligned_store_w", void, ptr_uint, unsigned, UASTORE_W);
++  ADD_NDS32_BUILTIN2 ("unaligned_store_dw", void, ptr_ulong, long_long_unsigned,
++		      UASTORE_DW);
++  ADD_NDS32_BUILTIN0 ("unaligned_feature", unsigned, UNALIGNED_FEATURE);
++  ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED);
++  ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED);
++
++  /* Instruction sequence protection  */
++  ADD_NDS32_BUILTIN0 ("signature_begin", void, SIGNATURE_BEGIN);
++  ADD_NDS32_BUILTIN0 ("signature_end", void, SIGNATURE_END);
++
++  /* DSP Extension: SIMD 16bit Add and Subtract.  */
++  ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16);
++  ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16);
++  ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16);
++  ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16);
++  ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16);
++  ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16);
++  ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16);
++  ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16);
++  ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16);
++  ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16);
++  ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16);
++  ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16);
++  ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16);
++  ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16);
++  ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16);
++  ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16);
++  ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16);
++  ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16);
++  ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16);
++  ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16);
++  ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16);
++  ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16);
++  ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16);
++  ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16);
++  ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16);
++  ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16);
++  ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16);
++  ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16);
++  ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16);
++  ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16);
++  ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16);
++  ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16);
++  ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16);
++  ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16);
++  ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16);
++  ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16);
++  ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16);
++  ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16);
++  ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16);
++  ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16);
++  ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16);
++  ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16);
++  ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16);
++  ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16);
++
++  /* DSP Extension: SIMD 8bit Add and Subtract.  */
++  ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8);
++  ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8);
++  ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8);
++  ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8);
++  ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8);
++  ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8);
++  ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8);
++  ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8);
++  ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8);
++  ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8);
++  ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8);
++  ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8);
++  ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8);
++  ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8);
++  ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8);
++  ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8);
++  ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8);
++  ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8);
++  ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8);
++  ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8);
++  ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8);
++  ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8);
++
++  /* DSP Extension: SIMD 16bit Shift.  */
++  ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16);
++  ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16);
++  ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U);
++  ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U);
++  ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16);
++  ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16);
++  ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U);
++  ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U);
++  ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16);
++  ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16);
++  ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16);
++  ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16);
++  ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16);
++  ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16);
++  ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U);
++  ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U);
++
++  /* DSP Extension: 16bit Compare.  */
++  ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16);
++  ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16);
++  ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16);
++  ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16);
++  ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16);
++  ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16);
++  ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16);
++  ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16);
++  ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16);
++  ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16);
++  ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16);
++
++  /* DSP Extension: 8bit Compare.  */
++  ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8);
++  ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8);
++  ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8);
++  ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8);
++  ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8);
++  ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8);
++  ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8);
++  ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8);
++  ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8);
++  ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8);
++  ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8);
++
++  /* DSP Extension: SIMD 16bit MISC.  */
++  ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16);
++  ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16);
++  ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16);
++  ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16);
++  ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16);
++  ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16);
++  ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16);
++  ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16);
++  ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16);
++  ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16);
++  ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16);
++  ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16);
++  ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16);
++  ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16);
++  ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16);
++  ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16);
++  ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16);
++  ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16);
++  ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16);
++  ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16);
++  ADD_NDS32_BUILTIN2 ("smulx16",
++		      long_long_unsigned, unsigned, unsigned, SMULX16);
++  ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16);
++  ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16);
++  ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16);
++  ADD_NDS32_BUILTIN2 ("umulx16",
++		      long_long_unsigned, unsigned, unsigned, UMULX16);
++  ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16);
++
++  /* DSP Extension: SIMD 8bit MISC.  */
++  ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8);
++  ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8);
++  ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8);
++  ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8);
++  ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8);
++  ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8);
++  ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8);
++  ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8);
++  ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8);
++  ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8);
++
++  /* DSP Extension: 8bit Unpacking.  */
++  ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810);
++  ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810);
++  ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820);
++  ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820);
++  ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830);
++  ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830);
++  ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831);
++  ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831);
++  ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810);
++  ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810);
++  ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820);
++  ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820);
++  ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830);
++  ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830);
++  ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831);
++  ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831);
++
++  /* DSP Extension: 32bit Add and Subtract.  */
++  ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW);
++  ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW);
++  ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW);
++  ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW);
++
++  /* DSP Extension: 32bit Shift.  */
++  ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U);
++  ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL);
++
++  /* DSP Extension: 16bit Packing.  */
++  ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16);
++  ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16);
++  ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16);
++  ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16);
++  ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16);
++  ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16);
++  ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16);
++  ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16);
++
++  /* DSP Extension: Signed MSW 32x32 Multiply and ADD.  */
++  ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL);
++  ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U);
++  ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC);
++  ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U);
++  ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB);
++  ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U);
++  ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL);
++  ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U);
++
++  /* DSP Extension: Most Significant Word 32x16 Multiply and ADD.  */
++  ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB);
++  ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB);
++  ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U);
++  ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U);
++  ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT);
++  ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT);
++  ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U);
++  ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U);
++  ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB);
++  ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB);
++  ADD_NDS32_BUILTIN3 ("kmmawb_u",
++		      integer, integer, integer, unsigned, KMMAWB_U);
++  ADD_NDS32_BUILTIN3 ("v_kmmawb_u",
++		      integer, integer, integer, v2hi, V_KMMAWB_U);
++  ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT);
++  ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT);
++  ADD_NDS32_BUILTIN3 ("kmmawt_u",
++		      integer, integer, integer, unsigned, KMMAWT_U);
++  ADD_NDS32_BUILTIN3 ("v_kmmawt_u",
++		      integer, integer, integer, v2hi, V_KMMAWT_U);
++
++  /* DSP Extension: Signed 16bit Multiply with ADD/Subtract.  */
++  ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB);
++  ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB);
++  ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT);
++  ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT);
++  ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT);
++  ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT);
++  ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA);
++  ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA);
++  ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA);
++  ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA);
++  ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS);
++  ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS);
++  ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS);
++  ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS);
++  ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS);
++  ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS);
++  ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB);
++  ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB);
++  ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT);
++  ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT);
++  ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT);
++  ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT);
++  ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA);
++  ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA);
++  ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA);
++  ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA);
++  ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS);
++  ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS);
++  ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS);
++  ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS);
++  ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS);
++  ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS);
++  ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA);
++  ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA);
++  ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA);
++  ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA);
++
++  /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract.  */
++  ADD_NDS32_BUILTIN2 ("smal", long_long_integer,
++		      long_long_integer, unsigned, SMAL);
++  ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer,
++		      long_long_integer, v2hi, V_SMAL);
++
++  /* DSP Extension: 32bit MISC.  */
++  ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV);
++  ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT);
++  ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK);
++  ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB);
++
++  /* DSP Extension: 64bit Add and Subtract.  */
++  ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer,
++		      long_long_integer, long_long_integer, SADD64);
++  ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned,
++		      long_long_unsigned, long_long_unsigned, UADD64);
++  ADD_NDS32_BUILTIN2 ("radd64", long_long_integer,
++		      long_long_integer, long_long_integer, RADD64);
++  ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned,
++		      long_long_unsigned, long_long_unsigned, URADD64);
++  ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer,
++		      long_long_integer, long_long_integer, KADD64);
++  ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned,
++		      long_long_unsigned, long_long_unsigned, UKADD64);
++  ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer,
++		      long_long_integer, long_long_integer, SSUB64);
++  ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned,
++		      long_long_unsigned, long_long_unsigned, USUB64);
++  ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer,
++		      long_long_integer, long_long_integer, RSUB64);
++  ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned,
++		      long_long_unsigned, long_long_unsigned, URSUB64);
++  ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer,
++		      long_long_integer, long_long_integer, KSUB64);
++  ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned,
++		      long_long_unsigned, long_long_unsigned, UKSUB64);
++
++  /* DSP Extension: 32bit Multiply with 64bit Add/Subtract.  */
++  ADD_NDS32_BUILTIN3 ("smar64", long_long_integer,
++		      long_long_integer, integer, integer, SMAR64);
++  ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer,
++		      long_long_integer, integer, integer, SMSR64);
++  ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned,
++		      long_long_unsigned, unsigned, unsigned, UMAR64);
++  ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned,
++		      long_long_unsigned, unsigned, unsigned, UMSR64);
++  ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer,
++		      long_long_integer, integer, integer, KMAR64);
++  ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer,
++		      long_long_integer, integer, integer, KMSR64);
++  ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned,
++		      long_long_unsigned, unsigned, unsigned, UKMAR64);
++  ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned,
++		      long_long_unsigned, unsigned, unsigned, UKMSR64);
++
++  /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract.  */
++  ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALBB);
++  ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALBB);
++  ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALBT);
++  ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALBT);
++  ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALTT);
++  ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALTT);
++  ADD_NDS32_BUILTIN3 ("smalda", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALDA);
++  ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALDA);
++  ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALXDA);
++  ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALXDA);
++  ADD_NDS32_BUILTIN3 ("smalds", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALDS);
++  ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALDS);
++  ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALDRS);
++  ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALDRS);
++  ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMALXDS);
++  ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMALXDS);
++  ADD_NDS32_BUILTIN3 ("smslda", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMSLDA);
++  ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMSLDA);
++  ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer,
++		      long_long_integer, unsigned, unsigned, SMSLXDA);
++  ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer,
++		      long_long_integer, v2hi, v2hi, V_SMSLXDA);
++
++  /* DSP Extension: augmented baseline.  */
++  ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32);
++  ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32);
++  ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS);
++
++  /* The builtin turn off hwloop optimization.  */
++  ADD_NDS32_BUILTIN0 ("no_ext_zol", void, NO_HWLOOP);
++
++  /* DSP Extension: vector type unaligned Load/Store  */
++  ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16);
++  ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16);
++  ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8);
++  ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8);
++  ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort,
++		      u_v2hi, UASTORE_U16);
++  ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short,
++		      v2hi, UASTORE_S16);
++  ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar,
++		      u_v4qi, UASTORE_U8);
++  ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char,
++		      v4qi, UASTORE_S8);
++}
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md
+index 53876c5..6f8b3eb 100644
+--- a/gcc/config/nds32/nds32-intrinsic.md
++++ b/gcc/config/nds32/nds32-intrinsic.md
+@@ -40,6 +40,26 @@
+    (set_attr "length"    "4")]
+ )
+ 
++(define_expand "mtsr_isb" ;; Emits unspec_volatile_mtsr, then unspec_volatile_isb.
++  [(set (match_operand:SI 0 "register_operand" "")
++	(match_operand:SI 1 "immediate_operand" ""))]
++  ""
++{
++  emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1]));
++  emit_insn (gen_unspec_volatile_isb());
++  DONE;
++})
++
++(define_expand "mtsr_dsb" ;; Emits unspec_volatile_mtsr, then unspec_dsb.
++  [(set (match_operand:SI 0 "register_operand" "")
++	(match_operand:SI 1 "immediate_operand" ""))]
++  ""
++{
++  emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1]));
++  emit_insn (gen_unspec_dsb());
++  DONE;
++})
++
+ (define_insn "unspec_volatile_mtsr"
+   [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ 			(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MTSR)]
+@@ -58,6 +78,74 @@
+    (set_attr "length"    "4")]
+ )
+ 
++;; FPU Register Transfer.
++
++(define_insn "unspec_fcpynsd" ;; Emits "fcpynsd"; all three DFmode operands use constraint "f".
++   [(set (match_operand:DF 0 "register_operand" "=f")
++	 (unspec:DF [(match_operand:DF 1 "register_operand" "f")
++		     (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYNSD))]
++  ""
++  "fcpynsd\t%0, %1, %2"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_fcpynss" ;; Emits "fcpynss"; all three SFmode operands use constraint "f".
++   [(set (match_operand:SF 0 "register_operand" "=f")
++	 (unspec:SF [(match_operand:SF 1 "register_operand" "f")
++		     (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYNSS))]
++  ""
++  "fcpynss\t%0, %1, %2"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_fcpysd" ;; Emits "fcpysd"; all three DFmode operands use constraint "f".
++   [(set (match_operand:DF 0 "register_operand" "=f")
++	 (unspec:DF [(match_operand:DF 1 "register_operand" "f")
++		     (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYSD))]
++  ""
++  "fcpysd\t%0, %1, %2"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_fcpyss" ;; Emits "fcpyss"; all three SFmode operands use constraint "f".
++   [(set (match_operand:SF 0 "register_operand" "=f")
++	 (unspec:SF [(match_operand:SF 1 "register_operand" "f")
++		     (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYSS))]
++  ""
++  "fcpyss\t%0, %1, %2"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_fmfcsr" ;; Emits "fmfcsr %0"; result is an SImode "r" register (unspec_volatile).
++   [(set (match_operand:SI 0 "register_operand" "=r")
++	 (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCSR))]
++  ""
++  "fmfcsr\t%0"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_fmtcsr" ;; Emits "fmtcsr %0"; input is an SImode "r" register, no result operand.
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_FMTCSR)]
++  ""
++  "fmtcsr\t%0"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_fmfcfg" ;; Emits "fmfcfg %0"; result is an SImode "r" register (unspec_volatile).
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCFG))]
++  ""
++  "fmfcfg\t%0"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
+ ;; ------------------------------------------------------------------------
+ 
+ ;; Interrupt Instructions.
+@@ -76,6 +164,445 @@
+   [(set_attr "type" "misc")]
+ )
+ 
++(define_expand "unspec_enable_int" ;; ORs one bit, chosen by operand 0, into an INT_MASK* register.
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_ENABLE_INT)]
++  ""
++{
++  rtx system_reg;
++  rtx temp_reg = gen_reg_rtx (SImode);
++
++  /* Set system register from nds32_intrinsic_register_names[].  */
++  if ((INTVAL (operands[0]) >= NDS32_INT_H16)
++      && (INTVAL (operands[0]) <= NDS32_INT_H31))
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_MASK2__);
++      operands[0] = GEN_INT (1 << (INTVAL (operands[0])));
++    }
++  else if ((INTVAL (operands[0]) >= NDS32_INT_H32)
++	   && (INTVAL (operands[0]) <= NDS32_INT_H63))
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_MASK3__);
++      operands[0] = GEN_INT (1 << (INTVAL (operands[0]) - 32));
++    }
++  else
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_MASK__);
++
++      if (INTVAL (operands[0]) == NDS32_INT_SWI)
++        operands[0] = GEN_INT (1 << 16);
++      else if ((INTVAL (operands[0]) >= NDS32_INT_ALZ)
++	       && (INTVAL (operands[0]) <= NDS32_INT_DSSIM))
++	operands[0] = GEN_INT (1 << (INTVAL (operands[0]) - 4));
++      else
++	operands[0] = GEN_INT (1 << (INTVAL (operands[0])));
++    }
++  /* Read the mask register, OR in the bit, write it back, then dsb.  */
++  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++  emit_insn (gen_iorsi3 (temp_reg, temp_reg, operands[0]));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++(define_expand "unspec_disable_int" ;; ANDs one bit, chosen by operand 0, out of an INT_MASK* register.
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_DISABLE_INT)]
++  ""
++{
++  rtx system_reg;
++  rtx temp_reg = gen_reg_rtx (SImode);
++
++  /* Set system register from nds32_intrinsic_register_names[].  */
++  if ((INTVAL (operands[0]) >= NDS32_INT_H16)
++      && (INTVAL (operands[0]) <= NDS32_INT_H31))
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_MASK2__);
++      operands[0] = GEN_INT (~(1 << INTVAL (operands[0])));
++    }
++  else if ((INTVAL (operands[0]) >= NDS32_INT_H32)
++	   && (INTVAL (operands[0]) <= NDS32_INT_H63))
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_MASK3__);
++      operands[0] = GEN_INT (~(1 << (INTVAL (operands[0]) - 32)));
++    }
++  else
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_MASK__);
++
++      if (INTVAL (operands[0]) == NDS32_INT_SWI)
++        operands[0] = GEN_INT (~(1 << 16));
++      else if ((INTVAL (operands[0]) >= NDS32_INT_ALZ)
++	       && (INTVAL (operands[0]) <= NDS32_INT_DSSIM))
++	operands[0] = GEN_INT (~(1 << (INTVAL (operands[0]) - 4)));
++      else
++	operands[0] = GEN_INT (~(1 << INTVAL (operands[0])));
++    }
++  /* Read the mask register, AND out the bit, write it back, then dsb.  */
++  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++  emit_insn (gen_andsi3 (temp_reg, temp_reg, operands[0]));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++(define_expand "unspec_set_pending_swint" ;; Sets bit 16 of $INT_PEND via mfsr/or/mtsr.
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SET_PENDING_SWINT)]
++  ""
++{
++  /* Get $INT_PEND system register from nds32_intrinsic_register_names[]  */
++  rtx system_reg =  GEN_INT (__NDS32_REG_INT_PEND__);
++  rtx temp_reg = gen_reg_rtx (SImode);
++  /* OR in 65536 (== 1 << 16), write the register back, then dsb.  */
++  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++  emit_insn (gen_iorsi3 (temp_reg, temp_reg, GEN_INT (65536)));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++(define_expand "unspec_clr_pending_swint" ;; Clears bit 16 of $INT_PEND via mfsr/and/mtsr.
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLR_PENDING_SWINT)]
++  ""
++{
++  /* Get $INT_PEND system register from nds32_intrinsic_register_names[]  */
++  rtx system_reg =  GEN_INT (__NDS32_REG_INT_PEND__);
++  rtx temp_reg = gen_reg_rtx (SImode);
++  /* AND with ~(1 << 16), write the register back, then dsb.  */
++  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++  emit_insn (gen_andsi3 (temp_reg, temp_reg, GEN_INT (~(1 << 16))));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++(define_expand "unspec_clr_pending_hwint" ;; Writes a single 1 bit to an INT_PEND* register.
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_CLR_PENDING_HWINT)]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx clr_hwint;
++  unsigned offset = 0;
++
++  /* Set system register from nds32_intrinsic_register_names[].  */
++  if ((INTVAL (operands[0]) >= NDS32_INT_H0)
++      && (INTVAL (operands[0]) <= NDS32_INT_H15))
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
++    }
++  else if ((INTVAL (operands[0]) >= NDS32_INT_H16)
++	   && (INTVAL (operands[0]) <= NDS32_INT_H31))
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
++    }
++  else if ((INTVAL (operands[0]) >= NDS32_INT_H32)
++	   && (INTVAL (operands[0]) <= NDS32_INT_H63))
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
++      offset = 32;
++    }
++  else
++    error ("__nds32__clr_pending_hwint not support NDS32_INT_SWI,"
++	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  if (system_reg != NULL_RTX)
++    {
++      /* $INT_PEND is write-one-to-clear.  Build the bit only for a valid
++	 id so the host never evaluates an out-of-range shift.  */
++      clr_hwint = GEN_INT (1 << (INTVAL (operands[0]) - offset));
++      emit_move_insn (temp_reg, clr_hwint);
++      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++      emit_insn (gen_unspec_dsb ());
++    }
++  DONE;
++})
++
++(define_expand "unspec_get_all_pending_int" ;; Reads $INT_PEND into operand 0 with mfsr, then dsb.
++  [(set (match_operand:SI 0 "register_operand" "")
++	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_GET_ALL_PENDING_INT))]
++  ""
++{
++  rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
++  emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++(define_expand "unspec_get_pending_int" ;; Yields 0 or 1 in operand 0: the pending bit of interrupt operand 1.
++  [(set (match_operand:SI 0 "register_operand" "")
++	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_PENDING_INT))]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx shift = NULL_RTX; /* Local, not operands[2]: only operands 0-1 exist.  */
++  /* Set system register from nds32_intrinsic_register_names[].  */
++  if ((INTVAL (operands[1]) >= NDS32_INT_H0)
++      && (INTVAL (operands[1]) <= NDS32_INT_H15))
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
++      shift = GEN_INT (31 - INTVAL (operands[1]));
++    }
++  else if (INTVAL (operands[1]) == NDS32_INT_SWI)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
++      shift = GEN_INT (15);
++    }
++  else if ((INTVAL (operands[1]) >= NDS32_INT_H16)
++	   && (INTVAL (operands[1]) <= NDS32_INT_H31))
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
++      shift = GEN_INT (31 - INTVAL (operands[1]));
++    }
++  else if ((INTVAL (operands[1]) >= NDS32_INT_H32)
++	   && (INTVAL (operands[1]) <= NDS32_INT_H63))
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
++      shift = GEN_INT (31 - (INTVAL (operands[1]) - 32));
++    }
++  else
++    error ("get_pending_int not support NDS32_INT_ALZ,"
++	   " NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  /* mfsr op0, system_reg; then move the wanted bit to bit 31 and down to 0.  */
++  if (system_reg != NULL_RTX)
++    {
++      emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
++      emit_insn (gen_ashlsi3 (operands[0], operands[0], shift));
++      emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
++      emit_insn (gen_unspec_dsb ());
++    }
++  DONE;
++})
++
++(define_expand "unspec_set_int_priority" ;; Rewrites the 2-bit priority field of interrupt operand 0.
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")
++			(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_SET_INT_PRIORITY)]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx priority = NULL_RTX;
++  rtx mask = NULL_RTX;
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx mask_reg = gen_reg_rtx (SImode);
++  rtx set_reg = gen_reg_rtx (SImode);
++  unsigned offset = 0;
++
++  /* Get system register from nds32_intrinsic_register_names[].  */
++  if (INTVAL (operands[0]) <= NDS32_INT_H15)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI__);
++      offset = 0;
++    }
++  else if (INTVAL (operands[0]) >= NDS32_INT_H16
++	   && INTVAL (operands[0]) <= NDS32_INT_H31)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI2__);
++      /* The $INT_PRI2 first bit correspond to H16, so need
++	 subtract 16.  */
++      offset = 16;
++    }
++  else if (INTVAL (operands[0]) >= NDS32_INT_H32
++	   && INTVAL (operands[0]) <= NDS32_INT_H47)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI3__);
++      /* The $INT_PRI3 first bit correspond to H32, so need
++	 subtract 32.  */
++      offset = 32;
++    }
++  else if (INTVAL (operands[0]) >= NDS32_INT_H48
++	   && INTVAL (operands[0]) <= NDS32_INT_H63)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI4__);
++      /* The $INT_PRI4 first bit correspond to H48, so need
++	 subtract 48.  */
++      offset = 48;
++    }
++  else
++    error ("set_int_priority not support NDS32_INT_SWI,"
++	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  if (system_reg != NULL_RTX)
++    {
++      mask = GEN_INT (~(3 << 2 * (INTVAL (operands[0]) - offset)));
++      priority = GEN_INT ((int) (INTVAL (operands[1])
++				 << ((INTVAL (operands[0]) - offset) * 2)));
++
++      emit_move_insn (mask_reg, mask);
++      emit_move_insn (set_reg, priority);
++      emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++      emit_insn (gen_andsi3 (temp_reg, temp_reg, mask_reg));
++      emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_reg));
++      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++      emit_insn (gen_unspec_dsb ());
++    }
++  DONE;
++})
++
++(define_expand "unspec_get_int_priority" ;; Reads the 2-bit priority field of interrupt operand 1 into operand 0.
++  [(set (match_operand:SI 0 "register_operand" "")
++	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_INT_PRIORITY))]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx priority = NULL_RTX;
++  unsigned offset = 0;
++
++  /* Get system register from nds32_intrinsic_register_names[]  */
++  if (INTVAL (operands[1]) <= NDS32_INT_H15)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI__);
++      offset = 0;
++    }
++  else if (INTVAL (operands[1]) >= NDS32_INT_H16
++	   && INTVAL (operands[1]) <= NDS32_INT_H31)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI2__);
++      /* The $INT_PRI2 first bit correspond to H16, so need
++	 subtract 16.  */
++      offset = 16;
++    }
++  else if (INTVAL (operands[1]) >= NDS32_INT_H32
++	   && INTVAL (operands[1]) <= NDS32_INT_H47)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI3__);
++      /* The $INT_PRI3 first bit correspond to H32, so need
++	 subtract 32.  */
++      offset = 32;
++    }
++  else if (INTVAL (operands[1]) >= NDS32_INT_H48
++	   && INTVAL (operands[1]) <= NDS32_INT_H63)
++    {
++      system_reg =  GEN_INT (__NDS32_REG_INT_PRI4__);
++      /* The $INT_PRI4 first bit correspond to H48, so need
++	 subtract 48.  */
++      offset = 48;
++    }
++  else
++    error ("get_int_priority not support NDS32_INT_SWI,"
++	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  if (system_reg != NULL_RTX)
++    {
++      /* Shift the 2-bit field at bit 2*k up to [31:30], then down to [1:0].  */
++      priority = GEN_INT (30 - 2 * (INTVAL (operands[1]) - offset));
++      emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
++      emit_insn (gen_ashlsi3 (operands[0], operands[0], priority));
++      emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (30)));
++      emit_insn (gen_unspec_dsb ());
++    }
++  DONE;
++})
++
++(define_expand "unspec_set_trig_level" ;; Clears the bit of interrupt operand 0 in an INT_TRIGGER* register.
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_LEVEL)]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx set_level;
++  unsigned offset = 0;
++
++  if (INTVAL (operands[0]) >= NDS32_INT_H0
++      && INTVAL (operands[0]) <= NDS32_INT_H31)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__);
++      offset = 0;
++    }
++  else if (INTVAL (operands[0]) >= NDS32_INT_H32
++	   && INTVAL (operands[0]) <= NDS32_INT_H63)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__);
++      offset = 32;
++    }
++  else
++    error ("__nds32__set_trig_type_level not support NDS32_INT_SWI,"
++	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  if (system_reg != NULL_RTX)
++    {
++      /* TRIGGER register: 0 means level triggered, 1 means edge triggered.  */
++      set_level = GEN_INT (~(1 << (INTVAL (operands[0]) - offset)));
++      /* NOTE(review): no dsb follows this mtsr, unlike unspec_enable_int.  */
++      emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++      emit_insn (gen_andsi3 (temp_reg, temp_reg, set_level));
++      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++    }
++  DONE;
++})
++
++(define_expand "unspec_set_trig_edge" ;; Sets the bit of interrupt operand 0 in an INT_TRIGGER* register.
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_EDGE)]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx set_level;
++  unsigned offset = 0;
++
++  if (INTVAL (operands[0]) >= NDS32_INT_H0
++      && INTVAL (operands[0]) <= NDS32_INT_H31)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__);
++      offset = 0;
++    }
++  else if (INTVAL (operands[0]) >= NDS32_INT_H32
++	   && INTVAL (operands[0]) <= NDS32_INT_H63)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__);
++      offset = 32;
++    }
++  else
++    error ("__nds32__set_trig_type_edge not support NDS32_INT_SWI,"
++	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  if (system_reg != NULL_RTX)
++    {
++      /* TRIGGER register: 0 means level triggered, 1 means edge triggered.  */
++      set_level = GEN_INT ((1 << (INTVAL (operands[0]) - offset)));
++      /* NOTE(review): no dsb follows this mtsr, unlike unspec_enable_int.  */
++      emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++      emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_level));
++      emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++    }
++  DONE;
++})
++
++(define_expand "unspec_get_trig_type" ;; Yields 0 or 1 in operand 0: the INT_TRIGGER* bit of interrupt operand 1.
++  [(set (match_operand:SI 0 "register_operand" "")
++	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_TRIG_TYPE))]
++  ""
++{
++  rtx system_reg = NULL_RTX;
++  rtx trig_type;
++  unsigned offset = 0;
++
++  if (INTVAL (operands[1]) >= NDS32_INT_H0
++      && INTVAL (operands[1]) <= NDS32_INT_H31)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__);
++      offset = 0;
++    }
++  else if (INTVAL (operands[1]) >= NDS32_INT_H32
++	   && INTVAL (operands[1]) <= NDS32_INT_H63)
++    {
++      system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__);
++      offset = 32;
++    }
++  else
++    error ("__nds32__get_trig_type not support NDS32_INT_SWI,"
++	   " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM");
++
++  if (system_reg != NULL_RTX)
++    {
++      trig_type = GEN_INT (31 - (INTVAL (operands[1]) - offset));
++      /* Move the wanted bit up to bit 31, then down to bit 0.  */
++      emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
++      emit_insn (gen_ashlsi3 (operands[0], operands[0], trig_type));
++      emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
++      emit_insn (gen_unspec_dsb ());
++    }
++  DONE;
++})
++
+ ;; ------------------------------------------------------------------------
+ 
+ ;; Cache Synchronization Instructions
+@@ -84,7 +611,7 @@
+   [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_ISYNC)]
+   ""
+   "isync\t%0"
+-  [(set_attr "type" "misc")]
++  [(set_attr "type" "mmu")]
+ )
+ 
+ (define_insn "unspec_volatile_isb"
+@@ -94,4 +621,1077 @@
+   [(set_attr "type" "misc")]
+ )
+ 
++(define_insn "unspec_dsb" ;; Emits "dsb"; no operands.
++  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_DSB)]
++  ""
++  "dsb"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_msync" ;; Emits "msync %0" with immediate operand 0.
++  [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_MSYNC)]
++  ""
++  "msync\t%0"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_msync_all" ;; Emits "msync all"; no operands.
++  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_ALL)]
++  ""
++  "msync\tall"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_msync_store" ;; Emits "msync store"; no operands.
++  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_STORE)]
++  ""
++  "msync\tstore"
++  [(set_attr "type" "misc")]
++)
++
++;; Load and Store
++
++(define_insn "unspec_volatile_llw" ;; Emits "llw": SImode load from a register+register address.
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
++					      (match_operand:SI 2 "register_operand" "r")))] UNSPEC_VOLATILE_LLW))]
++  ""
++  "llw\t%0, [%1 + %2]"
++  [(set_attr "length"    "4")]
++)
++
++(define_insn "unspec_lwup" ;; Emits "lwup": SImode load from a register+register address.
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
++					      (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LWUP))]
++  ""
++  "lwup\t%0, [%1 + %2]"
++  [(set_attr "length"    "4")]
++)
++
++(define_insn "unspec_lbup" ;; Emits "lbup". NOTE(review): the RTL uses mem:SI here, same as lwup -- confirm.
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
++					      (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LBUP))]
++  ""
++  "lbup\t%0, [%1 + %2]"
++  [(set_attr "length"    "4")]
++)
++
++(define_insn "unspec_volatile_scw" ;; Emits "scw"; operand 3 is tied to operand 0 by constraint "0".
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
++					      (match_operand:SI 2 "register_operand" "r")))
++			     (match_operand:SI 3 "register_operand" "0")] UNSPEC_VOLATILE_SCW))]
++  ""
++  "scw\t%0, [%1 + %2]"
++  [(set_attr "length"     "4")]
++)
++
++(define_insn "unspec_swup" ;; Emits "swup": stores register operand 2 to the address %0 + %1.
++  [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
++			 (match_operand:SI 1 "register_operand" "r")))
++	(unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SWUP))]
++  ""
++  "swup\t%2, [%0 + %1]"
++  [(set_attr "length"     "4")]
++)
++
++(define_insn "unspec_sbup" ;; Emits "sbup". NOTE(review): the RTL destination is mem:SI, same as swup -- confirm.
++  [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
++			 (match_operand:SI 1 "register_operand" "r")))
++	(unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SBUP))]
++  ""
++  "sbup\t%2, [%0 + %1]"
++  [(set_attr "length"     "4")]
++)
++
++;; CCTL
++
++(define_insn "cctl_l1d_invalall" ;; Emits "cctl L1D_INVALALL"; no operands.
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_INVALALL)]
++  ""
++  "cctl\tL1D_INVALALL"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_l1d_wball_alvl" ;; Emits "cctl L1D_WBALL, alevel"; no operands.
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL)]
++  ""
++  "cctl\tL1D_WBALL, alevel"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_l1d_wball_one_lvl" ;; Emits "cctl L1D_WBALL, 1level"; no operands.
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL)]
++  ""
++  "cctl\tL1D_WBALL, 1level"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_idx_read" ;; %X1 prints immediate operand 1 through a port-specific modifier (not shown here).
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")
++			     (match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_READ))]
++  ""
++  "cctl\t%0, %2, %X1"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_idx_write"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
++			(match_operand:SI 1 "register_operand" "r")
++			(match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WRITE)]
++  ""
++  "cctl\t%1, %2, %W0"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_va_wbinval_l1"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
++			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1)]
++  ""
++  "cctl\t%1, %U0, 1level"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_va_wbinval_la"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
++			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA)]
++  ""
++  "cctl\t%1, %U0, alevel"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_idx_wbinval"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
++			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WBINVAL)]
++  ""
++  "cctl\t%1, %T0"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "cctl_va_lck"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
++			(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_LCK)]
++  ""
++  "cctl\t%1, %R0"
++  [(set_attr "type" "mmu")]
++)
++
++;; PREFETCH
++
++(define_insn "prefetch_qw"
++  [(unspec_volatile:QI [(match_operand:SI 0 "register_operand" "r")
++			(match_operand:SI 1 "nonmemory_operand" "r")
++			(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_QW)]
++  ""
++  "dpref\t%Z2, [%0 + %1]"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "prefetch_hw"
++  [(unspec_volatile:HI [(match_operand:SI 0 "register_operand" "r")
++			(match_operand:SI 1 "nonmemory_operand" "r")
++			(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_HW)]
++  ""
++  "dpref\t%Z2, [%0 + (%1<<1)]"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "prefetch_w"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "    r, r")
++			(match_operand:SI 1 "nonmemory_operand" "Is15, r")
++			(match_operand:SI 2 "immediate_operand" "   i, i")] UNSPEC_VOLATILE_DPREF_W)]
++  ""
++  "@
++  dprefi.w\t%Z2, [%0 + %1]
++  dpref\t%Z2, [%0 + (%1<<2)]"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "prefetch_dw"
++  [(unspec_volatile:DI [(match_operand:SI 0 "register_operand"  "   r, r")
++			(match_operand:SI 1 "nonmemory_operand" "Is15, r")
++			(match_operand:SI 2 "immediate_operand" "   i, i")] UNSPEC_VOLATILE_DPREF_DW)]
++  ""
++  "@
++  dprefi.d\t%Z2, [%0 + %1]
++  dpref\t%Z2, [%0 + (%1<<3)]"
++  [(set_attr "type" "misc")]
++)
++
++;; Performance Extension
++
++(define_expand "unspec_ave"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")]
++  ""
++{
++  emit_insn (gen_ave (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_expand "unspec_bclr"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  ""
++{
++  unsigned HOST_WIDE_INT val = ~(1u << UINTVAL (operands[2]));
++  emit_insn (gen_andsi3 (operands[0], operands[1], gen_int_mode (val, SImode)));
++  DONE;
++})
++
++(define_expand "unspec_bset"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  ""
++{
++  unsigned HOST_WIDE_INT val = 1u << UINTVAL (operands[2]);
++  emit_insn (gen_iorsi3 (operands[0], operands[1], gen_int_mode (val, SImode)));
++  DONE;
++})
++
++(define_expand "unspec_btgl"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  ""
++{
++  unsigned HOST_WIDE_INT val = 1u << UINTVAL (operands[2]);
++  emit_insn (gen_xorsi3 (operands[0], operands[1], gen_int_mode (val, SImode)));
++  DONE;
++})
++
++(define_expand "unspec_btst"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  ""
++{
++  emit_insn (gen_btst (operands[0], operands[1], operands[2]));
++  DONE;
++})
++
++(define_insn "unspec_clip"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP))]
++  ""
++  "clip\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "unspec_clips"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS))]
++  ""
++  "clips\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "unspec_clo"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_CLO))]
++  ""
++  "clo\t%0, %1"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "unspec_ssabssi2"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ss_abs:SI (match_operand:SI 1 "register_operand" "r")))]
++  ""
++  "abs\t%0, %1"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++;; Performance extension 2
++
++(define_insn "unspec_pbsad"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_PBSAD))]
++  ""
++  "pbsad\t%0, %1, %2"
++  [(set_attr "type" "pbsad")
++   (set_attr "length"   "4")]
++)
++
++(define_insn "unspec_pbsada"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "0")
++		    (match_operand:SI 2 "register_operand" "r")
++		    (match_operand:SI 3 "register_operand" "r")] UNSPEC_PBSADA))]
++  ""
++  "pbsada\t%0, %2, %3"
++  [(set_attr "type" "pbsada")
++   (set_attr "length"    "4")]
++)
++
++(define_expand "bse"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")]
++  ""
++  {
++    rtx temp0 = gen_reg_rtx (SImode);
++    rtx temp2 = gen_reg_rtx (SImode);
++    /* Operands 0 and 2 are addresses: load both, emit bse, store back.  */
++    emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0]));
++    emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2]));
++    emit_insn (gen_unspec_bse (temp0, operands[1], temp2, temp0, temp2));
++    emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0);
++    emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2);
++    DONE;
++  }
++)
++
++(define_insn "unspec_bse"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")
++		    (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSE))
++   (set (match_operand:SI 4 "register_operand" "=2")
++	(unspec:SI [(match_dup 1)
++		    (match_dup 2)
++		    (match_dup 0)] UNSPEC_BSE_2))]
++  ""
++  "bse\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_expand "bsp"
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:SI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")]
++  ""
++  {
++    rtx temp0 = gen_reg_rtx (SImode);
++    rtx temp2 = gen_reg_rtx (SImode);
++    /* Operands 0 and 2 are addresses: load both, emit bsp, store back.  */
++    emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0]));
++    emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2]));
++    emit_insn (gen_unspec_bsp (temp0, operands[1], temp2, temp0, temp2));
++    emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0);
++    emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2);
++    DONE;
++  }
++)
++
++(define_insn "unspec_bsp"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")
++		    (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSP))
++   (set (match_operand:SI 4 "register_operand" "=2")
++	(unspec:SI [(match_dup 1)
++		    (match_dup 2)
++		    (match_dup 0)] UNSPEC_BSP_2))]
++  ""
++  "bsp\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++;; String Extension
++
++(define_insn "unspec_ffb"
++  [(set (match_operand:SI 0 "register_operand" "=r, r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r, r")
++		    (match_operand:SI 2 "nonmemory_operand" "Iu08, r")] UNSPEC_FFB))]
++  ""
++  "@
++  ffbi\t%0, %1, %2
++  ffb\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "unspec_ffmism"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_FFMISM))]
++  ""
++  "ffmism\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_insn "unspec_flmism"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_FLMISM))]
++  ""
++  "flmism\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++;; SATURATION
++
++(define_insn "unspec_kaddw"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ss_plus:SI (match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")))]
++  ""
++  "kaddw\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_ksubw"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(ss_minus:SI (match_operand:SI 1 "register_operand" "r")
++		     (match_operand:SI 2 "register_operand" "r")))]
++  ""
++  "ksubw\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kaddh"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r")
++			     (match_operand:SI 2 "register_operand" "r"))
++		    (const_int 15)] UNSPEC_CLIPS))]
++  ""
++  "kaddh\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_ksubh"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r")
++			      (match_operand:SI 2 "register_operand" "r"))
++		    (const_int 15)] UNSPEC_CLIPS))]
++  ""
++  "ksubh\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kdmbb"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))]
++  ""
++  "kdmbb\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kdmbt"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))]
++  ""
++  "kdmbt\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kdmtb"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))]
++  ""
++  "kdmtb\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kdmtt"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))]
++  ""
++  "kdmtt\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_khmbb"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))]
++  ""
++  "khmbb\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_khmbt"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))]
++  ""
++  "khmbt\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_khmtb"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))]
++  ""
++  "khmtb\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_khmtt"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
++		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))]
++  ""
++  "khmtt\t%0, %1, %2"
++  [(set_attr "type"    "mul")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kslraw"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))]
++  ""
++  "kslraw\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_kslrawu"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))]
++  ""
++  "kslraw.u\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_volatile_rdov"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))]
++  ""
++  "rdov\t%0"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_volatile_clrov"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)]
++  ""
++  "clrov"
++  [(set_attr "type"   "misc")
++   (set_attr "length"    "4")]
++)
++
++;; System
++
++(define_insn "unspec_sva"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVA))]
++  ""
++  "sva\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_svs"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
++		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVS))]
++  ""
++  "svs\t%0, %1, %2"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_insn "unspec_jr_itoff"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_ITOFF)]
++  ""
++  "jr.itoff\t%0"
++  [(set_attr "type" "branch")]
++)
++
++(define_insn "unspec_jr_toff"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_TOFF)]
++  ""
++  "jr.toff\t%0"
++  [(set_attr "type" "branch")]
++)
++
++(define_insn "unspec_jral_iton"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_ITON)]
++  ""
++  "jral.iton\t%0"
++  [(set_attr "type" "branch")]
++)
++
++(define_insn "unspec_jral_ton"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_TON)]
++  ""
++  "jral.ton\t%0"
++  [(set_attr "type" "branch")]
++)
++
++(define_insn "unspec_ret_itoff"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_ITOFF)]
++  ""
++  "ret.itoff\t%0"
++  [(set_attr "type" "branch")]
++)
++
++(define_insn "unspec_ret_toff"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_TOFF)]
++  ""
++  "ret.toff\t%0"
++  [(set_attr "type" "branch")]
++)
++
++(define_insn "unspec_standby_no_wake_grant"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT)]
++  ""
++  "standby\tno_wake_grant"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_standby_wake_grant"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_GRANT)]
++  ""
++  "standby\twake_grant"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_standby_wait_done"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_DONE)]
++  ""
++  "standby\twait_done"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_teqz"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
++			(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TEQZ)]
++  ""
++  "teqz\t%0, %1"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_tnez"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
++			(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TNEZ)]
++  ""
++  "tnez\t%0, %1"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_trap"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_TRAP)]
++  ""
++  "trap\t%0"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_setend_big"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_BIG)]
++  ""
++  "setend.b"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_setend_little"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_LITTLE)]
++  ""
++  "setend.l"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_break"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_BREAK)]
++  ""
++  "break\t%0"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_syscall"
++  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_SYSCALL)]
++  ""
++  "syscall\t%0"
++  [(set_attr "type" "misc")]
++)
++
++(define_insn "unspec_nop"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NOP)]
++  ""
++  "nop"
++  [(set_attr "type" "misc")]
++)
++
++(define_expand "unspec_get_current_sp"
++  [(match_operand:SI 0 "register_operand" "")]
++  ""
++{
++  emit_move_insn (operands[0], gen_rtx_REG (SImode, SP_REGNUM));
++  DONE;
++})
++
++(define_expand "unspec_set_current_sp"
++  [(match_operand:SI 0 "register_operand" "")]
++  ""
++{
++  emit_move_insn (gen_rtx_REG (SImode, SP_REGNUM), operands[0]);
++  DONE;
++})
++
++(define_expand "unspec_return_address"
++  [(match_operand:SI 0 "register_operand" "")]
++  ""
++{
++  emit_move_insn (operands[0], gen_rtx_REG (SImode, LP_REGNUM));
++  DONE;
++})
++
++(define_insn "unspec_signature_begin"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_BEGIN)]
++  ""
++  "isps"
++  [(set_attr "length" "4")]
++)
++
++(define_insn "unspec_signature_end"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_END)]
++  ""
++  "! -----\;.signature_end\;j8 2\;! -----"
++  [(set_attr "length" "2")]
++)
++
++;; Swap
++
++(define_insn "unspec_wsbh"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_WSBH))]
++  ""
++  "wsbh\t%0, %1"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++;; TLBOP Intrinsic
++
++(define_insn "unspec_tlbop_trd"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TRD)]
++  ""
++  "tlbop\t%0, TRD"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_twr"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TWR)]
++  ""
++  "tlbop\t%0, TWR"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_rwr"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWR)]
++  ""
++  "tlbop\t%0, RWR"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_rwlk"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWLK)]
++  ""
++  "tlbop\t%0, RWLK"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_unlk"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_UNLK)]
++  ""
++  "tlbop\t%0, UNLK"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_pb"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_PB))]
++  ""
++  "tlbop\t%0, %1, PB"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_inv"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_INV)]
++  ""
++  "tlbop\t%0, INV"
++  [(set_attr "type" "mmu")]
++)
++
++(define_insn "unspec_tlbop_flua"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_TLBOP_FLUA)]
++  ""
++  "tlbop\tFLUA"
++  [(set_attr "type" "mmu")]
++)
++
++;; Unaligned Load/Store
++
++(define_expand "unaligned_load_hw"
++  [(set (match_operand:HI 0 "register_operand" "")
++	(unspec:HI [(mem:HI (match_operand:SI 1 "register_operand" ""))] UNSPEC_UALOAD_HW))]
++  ""
++{
++  operands[0] = simplify_gen_subreg (SImode, operands[0],
++				     GET_MODE (operands[0]), 0);
++  if (TARGET_ISA_V3M)
++    {
++      nds32_expand_unaligned_load (operands, HImode);
++    }
++  else
++    {
++      emit_insn (gen_unaligned_load_w (operands[0],
++				       gen_rtx_MEM (SImode, operands[1])));
++      /* A full word was loaded above; reduce it to the halfword wanted.  */
++      if (WORDS_BIG_ENDIAN)
++	emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT(16)));
++      else
++	emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0xffff)));
++    }
++
++  DONE;
++})
++
++(define_expand "unaligned_loadsi"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
++  ""
++{
++  if (flag_unaligned_access)
++    {
++      rtx mem = gen_rtx_MEM (SImode, operands[1]);
++      emit_move_insn (operands[0], mem);
++    }
++  else
++    {
++      if (TARGET_ISA_V3M)
++	nds32_expand_unaligned_load (operands, SImode);
++      else
++	emit_insn (gen_unaligned_load_w (operands[0],
++					 gen_rtx_MEM (SImode, (operands[1]))));
++    }
++  DONE;
++})
++
++(define_insn "unaligned_load_w"
++  [(set (match_operand:SI 0 "register_operand"                       "=  r")
++	(unspec:SI [(match_operand:SI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
++  ""
++{
++  return nds32_output_lmw_single_word (operands);
++}
++  [(set_attr "type"   "load")
++   (set_attr "length"    "4")]
++)
++
++(define_expand "unaligned_loaddi"
++  [(set (match_operand:DI 0 "register_operand" "=r")
++	(unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))]
++  ""
++{
++  if (TARGET_ISA_V3M)
++    {
++      nds32_expand_unaligned_load (operands, DImode);
++    }
++  else
++    emit_insn (gen_unaligned_load_dw (operands[0], operands[1]));
++  DONE;
++})
++
++(define_insn "unaligned_load_dw"
++  [(set (match_operand:DI 0 "register_operand" "=r")
++	(unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))]
++  ""
++{
++  rtx otherops[3];
++  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
++  otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
++  otherops[2] = operands[1];
++
++  output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops);
++  return "";
++}
++  [(set_attr "type"   "load")
++   (set_attr "length"    "4")]
++)
++
++(define_expand "unaligned_store_hw"
++  [(set (mem:SI (match_operand:SI 0 "register_operand" ""))
++	(unspec:HI [(match_operand:HI 1 "register_operand" "")] UNSPEC_UASTORE_HW))]
++  ""
++{
++  operands[1] = simplify_gen_subreg (SImode, operands[1],
++				     GET_MODE (operands[1]), 0);
++  nds32_expand_unaligned_store (operands, HImode);
++  DONE;
++})
++
++(define_expand "unaligned_storesi"
++  [(set (mem:SI (match_operand:SI 0 "register_operand" "r"))
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
++  ""
++{
++  if (flag_unaligned_access)
++    {
++      rtx mem = gen_rtx_MEM (SImode, operands[0]);
++      emit_move_insn (mem, operands[1]);
++    }
++  else
++    {
++      if (TARGET_ISA_V3M)
++	nds32_expand_unaligned_store (operands, SImode);
++      else
++	emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[0]),
++					  operands[1]));
++    }
++  DONE;
++})
++
++(define_insn "unaligned_store_w"
++  [(set (match_operand:SI 0 "nds32_lmw_smw_base_operand"   "=Umw")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "   r")] UNSPEC_UASTORE_W))]
++  ""
++{
++  return nds32_output_smw_single_word (operands);
++}
++  [(set_attr "type"   "store")
++   (set_attr "length"     "4")]
++)
++
++(define_expand "unaligned_storedi"
++  [(set (mem:DI (match_operand:SI 0 "register_operand" "r"))
++	(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))]
++  ""
++{
++  if (TARGET_ISA_V3M)
++    nds32_expand_unaligned_store (operands, DImode);
++  else
++    emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]),
++				       operands[1]));
++  DONE;
++})
++
++(define_insn "unaligned_store_dw"
++  [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand"   "=Umw")
++	(unspec:DI [(match_operand:DI 1 "register_operand" "   r")] UNSPEC_UASTORE_DW))]
++  ""
++{
++  return nds32_output_smw_double_word (operands);
++}
++  [(set_attr "type"   "store")
++   (set_attr "length"     "4")]
++)
++
++(define_expand "unspec_unaligned_feature"
++  [(set (match_operand:SI 0 "register_operand" "")
++	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE))]
++  ""
++{
++  /* Get $MMU_CTL system register from nds32_intrinsic_register_names[].  */
++  rtx system_reg =  GEN_INT (__NDS32_REG_MMU_CTL__);
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx temp2_reg = gen_reg_rtx (SImode);
++
++  emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
++  emit_move_insn (temp_reg, operands[0]);
++  emit_move_insn (temp2_reg, GEN_INT (0x800 << 12));
++  emit_insn (gen_iorsi3 (operands[0], operands[0], temp2_reg));
++  emit_insn (gen_unspec_volatile_mtsr (operands[0], system_reg));
++  emit_insn (gen_unspec_dsb ());
++  /* Read $MMU_CTL back, then restore the value saved in temp_reg.  */
++  emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  /* Leave only bit 23 (0x800 << 12, set above) in bit 0 of operands[0].  */
++  emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (8)));
++  emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
++  DONE;
++})
++
++(define_expand "unspec_enable_unaligned"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)]
++  ""
++{
++  /* Get $MMU_CTL system register from nds32_intrinsic_register_names[].  */
++  rtx system_reg =  GEN_INT (__NDS32_REG_MMU_CTL__);
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx temp2_reg = gen_reg_rtx (SImode);
++  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++  emit_move_insn (temp2_reg, GEN_INT (0x800 << 12));
++  emit_insn (gen_iorsi3 (temp_reg, temp_reg, temp2_reg));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++(define_expand "unspec_disable_unaligned"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)]
++  ""
++{
++  /* Get $MMU_CTL system register from nds32_intrinsic_register_names[].  */
++  rtx system_reg =  GEN_INT (__NDS32_REG_MMU_CTL__);
++  rtx temp_reg = gen_reg_rtx (SImode);
++  rtx temp2_reg = gen_reg_rtx (SImode);
++  emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg));
++  emit_move_insn (temp2_reg, GEN_INT (0x800 << 12));
++  emit_insn (gen_one_cmplsi2 (temp2_reg, temp2_reg));
++  emit_insn (gen_andsi3 (temp_reg, temp_reg, temp2_reg));
++  emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg));
++  emit_insn (gen_unspec_dsb ());
++  DONE;
++})
++
++;; abs alias kabs
++
++(define_insn "unspec_kabs"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))]
++  ""
++  "kabs\t%0, %1"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")]
++)
++
++(define_expand "no_hwloop"
++  [(const_int 0)]
++  ""
++{
++  if (NDS32_HW_LOOP_P ())
++    emit_insn (gen_unspec_no_hwloop ());
++  else
++    emit_insn (gen_nop ());
++
++  DONE;
++})
++
++(define_insn "unspec_no_hwloop"
++  [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_NO_HWLOOP)]
++  ""
++  ""
++  [(set_attr "type" "misc")]
++)
+ ;; ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c
+index 79be27e..be82609 100644
+--- a/gcc/config/nds32/nds32-isr.c
++++ b/gcc/config/nds32/nds32-isr.c
+@@ -24,11 +24,41 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
+-#include "rtl.h"
+ #include "tree.h"
+-#include "diagnostic-core.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
+ #include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+@@ -39,7 +69,260 @@
+    We use an array to record essential information for each vector.  */
+ static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS];
+ 
+-/* ------------------------------------------------------------------------ */
++/* ------------------------------------------------------------- */
++/* FIXME:
++   FOR BACKWARD COMPATIBILITY, we need to support the following patterns:
++
++       __attribute__((interrupt("XXX;YYY;id=ZZZ")))
++       __attribute__((exception("XXX;YYY;id=ZZZ")))
++       __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
++
++   We provide several functions to parse the strings.  */
++
++/* Parse the legacy interrupt attribute string ORIGINAL_STR, of the form
++   "XXX;YYY;id=ZZZ", and fill in nds32_isr_vectors[] for every listed id.
++   FUNC_NAME is recorded as the handler name and S_LEVEL as the security
++   level of each vector.  error () returns to its caller, so malformed
++   input has to be rejected explicitly before it is used.  */
++static void
++nds32_interrupt_attribute_parse_string (const char *original_str,
++					const char *func_name,
++					unsigned int s_level)
++{
++  enum nds32_isr_save_reg save_reg;
++  enum nds32_isr_nested_type nested_type;
++  char *target_str;
++  char *id_str, *value_str;
++
++  /* 1. Detect 'save_all_regs'    : NDS32_SAVE_ALL
++	       'save_caller_regs' : NDS32_PARTIAL_SAVE
++     NDS32_PARTIAL_SAVE is also the default if neither is found.  */
++  if (strstr (original_str, "save_all_regs"))
++    save_reg = NDS32_SAVE_ALL;
++  else
++    save_reg = NDS32_PARTIAL_SAVE;
++
++  /* 2. Detect 'nested'       : NDS32_NESTED
++	       'not_nested'   : NDS32_NOT_NESTED
++	       'ready_nested' : NDS32_NESTED_READY
++	       'critical'     : NDS32_CRITICAL
++     Since 'not_nested' and 'ready_nested' both contain 'nested',
++     plain 'nested' is tested after them.  NDS32_NOT_NESTED is the
++     default if no keyword is found.  */
++  if (strstr (original_str, "not_nested"))
++    nested_type = NDS32_NOT_NESTED;
++  else if (strstr (original_str, "ready_nested"))
++    nested_type = NDS32_NESTED_READY;
++  else if (strstr (original_str, "nested"))
++    nested_type = NDS32_NESTED;
++  else if (strstr (original_str, "critical"))
++    nested_type = NDS32_CRITICAL;
++  else
++    nested_type = NDS32_NOT_NESTED;
++
++  /* 3. Traverse each id value and set corresponding information.
++     strtok () modifies its argument, so tokenize a heap copy.  The copy
++     is sized to the input because a long id list does not fit in a
++     small fixed-size buffer.  */
++  target_str = xstrdup (original_str);
++  id_str = strstr (target_str, "id=");
++
++  /* If user forgets to assign 'id', issue an error message and stop;
++     there is nothing to tokenize.  */
++  if (id_str == NULL)
++    {
++      error ("require id argument in the string");
++      free (target_str);
++      return;
++    }
++
++  /* Extract the value_str first.  */
++  id_str    = strtok (id_str, "=");
++  value_str = strtok (NULL, ";");
++
++  /* Walk the comma separated id values.  */
++  for (value_str = strtok (value_str, ",");
++       value_str != NULL;
++       value_str = strtok (NULL, ","))
++    {
++      int i = atoi (value_str);
++
++      /* Only interrupt(0..63) is valid.  Check before adding the offset
++	 so that a rejected id is never used as an array index.  */
++      if (i < 0 || i > 63)
++	{
++	  error ("invalid id value for interrupt attribute");
++	  continue;
++	}
++
++      /* For interrupt(0..63), the actual vector number is (9..72).  */
++      i = i + 9;
++
++      /* Setup nds32_isr_vectors[] array.  */
++      nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT;
++      strcpy (nds32_isr_vectors[i].func_name, func_name);
++      nds32_isr_vectors[i].save_reg = save_reg;
++      nds32_isr_vectors[i].nested_type = nested_type;
++      nds32_isr_vectors[i].security_level = s_level;
++    }
++
++  /* Release the working copy.  */
++  free (target_str);
++}
++
++/* Parse the legacy exception attribute string ORIGINAL_STR, of the form
++   "XXX;YYY;id=ZZZ", and fill in nds32_isr_vectors[] for every listed id.
++   FUNC_NAME is recorded as the handler name and S_LEVEL as the security
++   level of each vector.  error () returns to its caller, so malformed
++   input has to be rejected explicitly before it is used.  */
++static void
++nds32_exception_attribute_parse_string (const char *original_str,
++					const char *func_name,
++					unsigned int s_level)
++{
++  enum nds32_isr_save_reg save_reg;
++  enum nds32_isr_nested_type nested_type;
++  char *target_str;
++  char *id_str, *value_str;
++
++  /* 1. Detect 'save_all_regs'    : NDS32_SAVE_ALL
++	       'save_caller_regs' : NDS32_PARTIAL_SAVE
++     NDS32_PARTIAL_SAVE is also the default if neither is found.  */
++  if (strstr (original_str, "save_all_regs"))
++    save_reg = NDS32_SAVE_ALL;
++  else
++    save_reg = NDS32_PARTIAL_SAVE;
++
++  /* 2. Detect 'nested'       : NDS32_NESTED
++	       'not_nested'   : NDS32_NOT_NESTED
++	       'ready_nested' : NDS32_NESTED_READY
++	       'critical'     : NDS32_CRITICAL
++     Since 'not_nested' and 'ready_nested' both contain 'nested',
++     plain 'nested' is tested after them.  NDS32_NOT_NESTED is the
++     default if no keyword is found.  */
++  if (strstr (original_str, "not_nested"))
++    nested_type = NDS32_NOT_NESTED;
++  else if (strstr (original_str, "ready_nested"))
++    nested_type = NDS32_NESTED_READY;
++  else if (strstr (original_str, "nested"))
++    nested_type = NDS32_NESTED;
++  else if (strstr (original_str, "critical"))
++    nested_type = NDS32_CRITICAL;
++  else
++    nested_type = NDS32_NOT_NESTED;
++
++  /* 3. Traverse each id value and set corresponding information.
++     strtok () modifies its argument, so tokenize a heap copy.  The copy
++     is sized to the input because a long attribute string does not fit
++     in a small fixed-size buffer.  */
++  target_str = xstrdup (original_str);
++  id_str = strstr (target_str, "id=");
++
++  /* If user forgets to assign 'id', issue an error message and stop;
++     there is nothing to tokenize.  */
++  if (id_str == NULL)
++    {
++      error ("require id argument in the string");
++      free (target_str);
++      return;
++    }
++
++  /* Extract the value_str first.  */
++  id_str    = strtok (id_str, "=");
++  value_str = strtok (NULL, ";");
++
++  /* Walk the comma separated id values.  */
++  for (value_str = strtok (value_str, ",");
++       value_str != NULL;
++       value_str = strtok (NULL, ","))
++    {
++      int i;
++      i = atoi (value_str);
++
++      /* For exception(1..8), the actual vector number is (1..8).
++	 Skip a rejected id after reporting it, so that it is never
++	 used as an array index.  */
++      if (i < 1 || i > 8)
++	{
++	  error ("invalid id value for exception attribute");
++	  continue;
++	}
++
++      /* Setup nds32_isr_vectors[] array.  */
++      nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION;
++      strcpy (nds32_isr_vectors[i].func_name, func_name);
++      nds32_isr_vectors[i].save_reg = save_reg;
++      nds32_isr_vectors[i].nested_type = nested_type;
++      nds32_isr_vectors[i].security_level = s_level;
++    }
++
++  /* Release the working copy.  */
++  free (target_str);
++}
++
++/* Parse the legacy reset attribute string ORIGINAL_STR, of the form
++   "vectors=XXX;nmi_func=YYY;warm_func=ZZZ", into nds32_isr_vectors[0],
++   recording FUNC_NAME as the reset handler name.  Every key is optional;
++   a key that has no value is treated as if it were absent.  */
++static void
++nds32_reset_attribute_parse_string (const char *original_str,
++				    const char *func_name)
++{
++  char *target_str, *vectors_str, *nmi_str, *warm_str, *value_str;
++
++  /* Deal with reset attribute.  Its vector number is always 0.  */
++  nds32_isr_vectors[0].category = NDS32_ISR_RESET;
++  strcpy (nds32_isr_vectors[0].func_name, func_name);
++
++  /* The total vectors = interrupt + exception numbers + reset.
++     There are 8 exception and 1 reset in nds32 architecture.
++     If user forgets to assign 'vectors', use default 16 interrupts.  */
++  nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1;
++
++  /* strtok () modifies its argument and each key is looked up from the
++     start of the string, so every step below tokenizes a fresh heap
++     copy that is sized to the input.  */
++
++  /* 1. Parse 'vectors=XXXX'.  */
++  target_str = xstrdup (original_str);
++  vectors_str = strstr (target_str, "vectors=");
++  if (vectors_str != NULL)
++    {
++      /* Extract the value_str.  */
++      vectors_str = strtok (vectors_str, "=");
++      value_str = strtok (NULL, ";");
++      if (value_str != NULL)
++	nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1;
++    }
++  free (target_str);
++
++  /* 2. Parse 'nmi_func=YYYY'.  */
++  target_str = xstrdup (original_str);
++  nmi_str = strstr (target_str, "nmi_func=");
++  if (nmi_str != NULL)
++    {
++      /* Extract the value_str.  */
++      nmi_str = strtok (nmi_str, "=");
++      value_str = strtok (NULL, ";");
++      if (value_str != NULL)
++	strcpy (nds32_isr_vectors[0].nmi_name, value_str);
++    }
++  free (target_str);
++
++  /* 3. Parse 'warm_func=ZZZZ'.  */
++  target_str = xstrdup (original_str);
++  warm_str = strstr (target_str, "warm_func=");
++  if (warm_str != NULL)
++    {
++      /* Extract the value_str.  */
++      warm_str = strtok (warm_str, "=");
++      value_str = strtok (NULL, ";");
++      if (value_str != NULL)
++	strcpy (nds32_isr_vectors[0].warm_name, value_str);
++    }
++  free (target_str);
++}
++/* ------------------------------------------------------------- */
+ 
+ /* A helper function to emit section head template.  */
+ static void
+@@ -75,6 +358,15 @@ nds32_emit_isr_jmptbl_section (int vector_id)
+   char section_name[100];
+   char symbol_name[100];
+ 
++  /* A critical isr does not need jump table section because
++     its behavior is not performed by two-level handler.  */
++  if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL)
++    {
++      fprintf (asm_out_file, "\t! The vector %02d is a critical isr !\n",
++			     vector_id);
++      return;
++    }
++
+   /* Prepare jmptbl section and symbol name.  */
+   snprintf (section_name, sizeof (section_name),
+ 	    ".nds32_jmptbl.%02d", vector_id);
+@@ -95,7 +387,6 @@ nds32_emit_isr_vector_section (int vector_id)
+   const char *c_str = "CATEGORY";
+   const char *sr_str = "SR";
+   const char *nt_str = "NT";
+-  const char *vs_str = "VS";
+   char first_level_handler_name[100];
+   char section_name[100];
+   char symbol_name[100];
+@@ -143,46 +434,63 @@ nds32_emit_isr_vector_section (int vector_id)
+     case NDS32_NESTED_READY:
+       nt_str = "nr";
+       break;
++    case NDS32_CRITICAL:
++      /* The critical isr is not performed by two-level handler.  */
++      nt_str = "";
++      break;
+     }
+ 
+-  /* Currently we have 4-byte or 16-byte size for each vector.
+-     If it is 4-byte, the first level handler name has suffix string "_4b".  */
+-  vs_str = (nds32_isr_vector_size == 4) ? "_4b" : "";
+-
+   /* Now we can create first level handler name.  */
+-  snprintf (first_level_handler_name, sizeof (first_level_handler_name),
+-	    "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str);
++  if (nds32_isr_vectors[vector_id].security_level == 0)
++    {
++      /* For security level 0, use normal first level handler name.  */
++      snprintf (first_level_handler_name, sizeof (first_level_handler_name),
++		"_nds32_%s_%s_%s", c_str, sr_str, nt_str);
++    }
++  else
++    {
++      /* For security level 1-3, use corresponding spl_1, spl_2, or spl_3.  */
++      snprintf (first_level_handler_name, sizeof (first_level_handler_name),
++		"_nds32_spl_%d", nds32_isr_vectors[vector_id].security_level);
++    }
+ 
+   /* Prepare vector section and symbol name.  */
+   snprintf (section_name, sizeof (section_name),
+ 	    ".nds32_vector.%02d", vector_id);
+   snprintf (symbol_name, sizeof (symbol_name),
+-	    "_nds32_vector_%02d%s", vector_id, vs_str);
++	    "_nds32_vector_%02d", vector_id);
+ 
+ 
+   /* Everything is ready.  We can start emit vector section content.  */
+   nds32_emit_section_head_template (section_name, symbol_name,
+ 				    floor_log2 (nds32_isr_vector_size), false);
+ 
+-  /* According to the vector size, the instructions in the
+-     vector section may be different.  */
+-  if (nds32_isr_vector_size == 4)
++  /* First we check if it is a critical isr.
++     If so, jump to user handler directly; otherwise, the instructions
++     in the vector section may be different according to the vector size.  */
++  if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL)
++    {
++      /* This block is for critical isr.  Jump to user handler directly.  */
++      fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n",
++			     nds32_isr_vectors[vector_id].func_name);
++    }
++  else if (nds32_isr_vector_size == 4)
+     {
+       /* This block is for 4-byte vector size.
+-         Hardware $VID support is necessary and only one instruction
+-         is needed in vector section.  */
++	 Hardware $VID support is necessary and only one instruction
++	 is needed in vector section.  */
+       fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n",
+ 			     first_level_handler_name);
+     }
+   else
+     {
+       /* This block is for 16-byte vector size.
+-         There is NO hardware $VID so that we need several instructions
+-         such as pushing GPRs and preparing software vid at vector section.
+-         For pushing GPRs, there are four variations for
+-         16-byte vector content and we have to handle each combination.
+-         For preparing software vid, note that the vid need to
+-         be substracted vector_number_offset.  */
++	 There is NO hardware $VID so that we need several instructions
++	 such as pushing GPRs and preparing software vid at vector section.
++	 For pushing GPRs, there are four variations for
++	 16-byte vector content and we have to handle each combination.
++	 For preparing software vid, note that the vid need to
++	 be substracted vector_number_offset.  */
+       if (TARGET_REDUCED_REGS)
+ 	{
+ 	  if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL)
+@@ -235,13 +543,11 @@ nds32_emit_isr_reset_content (void)
+ {
+   unsigned int i;
+   unsigned int total_n_vectors;
+-  const char *vs_str;
+   char reset_handler_name[100];
+   char section_name[100];
+   char symbol_name[100];
+ 
+   total_n_vectors = nds32_isr_vectors[0].total_n_vectors;
+-  vs_str = (nds32_isr_vector_size == 4) ? "_4b" : "";
+ 
+   fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n");
+ 
+@@ -257,7 +563,7 @@ nds32_emit_isr_reset_content (void)
+   /* Emit vector references.  */
+   fprintf (asm_out_file, "\t ! references to vector section entries\n");
+   for (i = 0; i < total_n_vectors; i++)
+-    fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str);
++    fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i);
+ 
+   /* Emit jmptbl_00 section.  */
+   snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00");
+@@ -271,9 +577,9 @@ nds32_emit_isr_reset_content (void)
+ 
+   /* Emit vector_00 section.  */
+   snprintf (section_name, sizeof (section_name), ".nds32_vector.00");
+-  snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str);
++  snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00");
+   snprintf (reset_handler_name, sizeof (reset_handler_name),
+-	    "_nds32_reset%s", vs_str);
++	    "_nds32_reset");
+ 
+   fprintf (asm_out_file, "\t! ....................................\n");
+   nds32_emit_section_head_template (section_name, symbol_name,
+@@ -319,12 +625,12 @@ void
+ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
+ {
+   int save_all_p, partial_save_p;
+-  int nested_p, not_nested_p, nested_ready_p;
++  int nested_p, not_nested_p, nested_ready_p, critical_p;
+   int intr_p, excp_p, reset_p;
+ 
+   /* Initialize variables.  */
+   save_all_p = partial_save_p = 0;
+-  nested_p = not_nested_p = nested_ready_p = 0;
++  nested_p = not_nested_p = nested_ready_p = critical_p = 0;
+   intr_p = excp_p = reset_p = 0;
+ 
+   /* We must check at MOST one attribute to set save-reg.  */
+@@ -343,8 +649,10 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
+     not_nested_p = 1;
+   if (lookup_attribute ("nested_ready", func_attrs))
+     nested_ready_p = 1;
++  if (lookup_attribute ("critical", func_attrs))
++    critical_p = 1;
+ 
+-  if ((nested_p + not_nested_p + nested_ready_p) > 1)
++  if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1)
+     error ("multiple nested types attributes to function %qD", func_decl);
+ 
+   /* We must check at MOST one attribute to
+@@ -358,6 +666,17 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
+ 
+   if ((intr_p + excp_p + reset_p) > 1)
+     error ("multiple interrupt attributes to function %qD", func_decl);
++
++  /* Do not allow isr attributes under linux toolchain.  */
++  if (TARGET_LINUX_ABI && intr_p)
++      error ("cannot use interrupt attributes to function %qD "
++	     "under linux toolchain", func_decl);
++  if (TARGET_LINUX_ABI && excp_p)
++      error ("cannot use exception attributes to function %qD "
++	     "under linux toolchain", func_decl);
++  if (TARGET_LINUX_ABI && reset_p)
++      error ("cannot use reset attributes to function %qD "
++	     "under linux toolchain", func_decl);
+ }
+ 
+ /* Function to construct isr vectors information array.
+@@ -369,15 +688,21 @@ nds32_construct_isr_vectors_information (tree func_attrs,
+ 					 const char *func_name)
+ {
+   tree save_all, partial_save;
+-  tree nested, not_nested, nested_ready;
++  tree nested, not_nested, nested_ready, critical;
+   tree intr, excp, reset;
+ 
++  tree secure;
++  tree security_level_list;
++  tree security_level;
++  unsigned int s_level;
++
+   save_all     = lookup_attribute ("save_all", func_attrs);
+   partial_save = lookup_attribute ("partial_save", func_attrs);
+ 
+   nested       = lookup_attribute ("nested", func_attrs);
+   not_nested   = lookup_attribute ("not_nested", func_attrs);
+   nested_ready = lookup_attribute ("nested_ready", func_attrs);
++  critical     = lookup_attribute ("critical", func_attrs);
+ 
+   intr  = lookup_attribute ("interrupt", func_attrs);
+   excp  = lookup_attribute ("exception", func_attrs);
+@@ -387,6 +712,63 @@ nds32_construct_isr_vectors_information (tree func_attrs,
+   if (!intr && !excp && !reset)
+     return;
+ 
++  /* At first, we need to retrieve security level.  */
++  secure = lookup_attribute ("secure", func_attrs);
++  if (secure != NULL)
++    {
++      security_level_list = TREE_VALUE (secure);
++      security_level = TREE_VALUE (security_level_list);
++      s_level = TREE_INT_CST_LOW (security_level);
++    }
++  else
++    {
++      /* If there is no secure attribute, the security level is set by
++	 nds32_isr_secure_level, which is controlled by -misr-secure=X option.
++	 By default nds32_isr_secure_level should be 0.  */
++      s_level = nds32_isr_secure_level;
++    }
++
++  /* ------------------------------------------------------------- */
++  /* FIXME:
++     FOR BACKWARD COMPATIBILITY, we need to support following patterns:
++
++	 __attribute__((interrupt("XXX;YYY;id=ZZZ")))
++	 __attribute__((exception("XXX;YYY;id=ZZZ")))
++	 __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
++
++     If interrupt/exception/reset appears and its argument is a
++     STRING_CST, we will parse string with some auxiliary functions
++     which set necessary isr information in the nds32_isr_vectors[] array.
++     After that, we can return immediately to avoid new-syntax isr
++     information construction.  */
++  if (intr != NULL_TREE
++      && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST)
++    {
++      tree string_arg = TREE_VALUE (TREE_VALUE (intr));
++      nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg),
++					      func_name,
++					      s_level);
++      return;
++    }
++  if (excp != NULL_TREE
++      && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST)
++    {
++      tree string_arg = TREE_VALUE (TREE_VALUE (excp));
++      nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg),
++					      func_name,
++					      s_level);
++      return;
++    }
++  if (reset != NULL_TREE
++      && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST)
++    {
++      tree string_arg = TREE_VALUE (TREE_VALUE (reset));
++      nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg),
++					  func_name);
++      return;
++    }
++  /* ------------------------------------------------------------- */
++
+   /* If we are here, either we have interrupt/exception,
+      or reset attribute.  */
+   if (intr || excp)
+@@ -413,6 +795,9 @@ nds32_construct_isr_vectors_information (tree func_attrs,
+ 	  /* Add vector_number_offset to get actual vector number.  */
+ 	  vector_id = TREE_INT_CST_LOW (id) + vector_number_offset;
+ 
++	  /* Set security level.  */
++	  nds32_isr_vectors[vector_id].security_level = s_level;
++
+ 	  /* Enable corresponding vector and set function name.  */
+ 	  nds32_isr_vectors[vector_id].category = (intr)
+ 						  ? (NDS32_ISR_INTERRUPT)
+@@ -432,6 +817,8 @@ nds32_construct_isr_vectors_information (tree func_attrs,
+ 	    nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED;
+ 	  else if (nested_ready)
+ 	    nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY;
++	  else if (critical)
++	    nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL;
+ 
+ 	  /* Advance to next id.  */
+ 	  id_list = TREE_CHAIN (id_list);
+@@ -447,12 +834,12 @@ nds32_construct_isr_vectors_information (tree func_attrs,
+       nds32_isr_vectors[0].category = NDS32_ISR_RESET;
+ 
+       /* Prepare id_list and identify id value so that
+-         we can set total number of vectors.  */
++	 we can set total number of vectors.  */
+       id_list = TREE_VALUE (reset);
+       id = TREE_VALUE (id_list);
+ 
+       /* The total vectors = interrupt + exception numbers + reset.
+-         There are 8 exception and 1 reset in nds32 architecture.  */
++	 There are 8 exception and 1 reset in nds32 architecture.  */
+       nds32_isr_vectors[0].total_n_vectors = TREE_INT_CST_LOW (id) + 8 + 1;
+       strcpy (nds32_isr_vectors[0].func_name, func_name);
+ 
+@@ -488,7 +875,6 @@ nds32_construct_isr_vectors_information (tree func_attrs,
+     }
+ }
+ 
+-/* A helper function to handle isr stuff at the beginning of asm file.  */
+ void
+ nds32_asm_file_start_for_isr (void)
+ {
+@@ -501,15 +887,14 @@ nds32_asm_file_start_for_isr (void)
+       strcpy (nds32_isr_vectors[i].func_name, "");
+       nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE;
+       nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED;
++      nds32_isr_vectors[i].security_level = 0;
+       nds32_isr_vectors[i].total_n_vectors = 0;
+       strcpy (nds32_isr_vectors[i].nmi_name, "");
+       strcpy (nds32_isr_vectors[i].warm_name, "");
+     }
+ }
+ 
+-/* A helper function to handle isr stuff at the end of asm file.  */
+-void
+-nds32_asm_file_end_for_isr (void)
++void nds32_asm_file_end_for_isr (void)
+ {
+   int i;
+ 
+@@ -543,6 +928,8 @@ nds32_asm_file_end_for_isr (void)
+ 	  /* Found one vector which is interupt or exception.
+ 	     Output its jmptbl and vector section content.  */
+ 	  fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i);
++	  fprintf (asm_out_file, "\t! security level: %d\n",
++		   nds32_isr_vectors[i].security_level);
+ 	  fprintf (asm_out_file, "\t! ------------------------------------\n");
+ 	  nds32_emit_isr_jmptbl_section (i);
+ 	  fprintf (asm_out_file, "\t! ....................................\n");
+@@ -576,4 +963,65 @@ nds32_isr_function_p (tree func)
+ 	  || (t_reset != NULL_TREE));
+ }
+ 
+-/* ------------------------------------------------------------------------ */
++/* Return true if FUNC is a isr function with critical attribute.
++   FUNC must be a FUNCTION_DECL.  'critical' may be given either as a
++   separate attribute or, for backward compatibility, as a keyword inside
++   the string argument of the interrupt/exception attribute.  */
++bool
++nds32_isr_function_critical_p (tree func)
++{
++  tree t_intr;
++  tree t_excp;
++  tree t_critical;
++
++  tree t_check;
++  tree string_arg;
++
++  tree attrs;
++
++  if (TREE_CODE (func) != FUNCTION_DECL)
++    abort ();
++
++  attrs = DECL_ATTRIBUTES (func);
++
++  t_intr  = lookup_attribute ("interrupt", attrs);
++  t_excp  = lookup_attribute ("exception", attrs);
++
++  t_critical = lookup_attribute ("critical", attrs);
++
++  /* If both interrupt and exception attribute does not appear,
++     we can return false immediately.  */
++  if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE))
++    return false;
++
++  /* Here we can guarantee either interrupt or exception attribute
++     does exist, so further check critical attribute.
++     If it also appears, we can return true.  */
++  if (t_critical != NULL_TREE)
++    return true;
++
++  /* ------------------------------------------------------------- */
++  /* FIXME:
++     FOR BACKWARD COMPATIBILITY, we need to handle string type.
++     If the string 'critical' appears in the interrupt/exception
++     string argument, we can return true.  */
++
++  /* Examine the interrupt attribute if present, otherwise the exception
++     attribute; at least one of them exists at this point.  */
++  t_check = t_intr ? t_intr : t_excp;
++  string_arg = TREE_VALUE (TREE_VALUE (t_check));
++
++  /* The string is only searched, never modified, so examine it in
++     place.  Copying it into a fixed-size buffer would overflow on a
++     long argument such as a long id list.
++     Found 'critical' string, so return true.  */
++  if (TREE_CODE (string_arg) == STRING_CST
++      && strstr (TREE_STRING_POINTER (string_arg), "critical") != NULL)
++    return true;
++  /* ------------------------------------------------------------- */
++
++  /* Other cases, this isr function is not critical type.  */
++  return false;
++}
++
++/* ------------------------------------------------------------- */
+diff --git a/gcc/config/nds32/nds32-linux.opt b/gcc/config/nds32/nds32-linux.opt
+new file mode 100644
+index 0000000..75ccd76
+--- /dev/null
++++ b/gcc/config/nds32/nds32-linux.opt
+@@ -0,0 +1,16 @@
++mcmodel=
++Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE)
++Specify the address generation strategy for code model.
++
++Enum
++Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
++Known cmodel types (for use with the -mcmodel= option):
++
++EnumValue
++Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
++
++EnumValue
++Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
++
++EnumValue
++Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
+diff --git a/gcc/config/nds32/nds32-lmwsmw.c b/gcc/config/nds32/nds32-lmwsmw.c
+new file mode 100644
+index 0000000..e3b66bf
+--- /dev/null
++++ b/gcc/config/nds32/nds32-lmwsmw.c
+@@ -0,0 +1,1998 @@
++
++/* lmwsmw pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* ------------------------------------------------------------------------ */
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "target-globals.h"
++#include "ira.h"
++#include "ira-int.h"
++#include "regrename.h"
++#include "nds32-load-store-opt.h"
++#include "nds32-reg-utils.h"
++#include <set>
++#include <vector>
++#include <algorithm>
++
++#define NDS32_GPR_NUM 32
++
++/* qsort comparator ordering load_store_info_t entries by ascending
++   'order'.  Equal keys yield 0, as qsort needs a consistent result.  */
++static int
++compare_order (const void *a, const void *b)
++{
++  const load_store_info_t *fp1 = (const load_store_info_t *) a;
++  const load_store_info_t *fp2 = (const load_store_info_t *) b;
++
++  return (fp1->order > fp2->order) - (fp1->order < fp2->order);
++}
++
++/* Comparator with the qsort signature, ordering load_store_info_t
++   entries by ascending 'offset'.  Equal keys consistently yield 0.  */
++static int
++compare_offset (const void *a, const void *b)
++{
++  const load_store_info_t *fp1 = (const load_store_info_t *) a;
++  const load_store_info_t *fp2 = (const load_store_info_t *) b;
++
++  return (fp1->offset > fp2->offset) - (fp1->offset < fp2->offset);
++}
++
++static bool
++compare_amount (available_reg_info_t a, available_reg_info_t b)
++{
++  return b.amount < a.amount;	/* True when A has the larger amount.  */
++}
++
++/* Check whether INSN is a simple SImode load or store between a GPR and
++   non-volatile memory addressed as [reg], [reg+const], or [reg] with
++   post-increment/post-decrement.  Return true if so and, when
++   LOAD_STORE_INFO is not NULL, describe the access in it.  */
++static bool
++nds32_load_store_reg_plus_offset (rtx_insn *insn, load_store_info_t *load_store_info)
++{
++  rtx pattern = PATTERN (insn);
++  rtx mem = NULL_RTX;
++  rtx reg = NULL_RTX;
++  rtx base_reg = NULL_RTX;
++  rtx addr;
++  HOST_WIDE_INT offset = 0;
++  bool load_p = false;
++  enum nds32_memory_post_type post_type = NDS32_NONE;
++
++  if (GET_CODE (pattern) != SET)
++    return false;
++
++  /* Classify the SET.  If both operands are memory, the destination
++     is taken as MEM and the insn is rejected below because the other
++     operand is not a register.  */
++  if (MEM_P (SET_DEST (pattern)))
++    {
++      mem = SET_DEST (pattern);
++      reg = SET_SRC (pattern);
++    }
++  else if (MEM_P (SET_SRC (pattern)))
++    {
++      mem = SET_SRC (pattern);
++      reg = SET_DEST (pattern);
++      load_p = true;
++    }
++  else
++    return false;
++
++  /* The non-memory operand has to be a register.  */
++  if (reg == NULL_RTX || !REG_P (reg))
++    return false;
++
++  /* The FPU ISA has not load-store-multiple instruction.  */
++  if (!NDS32_IS_GPR_REGNUM (REGNO (reg)))
++    return false;
++
++  /* Only non-volatile SImode accesses qualify.  */
++  if (MEM_VOLATILE_P (mem) || GET_MODE (reg) != SImode)
++    return false;
++
++  addr = XEXP (mem, 0);
++
++  /* We only care about [reg], [reg+const] and post-modified [reg].  */
++  switch (GET_CODE (addr))
++    {
++    case REG:
++      base_reg = addr;
++      break;
++    case PLUS:
++      if (!CONST_INT_P (XEXP (addr, 1)))
++	return false;
++      base_reg = XEXP (addr, 0);
++      if (!REG_P (base_reg))
++	return false;
++      offset = INTVAL (XEXP (addr, 1));
++      break;
++    case POST_INC:
++      base_reg = XEXP (addr, 0);
++      post_type = NDS32_POST_INC;
++      break;
++    case POST_DEC:
++      base_reg = XEXP (addr, 0);
++      post_type = NDS32_POST_DEC;
++      break;
++    default:
++      return false;
++    }
++
++  /* Reject a base register numbered above the last GPR but below the
++     first pseudo register.  */
++  if (REGNO (base_reg) > NDS32_LAST_GPR_REGNUM
++      && REGNO (base_reg) < FIRST_PSEUDO_REGISTER)
++    return false;
++
++  if (load_store_info != NULL)
++    {
++      load_store_info->load_p = load_p;
++      load_store_info->offset = offset;
++      load_store_info->reg = reg;
++      load_store_info->base_reg = base_reg;
++      load_store_info->insn = insn;
++      load_store_info->mem = mem;
++      load_store_info->post_type = post_type;
++    }
++
++  return true;
++}
++
++/* Return true if the rtx X may access memory that aliases MEMREF.
++   A PARALLEL is checked element by element; any other rtx that is
++   not a SET is conservatively treated as aliasing.  */
++static bool
++nds32_insn_alias_p (rtx memref, rtx x)
++{
++  rtx mem;
++
++  if (GET_CODE (x) == PARALLEL)
++    {
++      int op, elt;
++
++      for (op = GET_RTX_LENGTH (GET_CODE (x)) - 1; op >= 0; op--)
++	for (elt = XVECLEN (x, op) - 1; elt >= 0; elt--)
++	  if (nds32_insn_alias_p (memref, XVECEXP (x, op, elt)))
++	    return true;
++
++      return false;
++    }
++
++  if (GET_CODE (x) != SET)
++    return true;
++
++  /* Pick the memory operand, trying the source first; a SET without
++     any memory operand does not alias.  */
++  if (MEM_P (SET_SRC (x)))
++    mem = SET_SRC (x);
++  else if (MEM_P (SET_DEST (x)))
++    mem = SET_DEST (x);
++  else
++    return false;
++
++  return may_alias_p (memref, mem) ? true : false;
++}
++
++/* Emit before PLACE one PARALLEL that loads or stores the registers
++   recorded in MULTIPLE_INSN at consecutive 4-byte slots starting at
++   BASE_REG.  The direction is taken from the first entry, so
++   MULTIPLE_INSN must not be empty.  If UPDATE_P, the PARALLEL starts
++   with an extra element that adds the total size to BASE_REG.  The
++   emitted pattern is also printed to dump_file when that is set.  */
++static void
++nds32_emit_multiple_insn (load_store_infos_t *multiple_insn,
++			  rtx base_reg, rtx place, bool update_p)
++{
++  unsigned int n_regs = multiple_insn->length ();
++  unsigned int n_elts;
++  unsigned int idx;
++  int par_index = 0;
++  int offset = 0;
++  bool load_p = (*multiple_insn)[0].load_p;
++  rtx parallel_insn;
++
++  /* In addition to used registers,
++     we need one more space for the base register update rtx.  */
++  n_elts = update_p ? n_regs + 1 : n_regs;
++  parallel_insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n_elts));
++
++  /* Set update insn.  */
++  if (update_p)
++    {
++      rtx update_offset = GEN_INT (n_regs * 4);
++
++      XVECEXP (parallel_insn, 0, par_index)
++	= gen_addsi3 (base_reg, base_reg, update_offset);
++      par_index++;
++    }
++
++  /* Create one (set mem regX) or (set regX mem) per recorded register,
++     each at the next 4-byte slot from BASE_REG.  */
++  for (idx = 0; idx < n_regs; ++idx)
++    {
++      rtx reg = (*multiple_insn)[idx].reg;
++      rtx mem = gen_frame_mem (SImode,
++			       plus_constant (Pmode, base_reg, offset));
++
++      MEM_COPY_ATTRIBUTES (mem, (*multiple_insn)[idx].mem);
++
++      /* Loads set the register from memory, stores the reverse.  */
++      XVECEXP (parallel_insn, 0, par_index)
++	= load_p ? gen_rtx_SET (reg, mem) : gen_rtx_SET (mem, reg);
++
++      offset += 4;
++      par_index++;
++    }
++
++  emit_insn_before (parallel_insn, place);
++
++  /* Record the generated insn in the pass dump, if any.  */
++  if (dump_file)
++    {
++      fprintf (dump_file, "lmw/smw instruction:\n");
++      print_rtl_single (dump_file, parallel_insn);
++    }
++}
++
++static void
++nds32_emit_add_insn (load_store_info_t insn, rtx base_reg,
++		     rtx place, bool add_p)
++{
++  /* Emit BASE_REG = INSN.base_reg + offset before PLACE when ADD_P is
++     set, and BASE_REG = INSN.base_reg - offset otherwise.  */
++  const HOST_WIDE_INT magnitude = insn.offset;
++  const HOST_WIDE_INT delta = add_p ? magnitude : -magnitude;
++  rtx sum_insn = gen_addsi3 (base_reg, insn.base_reg, GEN_INT (delta));
++
++  emit_insn_before (sum_insn, place);
++}
++
++/* Move every entry of SRC whose group is ID to DST, keeping their order.  */
++static void
++nds32_fetch_group_insn (load_store_infos_t *src,
++			load_store_infos_t *dst, int id)
++{
++  unsigned int i = 0;
++
++  while (i < src->length ())
++    {
++      if (id == (*src)[i].group)
++	{
++	  dst->safe_push ((*src)[i]);
++	  /* The next entry slides into slot I, so do not rescan from 0.  */
++	  src->ordered_remove (i);
++	}
++      else
++	i++;
++    }
++}
++
++/* Find a safe insertion insn for the lmw/smw of INSN_SET, or NULL_RTX.  */
++static rtx
++nds32_lmwsmw_insert_place (load_store_infos_t *insn_set)
++{
++  unsigned int i, position;
++  bool combine_p;
++  rtx_insn *insn;
++  auto_vec<load_store_info_t, 64> temp_set;
++
++  for (i = 0; i < insn_set->length (); i++)
++    temp_set.safe_push ((*insn_set)[i]);
++
++  /* Each member insn is tried as the insertion position.  A position
++     is accepted when the transferred registers are neither used nor
++     defined between their own insn and that position.
++       example:
++	 lwi $r0, [$r2 + 4]
++	 lwi $r1, [$r2 + 8]
++
++     Check $r0 and $r1 are not used and defined.  */
++  temp_set.qsort (compare_order);
++
++  for (position = 0; position < temp_set.length (); ++position)
++    {
++      combine_p = true;
++
++      /* Check instructions from the first instruction to position.  */
++      for (i = 0; i < position; i++)
++	{
++	  for (insn = NEXT_INSN (temp_set[i].insn);
++	       insn != temp_set[position].insn;
++	       insn = NEXT_INSN (insn))
++	    {
++	      if (!NONDEBUG_INSN_P (insn))
++		continue;
++	      if (df_reg_used (insn, temp_set[i].reg)
++		  || df_reg_defined (insn, temp_set[i].reg))
++		{
++		  if (dump_file)
++		    {
++		      fprintf (dump_file, "Fail:register has modify\n");
++		      fprintf (dump_file, "insn uid:%d, reg: r%d,\n",
++			       INSN_UID (temp_set[i].insn),
++			       REGNO (temp_set[i].reg));
++		      fprintf (dump_file, "Modify instruction:\n");
++		      print_rtl_single (dump_file, insn);
++		    }
++		  combine_p = false;
++		  break;
++		}
++	    }
++	}
++
++      /* Check instructions from position to the last instruction.  */
++      for (i = position + 1; i < temp_set.length (); i++)
++	{
++	  for (insn = temp_set[position].insn;
++	       insn != temp_set[i].insn;
++	       insn = NEXT_INSN (insn))
++	    {
++	      if (!NONDEBUG_INSN_P (insn))
++		continue;
++	      if (df_reg_used (insn, temp_set[i].reg)
++		  || df_reg_defined (insn, temp_set[i].reg))
++		{
++		  if (dump_file)
++		    {
++		      fprintf (dump_file, "Fail:register has modify\n");
++		      fprintf (dump_file, "insn uid:%d, reg: r%d,\n",
++			       INSN_UID (temp_set[i].insn),
++			       REGNO (temp_set[i].reg));
++		      fprintf (dump_file, "Modify instruction:\n");
++		      print_rtl_single (dump_file, insn);
++		    }
++		  combine_p = false;
++		  break;
++		}
++	    }
++	}
++
++      if (combine_p)
++	return temp_set[position].insn;
++    }
++
++  return NULL_RTX;
++}
++
++/* Return true if no insn between the accesses of INSN_SET blocks merging.  */
++static bool
++nds32_base_reg_safe_p (load_store_infos_t *insn_set)
++{
++  unsigned int i;
++  rtx_insn *insn;
++  auto_vec<load_store_info_t, 64> temp_set;
++
++  /* Sorting reorders elements, so work on the copy 'temp_set'
++     and leave 'insn_set' untouched.  */
++  for (i = 0; i < insn_set->length (); i++)
++    temp_set.safe_push ((*insn_set)[i]);
++
++  /* We want to combine load and store instructions,
++     need to check base register is not used and defined
++     between first insn and last insn.
++     example:
++       lwi $r0, [$r3 + 4]
++	    ...		  <- check here
++       lwi $r1, [$r3 + 8]
++	    ...		  <- check here
++       lwi $r2, [$r3 + 12]
++
++     For loads the base ($r3) must not be defined in between, for
++     stores it must not be used; aliasing memory accesses also fail.  */
++
++  /* Scan instruction from top to bottom,
++     so need to sort by order.  */
++  temp_set.qsort (compare_order);
++
++  for (i = 0; i + 1 < temp_set.length (); ++i)
++    {
++      for (insn = NEXT_INSN (temp_set[i].insn);
++	   insn != temp_set[i + 1].insn;
++	   insn = NEXT_INSN (insn))
++	{
++	  if (!NONDEBUG_INSN_P (insn))
++	    continue;
++
++	  if (nds32_insn_alias_p (temp_set[0].mem, PATTERN (insn)))
++	    {
++	      if (dump_file)
++		{
++		  fprintf (dump_file, "Memory alias:\n");
++		  print_rtl_single (dump_file, insn);
++		}
++	      return false;
++	    }
++
++	  if (temp_set[0].load_p)
++	    {
++	      if (df_reg_defined (insn, temp_set[0].base_reg))
++		{
++		  if (dump_file)
++		    {
++		      fprintf (dump_file, "Fail: base register has modify\n");
++		      fprintf (dump_file, "insn uid:%d, base reg: r%d,\n",
++			       INSN_UID (temp_set[i].insn),
++			       REGNO (temp_set[0].base_reg));
++		      fprintf (dump_file, "Modify instruction:\n");
++		      print_rtl_single (dump_file, insn);
++		    }
++		  return false;
++		}
++	    }
++	  else
++	    {
++	      if (df_reg_used (insn, temp_set[0].base_reg))
++		{
++		  if (dump_file)
++		    {
++		      fprintf (dump_file, "Fail: base register has modify\n");
++		      fprintf (dump_file, "insn uid:%d, base reg: r%d,\n",
++			       INSN_UID (temp_set[i].insn),
++			       REGNO (temp_set[0].base_reg));
++		      fprintf (dump_file, "Modify instruction:\n");
++		      print_rtl_single (dump_file, insn);
++		    }
++		  return false;
++		}
++	    }
++	}
++    }
++  return true;
++}
++
++/* Return true if combining INSN is estimated to shrink code size.  */
++static bool
++nds32_gain_size_p (load_store_infos_t *insn, bool new_base_p)
++{
++  unsigned int i, new_cost = 4, old_cost = 0;
++  rtx reg;
++  rtx base_reg = (*insn)[0].base_reg;
++  HOST_WIDE_INT offset;
++
++  for (i = 0; i < insn->length (); ++i)
++    {
++      reg = (*insn)[i].reg;
++      offset = (*insn)[i].offset;
++
++      if (in_reg_class_p (reg, LOW_REGS))
++	{
++	  /* lwi37.sp/swi37.sp/lwi37/swi37 */
++	  if ((REGNO (base_reg) == SP_REGNUM
++	      || REGNO (base_reg) == FP_REGNUM)
++	      && (offset >= 0 && offset < 512 && (offset % 4 == 0)))
++	    old_cost += 2;
++	  /* lwi333/swi333 */
++	  else if (in_reg_class_p (base_reg, LOW_REGS)
++		   && (offset >= 0 && offset < 32 && (offset % 4 == 0)))
++	    old_cost += 2;
++	  else
++	    old_cost += 4;
++	}
++      else
++	{
++	  /* lwi450/swi450 */
++	  if (in_reg_class_p (reg, MIDDLE_REGS) && offset == 0)
++	    old_cost += 2;
++	  else
++	    old_cost += 4;
++	}
++    }
++
++  offset = (*insn)[0].offset;
++  if (offset != 0)
++    {
++      /* addi333 */
++      if (in_reg_class_p (base_reg, LOW_REGS)
++	  && satisfies_constraint_Iu05 (GEN_INT (offset)))
++	new_cost += 2;
++      /* addi45 */
++      else if (in_reg_class_p (base_reg, MIDDLE_REGS)
++	       && satisfies_constraint_Iu05 (GEN_INT (offset)))
++	new_cost += 2;
++      else
++	new_cost += 4;
++
++      /* subri */
++      if (!new_base_p)
++	new_cost += 4;
++    }
++
++  if (dump_file)
++    fprintf (dump_file, "Code size compare: old code size is %u,"
++			" new code size is %u\n", old_cost, new_cost);
++
++  return new_cost < old_cost;
++}
++
++/* Return true if combining INSN is estimated to be faster; the original
++   sequence is costed at one unit per access.  */
++static bool
++nds32_gain_speed_p (load_store_infos_t *insn, bool new_base_p)
++{
++  const unsigned int access_count = insn->length ();
++  const bool adjust_base_p = (*insn)[0].offset != 0;
++
++  if (!TARGET_PIPELINE_GRAYWOLF)
++    {
++      /* A nonzero start offset is always rejected here; otherwise
++	 the combined cost counts as zero.  */
++      return !adjust_base_p && 0 < access_count;
++    }
++
++  /* Graywolf: one unit per pair of accesses, rounded up.  */
++  unsigned int combined_cost = access_count / 2 + access_count % 2;
++
++  /* One addi is needed to reach a nonzero start offset ...  */
++  if (adjust_base_p)
++    combined_cost += 1;
++  /* ... plus one subri when no new base register is available.  */
++  if (adjust_base_p && !new_base_p)
++    combined_cost += 1;
++
++  return combined_cost < access_count;
++}
++
++/* Return true if INSN_SET is worth combining into one lmw/smw insn.  */
++static bool
++nds32_combine_multiple_p (load_store_infos_t *insn_set, bool new_base_p)
++{
++  unsigned int i;
++  auto_vec<load_store_info_t, 64> temp_set;
++
++  /* Sorting reorders elements, so work on the copy 'temp_set'
++     and leave 'insn_set' untouched.  */
++  for (i = 0; i < insn_set->length (); i++)
++    temp_set.safe_push ((*insn_set)[i]);
++
++  /* Sort by offset so that element 0 carries the start offset.  */
++  temp_set.qsort (compare_offset);
++
++  /* The lmw/smw pattern needs two or more instructions.  */
++  if (temp_set.length () < 2)
++    return false;
++
++  /* The lmw/smw pattern allows combining at most 25 instructions.  */
++  if (temp_set.length () > 25)
++    return false;
++
++  if (TARGET_LMWSMW_OPT_SIZE
++      || (TARGET_LMWSMW_OPT_AUTO && optimize_size))
++    {
++      /* Compare the original instructions with the multiple
++	 instruction; combine only when the multiple instruction is
++	 smaller than the original instructions.  */
++      if (!nds32_gain_size_p (&temp_set, new_base_p))
++	return false;
++    }
++  else if (TARGET_LMWSMW_OPT_SPEED
++	   || (TARGET_LMWSMW_OPT_AUTO && !optimize_size))
++    {
++      /* When the start offset is not zero we need an extra instruction
++	 to handle the offset, which may not be worth it for speed.  */
++      if (!nds32_gain_speed_p (&temp_set, new_base_p))
++	return false;
++    }
++
++  /* With a nonzero start offset, no transferred register may be the base.  */
++  if (temp_set[0].offset != 0)
++    for (i = 0; i < temp_set.length (); ++i)
++      {
++	if (REGNO (temp_set[i].reg)
++	    == REGNO (temp_set[0].base_reg))
++	  return false;
++      }
++
++  /* Don't combine when the start offset does not satisfy Is15,
++     because that would need an extra register.  */
++  if (!satisfies_constraint_Is15 (GEN_INT (temp_set[0].offset)))
++    return false;
++
++  return true;
++}
++
++static bool
++nds32_use_bim_p (load_store_infos_t *insn_set,
++		 load_store_infos_t *ref_set)
++{
++  rtx_insn *insn;
++  bool combine_p = true;
++
++  /* The .bim form requires REF_SET to start right after INSN_SET ends.  */
++  if (insn_set->last ().offset != ((*ref_set)[0].offset - 4))
++    return false;
++
++  /* Reject the case where the group of 'insn_set' comes after
++     the group of 'ref_set'.  */
++  if ((*insn_set)[0].group > (*ref_set)[0].group)
++    return false;
++
++  /* Scan instruction from top to bottom,
++     so need to sort by order.  */
++  insn_set->qsort (compare_order);
++  ref_set->qsort (compare_order);
++
++  /* We want to combine .bim form instruction,
++     so need to check base register is not used and defined
++     between multiple-insn and next multiple-insn.
++     example:
++      lmw.bim $r0, [$r2], $r1
++		...		       <- check here
++      lmw.bi  $r3, [$r2], $r4
++
++    Using the .bim form requires that $r2 is neither used nor defined
++    between lmw.bim and lmw.bi, and that no insn there may alias.  */
++    for (insn = NEXT_INSN (insn_set->last ().insn);
++	 insn != (*ref_set)[0].insn;
++	 insn = NEXT_INSN (insn))
++      {
++	if (!NONDEBUG_INSN_P (insn))
++	  continue;
++
++	if (nds32_insn_alias_p ((*insn_set)[0].mem, PATTERN (insn)))
++	  {
++	    if (dump_file)
++	      {
++		fprintf (dump_file, "Have memory instruction:\n");
++		print_rtl_single (dump_file, insn);
++	      }
++	    combine_p = false;
++	    break;
++	  }
++
++	if (df_reg_used (insn, (*insn_set)[0].base_reg)
++	    || df_reg_defined (insn, (*insn_set)[0].base_reg))
++	  {
++	    if (dump_file)
++	      {
++		fprintf (dump_file, "Use .bi form: Base reg is"
++			 " used or defined between multiple-insn"
++			 " and next multiple-insn\n");
++		fprintf (dump_file, "Base register: r%d,\n",
++			 REGNO ((*insn_set)[0].base_reg));
++		fprintf (dump_file, "use or def instruction:\n");
++		print_rtl_single (dump_file, insn);
++	      }
++	    combine_p = false;
++	    break;
++	  }
++      }
++
++  /* Restore the by-offset element order of both sets.  */
++  insn_set->qsort (compare_offset);
++  ref_set->qsort (compare_offset);
++
++  if (combine_p)
++    return true;
++  else
++    return false;
++}
++
++static void
++nds32_merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head)
++{
++  bitmap_iterator iter;
++  unsigned chain_id;
++  IOR_HARD_REG_SET (*pset, head->hard_conflicts);
++  EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, chain_id, iter)
++    {
++      /* Add every hard register of the conflicting chain to *PSET.  */
++      du_head_p other = regrename_chain_from_id (chain_id);
++      gcc_assert (other != head);
++      for (unsigned left = other->nregs; left > 0; left--)
++	SET_HARD_REG_BIT (*pset, other->regno + left - 1);
++    }
++}
++
++/* Return true if NEW_REG is an acceptable replacement for REG in the
++   THIS_HEAD chain.  THIS_UNAVAILABLE is the set of hard registers that
++   must not be used.  */
++static bool
++nds32_check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg,
++		       struct du_head *this_head, HARD_REG_SET this_unavailable)
++{
++  enum machine_mode mode = GET_MODE (*this_head->first->loc);
++  int nregs = hard_regno_nregs[new_reg][mode];
++  int i;
++  struct du_chain *tmp;
++
++  for (i = nregs - 1; i >= 0; --i)
++    if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
++	|| fixed_regs[new_reg + i]
++	|| global_regs[new_reg + i]
++	/* Can't use regs which aren't saved by the prologue.  */
++	|| (! df_regs_ever_live_p (new_reg + i)
++	    && ! call_used_regs[new_reg + i])
++#ifdef LEAF_REGISTERS
++	/* We can't use a non-leaf register if we're in a
++	   leaf function.  */
++	|| (crtl->is_leaf
++	    && !LEAF_REGISTERS[new_reg + i])
++#endif
++#ifdef HARD_REGNO_RENAME_OK
++	|| ! HARD_REGNO_RENAME_OK (reg + i, new_reg + i)
++#endif
++	)
++      return false;
++
++  /* NEW_REG must accept every mode used along the chain (debug insns
++     excepted) and must not become part-clobbered by calls.  */
++  for (tmp = this_head->first; tmp; tmp = tmp->next_use)
++    if ((! HARD_REGNO_MODE_OK (new_reg, GET_MODE (*tmp->loc))
++	 && ! DEBUG_INSN_P (tmp->insn))
++	|| (this_head->need_caller_save_reg
++	    && ! (HARD_REGNO_CALL_PART_CLOBBERED
++		  (reg, GET_MODE (*tmp->loc)))
++	    && (HARD_REGNO_CALL_PART_CLOBBERED
++		(new_reg, GET_MODE (*tmp->loc)))))
++      return false;
++
++  return true;
++}
++
++/* Return NEW_REG if THIS_HEAD's chain may be renamed to it, else OLD_REG.  */
++static int
++nds32_find_best_rename_reg (du_head_p this_head, int new_reg, int old_reg)
++{
++  HARD_REG_SET blocked;
++
++  /* Everything outside GENERAL_REGS starts out blocked; the chain's
++     own register is cleared from the set.  */
++  COMPL_HARD_REG_SET (blocked, reg_class_contents[GENERAL_REGS]);
++  CLEAR_HARD_REG_BIT (blocked, this_head->regno);
++
++  /* A chain that needs a call-saved register cannot use call-used ones.  */
++  if (this_head->need_caller_save_reg)
++    IOR_HARD_REG_SET (blocked, call_used_reg_set);
++
++  /* Registers that overlap this chain's lifetime are blocked as well.  */
++  nds32_merge_overlapping_regs (&blocked, this_head);
++
++  if (!nds32_check_new_reg_p (old_reg, new_reg, this_head, blocked))
++    return old_reg;
++
++  return new_reg;
++}
++
++/* Return true if the register of operand OP_POS in INSN can be renamed
++   to BEST_REG.  This only tests feasibility; no insn is changed.  */
++static bool
++nds32_try_rename_reg (rtx_insn *insn, unsigned op_pos, unsigned best_reg)
++{
++  insn_rr_info *rr_info = &insn_rr[INSN_UID (insn)];
++
++  /* No operand information was recorded for this insn.  */
++  if (rr_info->op_info == NULL)
++    return false;
++
++  /* No register chain is attached to this operand.  */
++  if (rr_info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  du_head_p chain
++    = regrename_chain_from_id (rr_info->op_info[op_pos].heads[0]->id);
++
++  if (chain->cannot_rename)
++    return false;
++
++  /* The helper returns the old register when BEST_REG is rejected.  */
++  const unsigned current_reg = chain->regno;
++  const unsigned chosen_reg
++    = nds32_find_best_rename_reg (chain, best_reg, current_reg);
++
++  return chosen_reg != current_reg;
++}
++
++/* Collect runs of two or more consecutive usable registers of CLAZZ.  */
++static void
++nds32_group_available_reg (HARD_REG_SET *available_regset, enum reg_class clazz,
++			   std::vector <available_reg_info_t> *available_group)
++{
++  hard_reg_set_iterator hrsi;
++  unsigned regno, pre_regno = 0;
++  unsigned count = 0;
++  available_reg_info_t reg_info;
++  std::vector<available_reg_info_t>::iterator it;
++
++  if (!available_group->empty ())
++    available_group->clear ();
++
++  /* Scan the registers of CLAZZ, starting at hard register 2.  */
++  EXECUTE_IF_SET_IN_HARD_REG_SET (reg_class_contents[clazz], 2, regno, hrsi)
++    {
++      /* Usable: in AVAILABLE_REGSET and either call-used or ever live.  */
++      if (TEST_HARD_REG_BIT (*available_regset, regno)
++	  && (call_used_regs[regno] || df_regs_ever_live_p (regno)))
++	{
++	  if (pre_regno == 0
++	      || (pre_regno + 1) == regno)
++	    count++;
++	}
++      else
++	{
++	  if (count >= 2)
++	    {
++	      reg_info.amount = count;
++	      reg_info.end = pre_regno;
++	      reg_info.start = pre_regno - count + 1;
++	      available_group->push_back (reg_info);
++	    }
++	  count = 0;
++	}
++      pre_regno = regno;
++    }
++
++  sort (available_group->begin(), available_group->end(), compare_amount);
++
++  if (dump_file)
++    {
++      for (it = available_group->begin();
++	   it != available_group->end(); ++it)
++	fprintf (dump_file,
++		 "available amount = %d start = %d "
++		 "end = %d \n", it->amount, it->start,
++		 it->end);
++    }
++}
++
++/* Pick new registers for INSN so they become consecutive; record in RENAME_INSN.  */
++static void
++nds32_find_reg (load_store_infos_t *insn, load_store_infos_t *rename_insn,
++		HARD_REG_SET *available_regset)
++{
++  int can_rename_number;
++  unsigned i, regno, amount;
++  unsigned op_pos = (*insn)[0].load_p ? 0 : 1;
++  auto_vec<load_store_info_t, 64> temp_set;
++  std::vector<available_reg_info_t> available_group;
++  std::vector<available_reg_info_t>::iterator it;
++  auto_vec<load_store_info_t, 64> down_set, up_set;
++  unsigned int down_num = 0, up_num = 0;
++  long offset;
++  int m;
++
++  /* Sorting reorders elements, so work on the copy 'temp_set'
++     and leave 'insn' untouched.  */
++  for (i = 0; i < insn->length (); i++)
++    temp_set.safe_push ((*insn)[i]);
++
++  if (temp_set[0].post_type == NDS32_NONE)
++    temp_set.qsort (compare_offset);
++
++  nds32_group_available_reg (available_regset, GENERAL_REGS, &available_group);
++
++  /* Check rename register from top insn to bottom insn,
++     and avoid using fp, sp, lp, gp registers.  */
++  regno = REGNO (temp_set[0].reg);
++  can_rename_number = regno + temp_set.length () - 1;
++  offset = temp_set[0].offset;
++
++  if (can_rename_number < FP_REGNUM)
++    for (i = 1; i < temp_set.length (); ++i)
++      {
++	/* Find this case:
++	     lwi $r0, [$r2 + 4]
++	     lwi $r3, [$r2 + 8]
++
++	   Rename $r3 to $r1.  */
++	down_num++;
++	if ((regno + i) != REGNO (temp_set[i].reg))
++	  {
++	    if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno + i))
++	      {
++		/* Store in temporary set.  */
++		down_set.safe_push (temp_set[i]);
++		down_set.last ().new_reg = regno + i;
++	      }
++	    else
++	      /* Stop when the register sequence is broken.  */
++	      break;
++	  }
++      }
++
++  /* Check rename register from bottom insn to top insn,
++     and avoid using fp, sp, lp, gp registers.  */
++  regno = REGNO (temp_set.last ().reg);
++  can_rename_number = regno - temp_set.length () + 1;
++
++  if (can_rename_number > 0 && regno < FP_REGNUM)
++    for (i = temp_set.length () - 1; i > 0; --i)
++      {
++	/* Find this case:
++	     lwi $r1, [$r2 + 4]
++	     lwi $r4, [$r2 + 8]
++
++	   Rename $r1 to $r3.  */
++	up_num++;
++	if ((regno - i) != REGNO (temp_set[i - 1].reg))
++	  {
++	    if (nds32_try_rename_reg (temp_set[i - 1].insn, op_pos, regno - i))
++	      {
++		/* Store in temporary set.  */
++		up_set.safe_push (temp_set[i - 1]);
++		up_set.last ().new_reg = regno - i;
++	      }
++	    else
++	      /* Stop when the register sequence is broken.  */
++	      break;
++	  }
++      }
++
++  /* Rename for the longest sequence.  */
++  /* The overhead of zero offset instruction is lowest, so try it first.  */
++  if ((offset == 0 || down_num >= up_num) && !down_set.is_empty ())
++    {
++      for (m = down_set.length () - 1; m >= 0; --m)
++	{
++	  regno = REGNO (down_set[m].reg);
++	  CLEAR_HARD_REG_BIT (*available_regset, regno);
++	  rename_insn->safe_push (down_set[m]);
++	}
++      nds32_group_available_reg (available_regset, GENERAL_REGS,
++				 &available_group);
++      return;
++    }
++  else if (up_num >= down_num && !up_set.is_empty ())
++    {
++      for (m = up_set.length () - 1; m >= 0; --m)
++	{
++	  regno = REGNO (up_set[m].reg);
++	  CLEAR_HARD_REG_BIT (*available_regset, regno);
++	  rename_insn->safe_push (up_set[m]);
++	}
++      nds32_group_available_reg (available_regset, GENERAL_REGS,
++				 &available_group);
++      return;
++    }
++  /* Neither direction worked; fall back to the available table, if any.  */
++  else if (available_group.empty ())
++    return;
++
++  amount = available_group.begin ()->amount;
++  /* Using the minimum number, as the rename amount.  */
++  if (amount > temp_set.length ())
++    amount = temp_set.length ();
++
++  /* Using most available register number to rename.  */
++  regno = available_group.begin ()->start;
++  for (i = 0; i < amount; ++i)
++    {
++      if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno))
++	{
++	  rename_insn->safe_push (temp_set[i]);
++	  rename_insn->last ().new_reg = regno;
++	  CLEAR_HARD_REG_BIT (*available_regset, regno);
++	  regno++;
++	}
++      else
++	/* Stop when the register sequence is broken.  */
++	break;
++    }
++
++  /* Check length here because the whole sequence entries
++     have to be renamed.  */
++  if (rename_insn->length () > 1)
++    {
++      /* Update available table.  */
++      nds32_group_available_reg (available_regset, GENERAL_REGS,
++				 &available_group);
++      return;
++    }
++
++  /* Using all available register to rename each insn.  */
++  for (i = 0; i < (temp_set.length () - 1); i += 2)
++    {
++      for (it = available_group.begin();
++	   it != available_group.end(); ++it)
++	{
++	  bool change_p = false;
++	  unsigned int j;
++	  regno = it->start;
++
++	  /* Try each register pair (J, J + 1) that lies in this group.  */
++	  for (j = regno; j < it->end; j += 2)
++	    {
++	      if (nds32_try_rename_reg (temp_set[i].insn, op_pos, j)
++		  && nds32_try_rename_reg (temp_set[i + 1].insn,
++					   op_pos, j + 1))
++		{
++		  rename_insn->safe_push (temp_set[i]);
++		  rename_insn->last ().new_reg = j;
++		  CLEAR_HARD_REG_BIT (*available_regset, j);
++
++		  rename_insn->safe_push (temp_set[i + 1]);
++		  rename_insn->last ().new_reg = j + 1;
++		  CLEAR_HARD_REG_BIT (*available_regset, j + 1);
++		  change_p = true;
++		  break;
++		}
++	    }
++
++	  if (change_p)
++	    {
++	      nds32_group_available_reg (available_regset, GENERAL_REGS,
++					 &available_group);
++	      break;
++	    }
++	}
++    }
++}
++
++/* Rename the register chain of operand OP_POS of INSN to NEWREG and dump
++   INSN before and after the replacement.  */
++static void
++nds32_rename_reg (rtx_insn *insn, unsigned op_pos, unsigned newreg)
++{
++  insn_rr_info *info;
++  du_head_p op_chain;
++
++  info = &insn_rr[INSN_UID (insn)];
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "Try to rename operand %u to %u:\n",
++	       op_pos, newreg);
++      print_rtl_single (dump_file, insn);
++    }
++
++  regrename_do_replace (op_chain, newreg);
++
++  if (dump_file)
++    print_rtl_single (dump_file, insn);
++}
++
++/* Combine pairs of load/store insns into lmw/smw insns with base update.  */
++static void
++nds32_combine_bi_insn (load_store_infos_t *load_store_info)
++{
++  auto_vec<load_store_info_t, 64> candidate_set, bi_set;
++  unsigned int i, j, regno;
++
++  bool load_insn_p;
++  enum nds32_memory_post_type post_type;
++
++  for (i = 0; i < load_store_info->length (); ++i)
++    {
++      /* Record each instruction's original order and reset its place.  */
++      (*load_store_info)[i].order = i;
++      (*load_store_info)[i].place = false;
++      candidate_set.safe_push ((*load_store_info)[i]);
++    }
++
++  for (i = 0; i < candidate_set.length (); ++i)
++    {
++      load_insn_p = candidate_set[i].load_p;
++      post_type = candidate_set[i].post_type;
++      regno = REGNO (candidate_set[i].reg);
++
++      for (j = i + 1; j < candidate_set.length (); ++j)
++	{
++	  if ((post_type == candidate_set[j].post_type)
++	      && (load_insn_p == candidate_set[j].load_p)
++	      && ((regno + 1) == REGNO (candidate_set[j].reg)))
++	    {
++	      bi_set.safe_push (candidate_set[i]);
++	      bi_set.safe_push (candidate_set[j]);
++
++	      if (nds32_combine_multiple_p (&bi_set, false)
++		  && nds32_base_reg_safe_p (&bi_set)
++		  && nds32_lmwsmw_insert_place (&bi_set) != NULL_RTX)
++		{
++		  rtx place = nds32_lmwsmw_insert_place (&bi_set);
++		  nds32_emit_multiple_insn (&bi_set, bi_set[0].base_reg,
++					    place, true);
++		  /* BI_SET holds exactly this pair, in slots 0 and 1.  */
++		  delete_insn (bi_set[0].insn);
++		  delete_insn (bi_set[1].insn);
++		  candidate_set.ordered_remove (j);
++		  bi_set.block_remove (0, bi_set.length ());
++		  break;
++		}
++
++	      bi_set.block_remove (0, bi_set.length ());
++	    }
++	}
++    }
++}
++
++/* Combine mutilple load/store insn into a lmw/smw insn.  */
++static void
++nds32_combine_load_store_insn (load_store_infos_t *load_store_info,
++			       HARD_REG_SET *available_regset)
++{
++  auto_vec<load_store_info_t, 64> candidate_set, main_set, temp_set;
++  auto_vec<load_store_info_t, 64> first_set, second_set;
++  HOST_WIDE_INT current_offset, last_offset = 0, add_offset = 0;
++  unsigned int i, j, regno;
++  int group_num = 0, group_id;
++  bool load_insn_p;
++  bool new_base_p = false;
++  bool prev_bim_p = false;
++  bool inc_p = true, dec_p = true;
++  rtx new_base_reg = NULL_RTX;
++  rtx base_reg = (*load_store_info)[0].base_reg;
++  rtx place;
++  unsigned new_base_regnum;
++
++  /* Get available register to add offset for first instruction.  */
++  new_base_regnum = find_available_reg (available_regset, GENERAL_REGS);
++  if (new_base_regnum != INVALID_REGNUM)
++    {
++      CLEAR_HARD_REG_BIT (*available_regset, new_base_regnum);
++      new_base_reg = gen_rtx_REG (Pmode, new_base_regnum);
++      /* Copy attribute form base register to new base register.  */
++      ORIGINAL_REGNO (new_base_reg) =
++	ORIGINAL_REGNO ((*load_store_info)[0].base_reg);
++      REG_ATTRS (new_base_reg) = REG_ATTRS ((*load_store_info)[0].base_reg);
++      new_base_p = true;
++
++      if (dump_file)
++	fprintf (dump_file, "Have new base register: %d\n", new_base_regnum);
++    }
++
++  /* Recording instruction order of priority and initinal place.  */
++  for (i = 0; i < load_store_info->length (); ++i)
++    {
++      (*load_store_info)[i].order = i;
++      (*load_store_info)[i].place = false;
++    }
++
++  /* Fetch first instruction information from 'load_store_info',
++     we will use first instruction as base, to search next instruction.  */
++  candidate_set.safe_push ((*load_store_info)[0]);
++  /* Set offset, regno, load_p state from candidate_set.  */
++  current_offset = candidate_set[0].offset;
++  regno = REGNO (candidate_set[0].reg);
++  load_insn_p = candidate_set[0].load_p;
++  /* Set first instruction group ID,
++     the group ID mark instruction for the same group.  */
++  candidate_set[0].group = group_num;
++
++  /* Search instructions can be combined to a lmw/smw instruction.  */
++  for (i = 1; i < load_store_info->length (); ++i)
++    {
++      /* Collecting register number and offset is increase,
++	 for example:
++
++	   lwi $r0, [$r22 + 4]  <- base instruction
++	   lwi $r1, [$r22 + 8]  <- collect object
++
++	 The collect object (regno + 1), (offset + 4)
++	 from base instruction.  */
++      if ((current_offset == (*load_store_info)[i].offset - 4)
++	  && ((regno + 1) == REGNO ((*load_store_info)[i].reg))
++	  && (load_insn_p == (*load_store_info)[i].load_p)
++	  && inc_p)
++	{
++	  /* Give instruction group ID.  */
++	  (*load_store_info)[i].group = group_num;
++	  /* Save instruction.  */
++	  candidate_set.safe_push ((*load_store_info)[i]);
++	  /* Update state, next register number and offset.  */
++	  regno = REGNO ((*load_store_info)[i].reg);
++	  current_offset += 4;
++	  /* Close decrease type, search increase type.  */
++	  dec_p = false;
++	}
++      /* Collecting register number and offset is decrease,
++	 for example:
++
++	   lwi $r2, [$r22 + 8]  <- base instruction
++	   lwi $r1, [$r22 + 4]  <- collect object
++
++	 The collect object (regno - 1), (offset - 4)
++	 from base instruction.  */
++      else if ((current_offset == (*load_store_info)[i].offset + 4)
++	       && ((regno - 1) == REGNO ((*load_store_info)[i].reg))
++	       && (load_insn_p == (*load_store_info)[i].load_p)
++	       && dec_p)
++	{
++	  /* Give instruction group ID.  */
++	  (*load_store_info)[i].group = group_num;
++	  /* Save instruction.  */
++	  candidate_set.safe_push ((*load_store_info)[i]);
++
++	  /* Update state, next register number and offset.  */
++	  regno = REGNO ((*load_store_info)[i].reg);
++	  current_offset -= 4;
++	  /* Close increase type, search decrease type.  */
++	  inc_p = false;
++	}
++      else
++	{
++	  inc_p = true;
++	  dec_p = true;
++	}
++
++      /* Instructions collect is complete.  */
++      if ((inc_p && dec_p)
++          || (i + 1) == load_store_info->length ())
++	{
++	  /* Filter candidate instructions.  */
++	  if (nds32_combine_multiple_p (&candidate_set, new_base_p)
++	      && nds32_base_reg_safe_p (&candidate_set)
++	      && nds32_lmwsmw_insert_place (&candidate_set) != NULL_RTX)
++	    {
++	      /* Store candidate instructions to 'main_set'.  */
++	      for (j = 0; j < candidate_set.length (); j++)
++		main_set.safe_push (candidate_set[j]);
++	    }
++
++	  /* Scan to the last instruction, it is complete.  */
++	  if ((i + 1) == load_store_info->length ())
++	    break;
++
++	  /* Clean candidate_set sequence.  */
++	  candidate_set.block_remove (0, candidate_set.length ());
++	  /* Reinitialize first instruction infomation
++	     to search next instruction.  */
++	  candidate_set.safe_push ((*load_store_info)[i]);
++	  /* Update group number for next sequence.  */
++	  group_num ++;
++	  /* Set offset, regno, load_p state from candidate_set.  */
++	  current_offset = candidate_set.last ().offset;
++	  regno = REGNO (candidate_set.last ().reg);
++	  load_insn_p = candidate_set.last ().load_p;
++	  candidate_set.last ().group = group_num;
++	}
++      else if (!nds32_base_reg_safe_p (&candidate_set)
++	       || nds32_lmwsmw_insert_place (&candidate_set) == NULL_RTX)
++	{
++	  /* Check collect instruction for each instruction,
++	     we store (n - 1) instructions in group, and
++	     last instruction make next group First instruction.  */
++	  for (j = 0; j < (candidate_set.length () - 1); j++)
++	    temp_set.safe_push (candidate_set[j]);
++
++	  /* Store candidate instructions to 'main_set'.  */
++	  if (nds32_combine_multiple_p (&temp_set, new_base_p))
++	    {
++	      for (j = 0; j < (temp_set.length ()); j++)
++		main_set.safe_push (temp_set[j]);
++	    }
++
++	  /* Clean temp_set sequence.  */
++	  temp_set.block_remove (0, temp_set.length ());
++	  /* Clean candidate_set sequence.  */
++	  candidate_set.block_remove (0, (candidate_set.length () - 1));
++	  /* Update group number for next sequence.  */
++	  group_num ++;
++	  /* Set offset, regno, load_p state from candidate_set.  */
++	  current_offset = candidate_set.last ().offset;
++	  regno = REGNO (candidate_set.last ().reg);
++	  load_insn_p = candidate_set.last ().load_p;
++	  candidate_set.last ().group = group_num;
++	  /* Reset it for search increase and decrease type.  */
++	  inc_p = true;
++	  dec_p = true;
++	}
++    }
++
++  if (dump_file)
++    {
++      if (!main_set.is_empty ())
++	fprintf (dump_file,"Do lmwsmw instructions:\n");
++      for (i = 0; i < main_set.length (); ++i)
++	{
++	  fprintf (dump_file,
++		   "regno = %d base_regno = %d "
++		   "offset = " HOST_WIDE_INT_PRINT_DEC " "
++		   "load_p = %d UID = %u group = %d,"
++		   " order = %d, place = %d\n",
++		   REGNO (main_set[i].reg),
++		   REGNO (main_set[i].base_reg),
++		   main_set[i].offset,
++		   main_set[i].load_p,
++		   INSN_UID (main_set[i].insn),
++		   main_set[i].group,
++		   main_set[i].order,
++		   main_set[i].place);
++	}
++    }
++
++  /* Fetch first group instruction from main_set.  */
++  if (!main_set.is_empty ())
++    {
++      /* Sort main_set by offset.  */
++      main_set.qsort (compare_offset);
++
++      group_id = main_set[0].group;
++      nds32_fetch_group_insn (&main_set, &first_set, group_id);
++      last_offset = first_set.last ().offset;
++    }
++
++  /* Main loop for emit lmw/smw instrucion.  */
++  while (!main_set.is_empty ())
++    {
++      /* Get second group ID.  */
++      group_id = main_set[0].group;
++      for (i = 0; i < main_set.length (); ++i)
++	{
++	  /* Prefer get consecutive offset form
++	     first group to second group  */
++	  if ((last_offset + 4) == main_set[i].offset)
++	    {
++	      group_id = main_set[i].group;
++	      break;
++	    }
++	}
++
++      /* Fetch second instrucion group.  */
++      nds32_fetch_group_insn (&main_set, &second_set, group_id);
++      /* Get lmw/smw insert place.  */
++      place = nds32_lmwsmw_insert_place (&first_set);
++
++      /* Adjust address offset, because lmw/smw instruction
++	 only allow offset is zero.
++	   example:
++	    lwi $r0, [$r3 + 4]
++	    lwi $r1, [$r3 + 8]
++	    lwi $r2, [$r3 + 12]
++
++	    combine into
++
++	    addi $r3, $r3, 4
++	    lwm.bi(m) $r0, [$r3], $r2
++
++	 Need addi instrucion to handle offset.  */
++      if (first_set[0].offset != 0 && !prev_bim_p)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, "Use addi insn handle offset: "
++		     "" HOST_WIDE_INT_PRINT_DEC "\n",
++		     first_set[0].offset);
++	  /* Use available register to process offset,
++	     and don't recovey base register value.  */
++	  if (new_base_p)
++	    {
++	      base_reg = new_base_reg;
++	      add_offset = 0;
++	      CLEAR_HARD_REG_BIT (*available_regset, new_base_regnum);
++	    }
++	  else
++	    add_offset = first_set[0].offset;
++
++	  nds32_emit_add_insn (first_set[0], base_reg, place, true);
++	}
++
++      if (nds32_use_bim_p (&first_set, &second_set))
++	{
++	  if (dump_file)
++	    fprintf (dump_file, "Generate BIM form.\n");
++
++	  nds32_emit_multiple_insn (&first_set, base_reg, place, true);
++
++	  /* Update status, for next instruction sequence.
++	     The add_offset need add 4, because the instruction
++	     is post increase.  */
++	  add_offset = first_set.last ().offset + 4;
++	  prev_bim_p = true;
++	}
++      else
++	{
++	  if (dump_file)
++	    fprintf (dump_file, "Generate BI form.\n");
++
++	  nds32_emit_multiple_insn (&first_set, base_reg, place, false);
++
++	  if (add_offset != 0)
++	    {
++	      if (dump_file)
++		fprintf (dump_file, "Use addi insn handle -offset: "
++			 "" HOST_WIDE_INT_PRINT_DEC "\n",
++			 add_offset);
++
++	      nds32_emit_add_insn (first_set[0], base_reg, place, false);
++	      add_offset = 0;
++	    }
++	  prev_bim_p = false;
++
++	  /* Recovey base register for next instruction sequence.  */
++	  if (REGNO (base_reg) != REGNO (first_set[0].base_reg))
++	    base_reg = first_set[0].base_reg;
++	}
++
++      /* Delete insn, replace by lmw/smw instruction.  */
++      for (i = 0; i < first_set.length (); ++i)
++	delete_insn (first_set[i].insn);
++
++      /* Clean first_set for store next instruction group.  */
++      first_set.block_remove (0, first_set.length ());
++      /* Store next instruction group.  */
++      for (i = 0; i < second_set.length (); ++i)
++	first_set.safe_insert (i, second_set[i]);
++
++      /* Clean second_set.  */
++      second_set.block_remove (0, second_set.length ());
++
++      /* Update last_offset for search next group.  */
++      last_offset = first_set.last ().offset;
++    }
++
++  /* Processing the last instruction group.  */
++  if (!first_set.is_empty ())
++    {
++      /* Get lmw/smw insert place.  */
++      place = nds32_lmwsmw_insert_place (&first_set);
++
++      if (first_set[0].offset != 0 && !prev_bim_p)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, "Use addi insn handle offset: "
++		     "" HOST_WIDE_INT_PRINT_DEC "\n",
++		     first_set[0].offset);
++
++	  if (new_base_p)
++	    {
++	      base_reg = new_base_reg;
++	      add_offset = 0;
++	    }
++	  else
++	    add_offset = first_set[0].offset;
++
++	  nds32_emit_add_insn (first_set[0], base_reg, place, true);
++	}
++
++      if (dump_file)
++	fprintf (dump_file, "Generate BI form.\n");
++
++      nds32_emit_multiple_insn (&first_set, base_reg, place, false);
++
++      if (add_offset != 0)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, "Use addi insn handle -offset: "
++		     "" HOST_WIDE_INT_PRINT_DEC "\n",
++		     -add_offset);
++
++	  nds32_emit_add_insn (first_set[0], base_reg, place, false);
++	}
++
++      /* Delete insn, replace by lmw/smw instruction.  */
++      for (i = 0; i < first_set.length (); ++i)
++	delete_insn (first_set[i].insn);
++    }
++}
++
++/* Rename registers in pairs of post-type load/store insns that can combine.  */
++static void
++nds32_rename_bi_insn (load_store_infos_t *load_store_info,
++		       HARD_REG_SET *available_regset)
++{
++  auto_vec<load_store_info_t, 64> candidate_set, bi_set, replace_set;
++  unsigned int i, j;
++
++  bool load_insn_p;
++  enum nds32_memory_post_type post_type;
++
++  for (i = 0; i < load_store_info->length (); ++i)
++    {
++      /* Record instruction order and initial place; copy to candidate_set.  */
++      (*load_store_info)[i].order = i;
++      (*load_store_info)[i].place = false;
++      candidate_set.safe_push ((*load_store_info)[i]);
++    }
++
++  for (i = 0; i < candidate_set.length (); ++i)
++    {
++      load_insn_p = candidate_set[i].load_p;
++      post_type = candidate_set[i].post_type;
++
++      for (j = i + 1; j < candidate_set.length (); ++j)
++	{
++	  if ((post_type == candidate_set[j].post_type)
++	      && (load_insn_p == candidate_set[j].load_p))
++	    {
++	      bi_set.safe_push (candidate_set[i]);
++	      bi_set.safe_push (candidate_set[j]);
++
++	      if (nds32_combine_multiple_p (&bi_set, false)
++		  && nds32_base_reg_safe_p (&bi_set)
++		  && nds32_lmwsmw_insert_place (&bi_set) != NULL_RTX)
++		{
++		  nds32_find_reg (&bi_set, &replace_set, available_regset);
++
++		  if (!replace_set.is_empty ())
++		    {
++		      unsigned k;
++		      unsigned op_pos = replace_set[0].load_p ? 0 : 1;
++
++		      /* Rename operand op_pos (0 for load, 1 for store).  */
++		      for (k = 0; k < replace_set.length (); ++k)
++			nds32_rename_reg (replace_set[k].insn, op_pos,
++					  replace_set[k].new_reg);
++
++		      replace_set.block_remove (0, replace_set.length ());
++		    }
++
++		  candidate_set.ordered_remove (j);
++		  bi_set.block_remove (0, bi_set.length ());
++		  break;
++		}
++
++	      bi_set.block_remove (0, bi_set.length ());
++	    }
++	}
++    }
++}
++
++/* Rename registers so that multiple load/store insns can be combined.  */
++static void
++nds32_rename_load_store_reg (load_store_infos_t *load_store_info,
++			     HARD_REG_SET *available_regset)
++{
++  auto_vec<load_store_info_t, 64> rename_set, temp_set, replace_set;
++  HOST_WIDE_INT current_offset;
++  unsigned int i, j;
++  bool load_insn_p;
++  bool inc_p = true, dec_p = true;
++
++  /* Recording instruction order of priority and initial place.  */
++  for (i = 0; i < load_store_info->length (); ++i)
++    {
++      (*load_store_info)[i].order = i;
++      (*load_store_info)[i].place = false;
++    }
++
++  /* Fetch first instruction information from 'load_store_info',
++     we will use first instruction as base, to search next instruction.  */
++  rename_set.safe_push ((*load_store_info)[0]);
++  /* Set offset, load_p state from rename_set.  */
++  current_offset = rename_set[0].offset;
++  load_insn_p = rename_set[0].load_p;
++
++  /* Search instructions can be combined to a lmw/smw instruction.  */
++  for (i = 1; i < load_store_info->length (); ++i)
++    {
++      /* Collecting offset is increase, for example:
++
++	   lwi pseudo_reg, [$r22 + 4]  <- base instruction
++	   lwi pseudo_reg, [$r22 + 8]  <- collect object
++
++	 The collect object (offset + 4) from base instruction.  */
++      if ((current_offset == (*load_store_info)[i].offset - 4)
++	  && (load_insn_p == (*load_store_info)[i].load_p)
++	  && inc_p)
++	{
++	  /* Save instruction.  */
++	  rename_set.safe_push ((*load_store_info)[i]);
++	  /* Update offset.  */
++	  current_offset += 4;
++	  /* Close decrease type, search increase type.  */
++	  dec_p = false;
++	}
++      /* Collecting offset is decrease, for example:
++
++	   lwi pseudo_reg, [$r22 + 8]  <- base instruction
++	   lwi pseudo_reg, [$r22 + 4]  <- collect object
++
++	 The collect object (offset - 4) from base instruction.  */
++      else if ((current_offset == (*load_store_info)[i].offset + 4)
++	       && (load_insn_p == (*load_store_info)[i].load_p)
++	       && dec_p)
++	{
++	  /* Save instruction.  */
++	  rename_set.safe_push ((*load_store_info)[i]);
++
++	  /* Update offset.  */
++	  current_offset -= 4;
++	  /* Close increase type, search decrease type.  */
++	  inc_p = false;
++	}
++      else
++	{
++	  inc_p = true;
++	  dec_p = true;
++	}
++
++      /* Instructions collect is completed.  */
++      if ((inc_p && dec_p)
++	  || (i + 1) == load_store_info->length ())
++	{
++	  /* Check whether this group qualifies for register renaming.  */
++	  if (nds32_combine_multiple_p (&rename_set, false)
++	      && nds32_base_reg_safe_p (&rename_set)
++	      && nds32_lmwsmw_insert_place (&rename_set) != NULL_RTX)
++	    {
++	      /* Find insns that can be renamed, and store in 'replace_set'.  */
++	      nds32_find_reg (&rename_set, &replace_set, available_regset);
++
++	      if (!replace_set.is_empty ())
++		{
++		  unsigned op_pos = replace_set[0].load_p ? 0 : 1;
++
++		  /* Do rename register.  */
++		  for (j = 0; j < replace_set.length (); ++j)
++		    nds32_rename_reg (replace_set[j].insn, op_pos,
++				      replace_set[j].new_reg);
++
++		  replace_set.block_remove (0, replace_set.length ());
++		}
++	    }
++
++	  /* Scan to the last instruction, it is complete.  */
++	  if ((i + 1) == load_store_info->length ())
++	    break;
++
++	  /* Clean rename_set sequence.  */
++	  rename_set.block_remove (0, rename_set.length ());
++	  /* Reinitialize first instruction information
++	     to search next instruction.  */
++	  rename_set.safe_push ((*load_store_info)[i]);
++	  /* Set offset, load_p state from rename_set.  */
++	  current_offset = rename_set.last ().offset;
++	  load_insn_p = rename_set.last ().load_p;
++	}
++      else if (!nds32_base_reg_safe_p (&rename_set)
++	       || nds32_lmwsmw_insert_place (&rename_set) == NULL_RTX)
++	{
++	  /* Check collect instruction for each instruction,
++	     we store (n - 1) instructions in group, and
++	     last instruction as the first instruction of the next group.  */
++	  for (j = 0; j < (rename_set.length () - 1); j++)
++	    temp_set.safe_push (rename_set[j]);
++
++	  if (nds32_combine_multiple_p (&temp_set, false))
++	    {
++	      /* Find insns that can be renamed, and store in 'replace_set'.  */
++	      nds32_find_reg (&temp_set, &replace_set, available_regset);
++
++	      if (!replace_set.is_empty ())
++		{
++		  unsigned op_pos = replace_set[0].load_p ? 0 : 1;
++
++		  /* Do rename register.  */
++		  for (j = 0; j < replace_set.length (); ++j)
++		    nds32_rename_reg (replace_set[j].insn, op_pos,
++				      replace_set[j].new_reg);
++
++		  replace_set.block_remove (0, replace_set.length ());
++		}
++	    }
++
++	  /* Clean temp_set sequence.  */
++	  temp_set.block_remove (0, temp_set.length ());
++	  /* Clean rename_set sequence.  */
++	  rename_set.block_remove (0, (rename_set.length () - 1));
++	  /* Set offset, load_p state from rename_set.  */
++	  current_offset = rename_set.last ().offset;
++	  load_insn_p = rename_set.last ().load_p;
++	  /* Reset it for search increase and decrease type.  */
++	  inc_p = true;
++	  dec_p = true;
++	}
++    }
++}
++/* Build per-base-register load/store chains in BB, then rename or combine.  */
++static void
++nds32_do_lmwsmw_opt (basic_block bb, bool rename_p)
++{
++  rtx_insn *insn;
++  HARD_REG_SET available_regset;
++  load_store_info_t load_store_info;
++  auto_vec<load_store_info_t, 64> load_store_infos[NDS32_GPR_NUM];
++  auto_vec<load_store_info_t, 64> plus_infos[NDS32_GPR_NUM];
++  auto_vec<load_store_info_t, 64> post_infos[NDS32_GPR_NUM];
++  int i;
++  unsigned j;
++  unsigned regno;
++  unsigned polluting;
++  df_ref def;
++  /* Dirty means a register is defined again after the first
++     load/store instruction.  For example:
++
++     lwi $r2, [$r3 + #0x100]
++     mov $r3, $r4            ! $r3 is dirty after this instruction.
++     lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load.
++
++     NOTE(review): the scan below walks insn USEs, not DEFs; confirm.  */
++  bool dirty[NDS32_GPR_NUM];
++
++  if (dump_file)
++    fprintf (dump_file, "scan bb %d\n", bb->index);
++
++  for (i = 0; i < NDS32_GPR_NUM; ++i)
++    dirty[i] = false;
++
++  FOR_BB_INSNS (bb, insn)
++    {
++      if (!INSN_P (insn))
++	continue;
++
++      polluting = INVALID_REGNUM;
++
++      /* Mark each GPR the insn uses as dirty if its chain is not empty.  */
++      FOR_EACH_INSN_USE (def, insn)
++	{
++	  regno = DF_REF_REGNO (def);
++
++	  if (!NDS32_IS_GPR_REGNUM (regno))
++	    continue;
++
++	  if (!load_store_infos[regno].is_empty ())
++	    {
++	      /* Set polluting here because the source register
++		 may be the same one.  */
++	      if (dirty[regno] == false)
++		polluting = regno;
++
++	      dirty[regno] = true;
++	    }
++	}
++
++      /* Mark every call-used register dirty if its chain is not empty.  */
++      if (CALL_P (insn))
++	{
++	  for (i = 0; i < NDS32_GPR_NUM; ++i)
++	    {
++	      if (call_used_regs[i] && !load_store_infos[i].is_empty ())
++		dirty[i] = true;
++	    }
++	}
++
++      if (nds32_load_store_reg_plus_offset (insn, &load_store_info))
++	{
++	  regno = REGNO (load_store_info.base_reg);
++	  gcc_assert (NDS32_IS_GPR_REGNUM (regno));
++
++	  /* Stop scanning if this base reg is dirty (unless just polluted).  */
++	  if (dirty[regno] && polluting != regno)
++	    break;
++
++	  /* If a load overwrites its own base register and that register
++	     is not yet dirty, we don't push it.  */
++	  if (regno == REGNO (load_store_info.reg) && load_store_info.load_p
++	      && dirty[regno] == false)
++	    continue;
++
++	  load_store_infos[regno].safe_push (load_store_info);
++	}
++    }
++
++   for (i = 0; i < NDS32_GPR_NUM; ++i)
++    {
++      for (j = 0; j < load_store_infos[i].length (); ++j)
++	{
++	  if (load_store_infos[i][j].post_type == NDS32_NONE)
++	    plus_infos[i].safe_push (load_store_infos[i][j]);
++	  else
++	    post_infos[i].safe_push (load_store_infos[i][j]);
++	}
++    }
++
++  for (i = 0; i < NDS32_GPR_NUM; ++i)
++    {
++      if (load_store_infos[i].length () <= 1)
++	{
++	  if (dump_file && load_store_infos[i].length () == 1)
++	    fprintf (dump_file,
++		     "Skip Chain for $r%d since chain size only 1\n",
++		     i);
++	  continue;
++	}
++
++      if (dump_file)
++	{
++	  fprintf (dump_file,
++		   "Chain for $r%d: (size = %u)\n",
++		   i, load_store_infos[i].length ());
++
++	  for (j = 0; j < load_store_infos[i].length (); ++j)
++	    {
++	      fprintf (dump_file,
++		       "regno = %d base_regno = %d "
++		       "offset = " HOST_WIDE_INT_PRINT_DEC " "
++		       "load_p = %d UID = %u place = %d\n",
++		       REGNO (load_store_infos[i][j].reg),
++		       REGNO (load_store_infos[i][j].base_reg),
++		       load_store_infos[i][j].offset,
++		       load_store_infos[i][j].load_p,
++		       INSN_UID (load_store_infos[i][j].insn),
++		       load_store_infos[i][j].place);
++	    }
++	}
++
++      nds32_get_available_reg_set (bb,
++				   load_store_infos[i][0].insn,
++				   load_store_infos[i].last ().insn,
++				   &available_regset);
++      if (dump_file)
++	print_hard_reg_set (dump_file, "", available_regset);
++
++      /* If rename_p is true, rename the registers of the load/store
++	 instructions.  Otherwise combine the load/store instructions
++	 into multiple load/store instructions.  */
++      if (rename_p)
++	{
++          if (plus_infos[i].length () > 1)
++	    nds32_rename_load_store_reg (&plus_infos[i], &available_regset);
++          if (post_infos[i].length () > 1)
++	    nds32_rename_bi_insn (&post_infos[i], &available_regset);
++	}
++      else
++	{
++          if (plus_infos[i].length () > 1)
++	    nds32_combine_load_store_insn (&plus_infos[i], &available_regset);
++          if (post_infos[i].length () > 1)
++	    nds32_combine_bi_insn (&post_infos[i]);
++	}
++    }
++}
++
++/* Apply nds32_do_lmwsmw_opt to every basic block of the current function.  */
++static void
++nds32_lmwsmw_opt (bool rename_p)
++{
++  basic_block block;
++  FOR_EACH_BB_FN (block, cfun)
++    nds32_do_lmwsmw_opt (block, rename_p);
++}
++
++/* Execute the rename step: run nds32_lmwsmw_opt (true).  Always returns 1.  */
++static unsigned int
++rest_of_handle_rename_lmwsmw_opt (void)
++{
++  init_alias_analysis ();
++
++  df_set_flags (DF_LR_RUN_DCE);
++  df_note_add_problem ();
++  df_analyze ();
++  df_set_flags (DF_DEFER_INSN_RESCAN);
++
++  regrename_init (true);
++  regrename_analyze (NULL);
++
++  nds32_lmwsmw_opt (true);
++
++  regrename_finish ();
++
++  /* We are finished with alias analysis.  */
++  end_alias_analysis ();
++  return 1;
++}
++
++/* Execute the generate step: run nds32_lmwsmw_opt (false).  Returns 1.  */
++static unsigned int
++rest_of_handle_gen_lmwsmw_opt (void)
++{
++  init_alias_analysis ();
++
++  df_note_add_problem ();
++  df_analyze ();
++  nds32_lmwsmw_opt (false);
++
++  /* We are finished with alias analysis.  */
++  end_alias_analysis ();
++  return 1;
++}
++
++/* Pass descriptor for the "rename_lmwsmw_opt" RTL pass.  */
++const pass_data pass_data_nds32_rename_lmwsmw_opt =
++{
++  RTL_PASS,				/* type */
++  "rename_lmwsmw_opt",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_df_finish,			/* todo_flags_finish */
++};
++
++class pass_nds32_rename_lmwsmw_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_rename_lmwsmw_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_rename_lmwsmw_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gated on flag_nds32_lmwsmw_opt.  */
++  bool gate (function *) { return flag_nds32_lmwsmw_opt; }
++  unsigned int execute (function *) { return rest_of_handle_rename_lmwsmw_opt (); }
++};
++/* Allocate a new pass_nds32_rename_lmwsmw_opt for context CTXT.  */
++rtl_opt_pass *
++make_pass_nds32_rename_lmwsmw_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_rename_lmwsmw_opt (ctxt);
++}
++/* Pass descriptor for the "gen_lmwsmw_opt" RTL pass.  */
++const pass_data pass_data_nds32_gen_lmwsmw_opt =
++{
++  RTL_PASS,				/* type */
++  "gen_lmwsmw_opt",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_df_finish,			/* todo_flags_finish */
++};
++
++class pass_nds32_gen_lmwsmw_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_gen_lmwsmw_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_gen_lmwsmw_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gated on flag_nds32_lmwsmw_opt.  */
++  bool gate (function *) { return flag_nds32_lmwsmw_opt; }
++  unsigned int execute (function *) { return rest_of_handle_gen_lmwsmw_opt (); }
++};
++/* Allocate a new pass_nds32_gen_lmwsmw_opt for context CTXT.  */
++rtl_opt_pass *
++make_pass_nds32_gen_lmwsmw_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_gen_lmwsmw_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-load-store-opt.c b/gcc/config/nds32/nds32-load-store-opt.c
+new file mode 100644
+index 0000000..9e5161e
+--- /dev/null
++++ b/gcc/config/nds32/nds32-load-store-opt.c
+@@ -0,0 +1,721 @@
++/* load-store-opt pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++#include "target-globals.h"
++#include "nds32-load-store-opt.h"
++#include "nds32-reg-utils.h"
++#include <set>
++
++#define NDS32_GPR_NUM 32
++
++static new_base_reg_info_t gen_new_base (rtx,
++					 offset_info_t,
++					 unsigned,
++					 HOST_WIDE_INT,
++					 HOST_WIDE_INT);
++
++static const load_store_optimize_pass *load_store_optimizes[] =
++{
++  /* Constructor arguments, in order:
++	allow_regclass, new_base_regclass, offset_lower_bound,
++	offset_upper_bound, load_only_p, name.  */
++  new load_store_optimize_pass (
++	LOW_REGS, LOW_REGS,
++	0, (32-4),
++	false, "lswi333"),
++  new load_store_optimize_pass (
++	LOW_REGS, FRAME_POINTER_REG,
++	0, (512-4),
++	false, "lswi37"),
++  new load_store_optimize_pass (
++	MIDDLE_REGS, GENERAL_REGS,
++	0, 0,
++	false, "lswi450"),
++  new load_store_optimize_pass (
++	MIDDLE_REGS, R8_REG,
++	-128, -4,
++	true, "lwi45fe")
++};
++
++static const int N_LOAD_STORE_OPT_TYPE = sizeof (load_store_optimizes)
++					 / sizeof (load_store_optimize_pass*);
++/* Construct a descriptor; asserts offset_lower_bound <= offset_upper_bound.  */
++load_store_optimize_pass
++::load_store_optimize_pass (enum reg_class allow_regclass,
++			    enum reg_class new_base_regclass,
++			    HOST_WIDE_INT offset_lower_bound,
++			    HOST_WIDE_INT offset_upper_bound,
++			    bool load_only_p,
++			    const char *name)
++  : m_allow_regclass (allow_regclass),
++    m_new_base_regclass (new_base_regclass),
++    m_offset_lower_bound (offset_lower_bound),
++    m_offset_upper_bound (offset_upper_bound),
++    m_load_only_p (load_only_p),
++    m_name (name)
++{
++  gcc_assert (offset_lower_bound <= offset_upper_bound);
++}
++/* Estimate the gain of applying this variant to LOAD_STORE_INFO's chain.  */
++int
++load_store_optimize_pass::calc_gain (HARD_REG_SET *available_regset,
++				     offset_info_t offset_info,
++				     load_store_infos_t *load_store_info) const
++{
++  int extra_cost = 0;
++  int gain = 0;
++  unsigned i;
++  unsigned chain_size;
++  unsigned new_base_regnum;
++  HOST_WIDE_INT allow_range = m_offset_upper_bound - m_offset_lower_bound;
++  new_base_regnum  = find_available_reg (available_regset, m_new_base_regclass);
++  chain_size = load_store_info->length ();
++
++  if (new_base_regnum == INVALID_REGNUM)
++    {
++      if (dump_file)
++	fprintf (dump_file,
++		 "%s have no avariable register, so give up try %s\n",
++		 reg_class_names[m_new_base_regclass],
++		 m_name);
++      return 0;
++    }
++  else if (dump_file)
++    fprintf (dump_file,
++	     "%s is avariable, get %s, try %s, chain size = %u\n",
++	     reg_class_names[m_new_base_regclass],
++	     reg_names[new_base_regnum],
++	     m_name,
++	     chain_size);
++
++  HOST_WIDE_INT range = offset_info.max_offset - offset_info.min_offset;
++
++  if (range > allow_range)
++    {
++      /* TODO: We can perform load-store opt for only part of load store.  */
++      if (dump_file)
++	fprintf (dump_file,
++		 "range is too large for %s"
++		 " (range = " HOST_WIDE_INT_PRINT_DEC ", "
++		 "allow_range = " HOST_WIDE_INT_PRINT_DEC ")\n",
++		 m_name, range, allow_range);
++      return 0;
++    }
++
++  if (offset_info.min_offset >= m_offset_lower_bound
++      && offset_info.max_offset <= m_offset_upper_bound)
++    {
++      /* Offsets already fit the bounds: mov55.  */
++      extra_cost = 2;
++    }
++  else
++    {
++      if (satisfies_constraint_Is15 (GEN_INT (offset_info.min_offset
++						   - m_offset_lower_bound)))
++	{
++	  /* The adjustment satisfies Is15: add.  */
++	  extra_cost = 4;
++	}
++      else
++	{
++	  /* TODO: Try m_offset_upper_bound instead of m_offset_lower_bound
++		   again.  */
++	  /* The adjustment satisfies Is20: add45 + movi.  */
++	  if (satisfies_constraint_Is20 (GEN_INT (offset_info.min_offset
++						  - m_offset_lower_bound)))
++	    extra_cost = 6;
++	  else
++	    return -1; /* Give up if this constant is too large.  */
++	}
++    }
++
++  for (i = 0; i < chain_size; ++i)
++    {
++      if (m_load_only_p && !(*load_store_info)[i].load_p)
++	continue;
++
++      if (in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass))
++	gain += 2;
++    }
++
++  if (dump_file)
++    fprintf (dump_file,
++	     "%s: gain = %d extra_cost = %d\n",
++	     m_name, gain, extra_cost);
++  /* Net estimate: accumulated gain minus the extra cost chosen above.  */
++  return gain - extra_cost;
++}
++
++/* Re-emit the chain's eligible loads/stores relative to a new base reg.  */
++void
++load_store_optimize_pass::do_optimize (
++  HARD_REG_SET *available_regset,
++  offset_info_t offset_info,
++  load_store_infos_t *load_store_info) const
++{
++  new_base_reg_info_t new_base_reg_info;
++  rtx load_store_insn;
++  unsigned new_base_regnum;
++
++  new_base_regnum  = find_available_reg (available_regset, m_new_base_regclass);
++  gcc_assert (new_base_regnum != INVALID_REGNUM);
++
++  new_base_reg_info =
++    gen_new_base ((*load_store_info)[0].base_reg,
++		  offset_info,
++		  new_base_regnum,
++		  m_offset_lower_bound, m_offset_upper_bound);
++  unsigned i;
++  rtx insn;
++  insn = emit_insn_before (new_base_reg_info.set_insns[0],
++			   (*load_store_info)[0].insn);
++  if (new_base_reg_info.n_set_insns > 1)
++    {
++      gcc_assert (new_base_reg_info.n_set_insns == 2);
++      emit_insn_before (new_base_reg_info.set_insns[1], insn);
++    }
++  /* Replace each eligible access; others keep their original insn.  */
++  for (i = 0; i < load_store_info->length (); ++i)
++    {
++      if (m_load_only_p && !(*load_store_info)[i].load_p)
++	continue;
++
++      if (!in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass))
++	continue;
++
++      HOST_WIDE_INT offset = (*load_store_info)[i].offset;
++
++      if (new_base_reg_info.need_adjust_offset_p)
++	offset = offset + new_base_reg_info.adjust_offset;
++
++      load_store_insn =
++	gen_reg_plus_imm_load_store ((*load_store_info)[i].reg,
++				     new_base_reg_info.reg,
++				     offset,
++				     (*load_store_info)[i].load_p,
++				     (*load_store_info)[i].mem);
++
++      emit_insn_before (load_store_insn, (*load_store_info)[i].insn);
++
++      delete_insn ((*load_store_info)[i].insn);
++    }
++
++  /* Recompute CFG insn info, to update the BB_END () instruction.  */
++  compute_bb_for_insn ();
++}
++/* Build the insn(s) that set up new base register NEW_BASE_REGNO.  */
++static new_base_reg_info_t
++gen_new_base (rtx original_base_reg,
++	      offset_info_t offset_info,
++	      unsigned new_base_regno,
++	      HOST_WIDE_INT offset_lower,
++	      HOST_WIDE_INT offset_upper)
++{
++  new_base_reg_info_t new_base_reg_info;
++
++  /* Use gen_raw_REG instead of gen_rtx_REG to prevent break the reg
++     info for global one.
++     For example, gen_rtx_REG will return frame_pointer_rtx immediate
++     instead of create new rtx for gen_raw_REG (Pmode, FP_REGNUM). */
++  new_base_reg_info.reg = gen_raw_REG (Pmode, new_base_regno);
++
++  /* Setup register info.  */
++  ORIGINAL_REGNO (new_base_reg_info.reg) = ORIGINAL_REGNO (original_base_reg);
++  REG_ATTRS (new_base_reg_info.reg) = REG_ATTRS (original_base_reg);
++
++  if (offset_info.max_offset <= offset_upper
++      && offset_info.min_offset >= offset_lower)
++    {
++      new_base_reg_info.set_insns[0] = gen_movsi (new_base_reg_info.reg,
++						  original_base_reg);
++      new_base_reg_info.n_set_insns = 1;
++      new_base_reg_info.need_adjust_offset_p = false;
++      new_base_reg_info.adjust_offset = 0;
++    }
++  else
++    {
++      /* For example:
++	 lwi45.fe allow -128 ~ -4 range:
++	 offset_lower = #-128
++	 offset_upper = #-4
++
++	 lwi $r2, [$r12 + #10]
++	 ->
++	 addi $r8, $r12, #138       ! $r8 = $r12 + #10 - offset_lower
++				    ! = $r12 + #10 - #-128
++				    ! = $r12 + #138
++	 lwi45.fe $r2, [$r8 - #128] ! [$r8 - #128]
++				    ! = [$r12 + #138 - #128]
++				    ! = [$r12 + #10]
++      */
++      new_base_reg_info.adjust_offset =
++	-(offset_info.min_offset - offset_lower);
++
++      rtx offset = GEN_INT (-new_base_reg_info.adjust_offset);
++
++      /* One add if the delta fits Is15; else a constant load plus an add.  */
++      if (satisfies_constraint_Is15 (offset))
++	{
++	  new_base_reg_info.set_insns[0] =
++	    gen_addsi3(new_base_reg_info.reg,
++		       original_base_reg,
++		       offset);
++
++	  new_base_reg_info.n_set_insns = 1;
++	}
++      else
++	{
++	  if (!satisfies_constraint_Is20 (offset))
++	    gcc_unreachable ();
++
++	  new_base_reg_info.set_insns[1] =
++	    gen_rtx_SET (new_base_reg_info.reg,
++			 GEN_INT (-new_base_reg_info.adjust_offset));
++
++	  new_base_reg_info.set_insns[0] =
++	    gen_addsi3 (new_base_reg_info.reg,
++			new_base_reg_info.reg,
++			original_base_reg);
++
++	  new_base_reg_info.n_set_insns = 2;
++	}
++
++      new_base_reg_info.need_adjust_offset_p = true;
++    }
++
++  return new_base_reg_info;
++}
++/* Test whether INSN is a candidate SImode load/store of [reg + const].  */
++static bool
++nds32_4byte_load_store_reg_plus_offset (
++  rtx_insn *insn,
++  load_store_info_t *load_store_info)
++{
++  if (!INSN_P (insn))
++    return false;
++
++  rtx pattern = PATTERN (insn);
++  rtx mem = NULL_RTX;
++  rtx reg = NULL_RTX;
++  rtx base_reg = NULL_RTX;
++  rtx addr;
++  HOST_WIDE_INT offset = 0;
++  bool load_p = false;
++
++  if (GET_CODE (pattern) != SET)
++    return false;
++
++  if (MEM_P (SET_SRC (pattern)))
++    {
++      mem = SET_SRC (pattern);
++      reg = SET_DEST (pattern);
++      load_p = true;
++    }
++
++  if (MEM_P (SET_DEST (pattern)))
++    {
++      mem = SET_DEST (pattern);
++      reg = SET_SRC (pattern);
++      load_p = false;
++    }
++
++  if (mem == NULL_RTX || reg == NULL_RTX || !REG_P (reg))
++    return false;
++
++  gcc_assert (REG_P (reg));
++
++  addr = XEXP (mem, 0);
++
++  /* We only care about [reg] and [reg+const].  */
++  if (REG_P (addr))
++    {
++      base_reg = addr;
++      offset = 0;
++    }
++  else if (GET_CODE (addr) == PLUS
++	   && CONST_INT_P (XEXP (addr, 1)))
++    {
++      base_reg = XEXP (addr, 0);
++      offset = INTVAL (XEXP (addr, 1));
++      if (!REG_P (base_reg))
++	return false;
++    }
++  else
++    return false;
++
++  /* At least need MIDDLE_REGS.  */
++  if (!in_reg_class_p (reg, MIDDLE_REGS))
++    return false;
++
++  /* A zero offset is rejected (lwi450/swi450).  */
++  if (offset == 0)
++    return false;
++
++  if (in_reg_class_p (reg, LOW_REGS))
++    {
++      /* lwi37.sp/swi37.sp/lwi37/swi37 */
++      if ((REGNO (base_reg) == SP_REGNUM
++	   || REGNO (base_reg) == FP_REGNUM)
++	  && (offset >= 0 && offset < 512 && (offset % 4 == 0)))
++	return false;
++
++      /* lwi333/swi333 */
++      if (in_reg_class_p (base_reg, LOW_REGS)
++	  && (offset >= 0 && offset < 32 && (offset % 4 == 0)))
++	return false;
++    }
++
++  if (load_store_info)
++    {
++      load_store_info->load_p   = load_p;
++      load_store_info->offset   = offset;
++      load_store_info->reg      = reg;
++      load_store_info->base_reg = base_reg;
++      load_store_info->insn     = insn;
++      load_store_info->mem      = mem;
++    }
++  /* NOTE: *load_store_info is already filled even if this check fails.  */
++  if (GET_MODE (reg) != SImode)
++    return false;
++
++  return true;
++}
++
++static bool
++nds32_4byte_load_store_reg_plus_offset_p (rtx_insn *insn)
++{
++  return nds32_4byte_load_store_reg_plus_offset (insn, NULL);
++}
++
++/* Return true when BB holds at least two insns accepted by
++   nds32_4byte_load_store_reg_plus_offset_p.  */
++static bool
++nds32_load_store_opt_profitable_p (basic_block bb)
++{
++  const int min_candidates = 2;
++  int n_candidates = 0;
++  rtx_insn *cur;
++
++  if (dump_file)
++    fprintf (dump_file, "scan bb %d\n", bb->index);
++
++  FOR_BB_INSNS (bb, cur)
++    if (nds32_4byte_load_store_reg_plus_offset_p (cur))
++      ++n_candidates;
++
++  if (dump_file)
++    fprintf (dump_file, " candidate = %d\n", n_candidates);
++
++  return n_candidates >= min_candidates;
++}
++
++/* Summarize the offsets recorded in LOAD_STORE_INFO: the largest and the
++   smallest offset and how many distinct values occur.  All three fields
++   are zero for an empty vector.  */
++static offset_info_t
++nds32_get_offset_info (auto_vec<load_store_info_t, 64> *load_store_info)
++{
++  std::set<HOST_WIDE_INT> distinct;
++  offset_info_t summary;
++  unsigned idx;
++
++  summary.max_offset = 0;
++  summary.min_offset = 0;
++  summary.num_offset = 0;
++
++  for (idx = 0; idx < load_store_info->length (); idx++)
++    {
++      HOST_WIDE_INT cur = (*load_store_info)[idx].offset;
++
++      /* The first entry seeds both bounds.  */
++      if (idx == 0 || cur > summary.max_offset)
++	summary.max_offset = cur;
++      if (idx == 0 || cur < summary.min_offset)
++	summary.min_offset = cur;
++      distinct.insert (cur);
++    }
++
++  summary.num_offset = distinct.size ();
++  return summary;
++}
++
++static void
++nds32_do_load_store_opt (basic_block bb)
++{
++  rtx_insn *insn;
++  load_store_info_t load_store_info;
++  auto_vec<load_store_info_t, 64> load_store_infos[NDS32_GPR_NUM];
++  HARD_REG_SET available_regset;
++  int i;
++  unsigned j;
++  unsigned regno;
++  unsigned polluting;
++  df_ref def;
++  /* DIRTY[r] is set once register r is defined again (or is call-used
++     at a call) after a load/store has been chained on it.
++     For example:
++
++     lwi $r2, [$r3 + #0x100]
++     mov $r3, $r4            ! $r3 is dirty after this instruction.
++     lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load.
++   */
++  bool dirty[NDS32_GPR_NUM];
++
++  if (dump_file)
++    fprintf (dump_file, "try load store opt for bb %d\n", bb->index);
++
++  for (i = 0; i < NDS32_GPR_NUM; ++i)
++    dirty[i] = false;
++
++  FOR_BB_INSNS (bb, insn)
++    {
++      if (!INSN_P (insn))
++	continue;
++
++      polluting = INVALID_REGNUM;
++
++      /* Mark each GPR defined here dirty if its chain is not empty.  */
++      FOR_EACH_INSN_DEF (def, insn)
++	{
++	  regno = DF_REF_REGNO (def);
++
++	  if (!NDS32_IS_GPR_REGNUM (regno))
++	    continue;
++
++	  if (!load_store_infos[regno].is_empty ())
++	    {
++	      /* Remember the first redefinition in POLLUTING: this very
++		 insn may still use the register as its base.  */
++	      if (dirty[regno] == false)
++		polluting = regno;
++
++	      dirty[regno] = true;
++	    }
++	}
++
++      /* A call dirties every call-used register whose chain is not empty.  */
++      if (CALL_P (insn))
++	{
++	  for (i = 0; i < NDS32_GPR_NUM; ++i)
++	    {
++	      if (call_used_regs[i] && !load_store_infos[i].is_empty ())
++		dirty[i] = true;
++	    }
++	}
++
++      if (nds32_4byte_load_store_reg_plus_offset (insn, &load_store_info))
++	{
++	  regno = REGNO (load_store_info.base_reg);
++	  gcc_assert (NDS32_IS_GPR_REGNUM (regno));
++
++	  /* Base already dirty before this insn: stop scanning the block.  */
++	  if (dirty[regno] && polluting != regno)
++	    break;
++
++	  /* If this is the first use of the register as a base and the
++	     load overwrites it right away, we don't push it.  */
++	  if (regno == REGNO (load_store_info.reg) && load_store_info.load_p
++	      && dirty[regno] == false)
++	    continue;
++
++	  load_store_infos[regno].safe_push (load_store_info);
++	}
++    }
++  for (i = 0; i < NDS32_GPR_NUM; ++i)
++    {
++      if (load_store_infos[i].length () <= 1)
++	{
++	  if (dump_file && load_store_infos[i].length () == 1)
++	    fprintf (dump_file,
++		     "Skip Chain for $r%d since chain size only 1\n",
++		     i);
++	  continue;
++	}
++
++      if (dump_file)
++	{
++	  fprintf (dump_file,
++		   "Chain for $r%d: (size = %u)\n",
++		   i, load_store_infos[i].length ());
++
++	  for (j = 0; j < load_store_infos[i].length (); ++j)
++	    {
++	      fprintf (dump_file,
++		       "regno = %d base_regno = %d "
++		       "offset = " HOST_WIDE_INT_PRINT_DEC " "
++		       "load_p = %d UID = %u\n",
++		       REGNO (load_store_infos[i][j].reg),
++		       REGNO (load_store_infos[i][j].base_reg),
++		       load_store_infos[i][j].offset,
++		       load_store_infos[i][j].load_p,
++		       INSN_UID (load_store_infos[i][j].insn));
++	    }
++	}
++
++      nds32_get_available_reg_set (bb,
++				   load_store_infos[i][0].insn,
++				   load_store_infos[i].last ().insn,
++				   &available_regset);
++
++      if (dump_file)
++	{
++	  print_hard_reg_set (dump_file, "", available_regset);
++	}
++
++      offset_info_t offset_info = nds32_get_offset_info (&load_store_infos[i]);
++      if (dump_file)
++	{
++	  fprintf (dump_file,
++		   "max offset = " HOST_WIDE_INT_PRINT_DEC "\n"
++		   "min offset = " HOST_WIDE_INT_PRINT_DEC "\n"
++		   "num offset = %d\n",
++		   offset_info.max_offset,
++		   offset_info.min_offset,
++		   offset_info.num_offset);
++	}
++
++      int gain;
++      int best_gain = 0;
++      const load_store_optimize_pass *best_load_store_optimize_pass = NULL;
++
++      for (j = 0; j < N_LOAD_STORE_OPT_TYPE; ++j)
++	{
++	  gain = load_store_optimizes[j]->calc_gain (&available_regset,
++						     offset_info,
++						     &load_store_infos[i]);
++
++	  if (dump_file)
++	    fprintf (dump_file, "%s gain = %d\n",
++		     load_store_optimizes[j]->name (), gain);
++
++	  if (gain > best_gain)
++	    {
++	      best_gain = gain;
++	      best_load_store_optimize_pass = load_store_optimizes[j];
++	    }
++	}
++
++      if (best_load_store_optimize_pass)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, "%s is most profit, optimize it!\n",
++		     best_load_store_optimize_pass->name ());
++
++	  best_load_store_optimize_pass->do_optimize (&available_regset,
++						      offset_info,
++						      &load_store_infos[i]);
++
++	  df_insn_rescan_all ();
++	}
++
++    }
++}
++
++/* Body of the pass: set up dataflow information, then run the
++   optimization on every basic block that looks profitable.  */
++static unsigned int
++nds32_load_store_opt (void)
++{
++  basic_block cur_bb;
++
++  df_set_flags (DF_LR_RUN_DCE);
++  df_note_add_problem ();
++  df_analyze ();
++
++  FOR_EACH_BB_FN (cur_bb, cfun)
++    if (nds32_load_store_opt_profitable_p (cur_bb))
++      nds32_do_load_store_opt (cur_bb);
++
++  return 1;
++}
++
++const pass_data pass_data_nds32_load_store_opt =
++{
++  RTL_PASS,				/* type */
++  "load_store_opt",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_df_finish,			/* todo_flags_finish */
++};
++
++class pass_nds32_load_store_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_load_store_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_load_store_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gate on the two target flags, then run the pass.  */
++  bool gate (function *) { return TARGET_16_BIT && TARGET_LOAD_STORE_OPT; }
++  unsigned int execute (function *) { return nds32_load_store_opt (); }
++};
++
++rtl_opt_pass *
++make_pass_nds32_load_store_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_load_store_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-load-store-opt.h b/gcc/config/nds32/nds32-load-store-opt.h
+new file mode 100644
+index 0000000..f94b56a
+--- /dev/null
++++ b/gcc/config/nds32/nds32-load-store-opt.h
+@@ -0,0 +1,117 @@
++/* Prototypes for load-store-opt of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef NDS32_LOAD_STORE_OPT_H
++#define NDS32_LOAD_STORE_OPT_H
++
++/* Post-modify kind stored in load_store_info_t::post_type.  */
++
++enum nds32_memory_post_type
++{
++  NDS32_NONE,
++  NDS32_POST_INC,
++  NDS32_POST_DEC
++};
++
++typedef struct {
++  rtx reg;
++  rtx base_reg;
++  rtx offset;		/* An rtx here, unlike load_store_info_t's constant.  */
++  HOST_WIDE_INT shift;
++  bool load_p;
++  rtx insn;
++} rr_load_store_info_t;
++
++typedef struct {
++  rtx reg;		/* Register loaded into or stored from.  */
++  rtx base_reg;		/* Base register of the address.  */
++  HOST_WIDE_INT offset;	/* Constant added to BASE_REG (0 for plain [reg]).  */
++  bool load_p;		/* True for a load, false for a store.  */
++  rtx_insn *insn;	/* The load/store insn itself.  */
++  rtx mem;		/* The MEM operand of INSN.  */
++  int new_reg;
++  int order;
++  int group;
++  bool place;
++  enum nds32_memory_post_type post_type;
++} load_store_info_t;
++
++typedef struct {
++  HOST_WIDE_INT max_offset;
++  HOST_WIDE_INT min_offset;
++  /* Number of distinct offset values.  */
++  int num_offset;
++} offset_info_t;
++
++typedef struct {
++  rtx set_insns[2];
++  int n_set_insns;	/* Presumably # of used set_insns[]; TODO confirm.  */
++  rtx reg;
++  bool need_adjust_offset_p;
++  HOST_WIDE_INT adjust_offset;
++} new_base_reg_info_t;
++
++typedef struct {
++  unsigned int amount;	/* Presumably # of regs in start..end; TODO confirm.  */
++  unsigned int start;
++  unsigned int end;
++} available_reg_info_t;
++
++typedef auto_vec<load_store_info_t, 64> load_store_infos_t;
++
++/* One load/store rewriting strategy.  calc_gain scores a chain of
++   accesses and do_optimize applies the strategy to it; both take a
++   HARD_REG_SET, the chain's offset summary and the chain itself.  */
++class load_store_optimize_pass
++{
++public:
++  load_store_optimize_pass (enum reg_class, enum reg_class,
++			    HOST_WIDE_INT, HOST_WIDE_INT,
++			    bool, const char *);
++  /* Name of the strategy.  */
++  const char *name () const { return m_name; }
++  int calc_gain (HARD_REG_SET *, offset_info_t, load_store_infos_t *) const;
++  void do_optimize (HARD_REG_SET *, offset_info_t,
++		    load_store_infos_t *) const;
++
++private:
++  /* NOTE(review): presumably set from the constructor arguments.  */
++  enum reg_class m_allow_regclass;
++  enum reg_class m_new_base_regclass;
++  HOST_WIDE_INT m_offset_lower_bound;
++  HOST_WIDE_INT m_offset_upper_bound;
++  bool m_load_only_p;
++  const char *m_name;
++};
++
++/* Return an SImode move between REG and the memory at BASE_REG + OFFSET:
++   a load into REG if LOAD_P, otherwise a store of REG.  The new MEM
++   takes its attributes from OLDMEM.  */
++static inline rtx
++gen_reg_plus_imm_load_store (rtx reg, rtx base_reg,
++			     HOST_WIDE_INT offset, bool load_p, rtx oldmem)
++{
++  rtx new_mem = gen_rtx_MEM (SImode, plus_constant (Pmode, base_reg, offset));
++
++  MEM_COPY_ATTRIBUTES (new_mem, oldmem);
++  return load_p ? gen_movsi (reg, new_mem) : gen_movsi (new_mem, reg);
++}
++
++#endif /* ! NDS32_LOAD_STORE_OPT_H */
+diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c
+index def8eda..3881df7 100644
+--- a/gcc/config/nds32/nds32-md-auxiliary.c
++++ b/gcc/config/nds32/nds32-md-auxiliary.c
+@@ -25,17 +25,74 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
+-#include "rtl.h"
+ #include "tree.h"
+-#include "tm_p.h"
+-#include "optabs.h"		/* For GEN_FCN.  */
+-#include "recog.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
+ #include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
+ #include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++
++/* ------------------------------------------------------------------------ */
++
++/* This file is divided into three parts:
++
++     PART 1: Auxiliary static function definitions.
++
++     PART 2: Auxiliary function for expand RTL pattern.
++
++     PART 3: Auxiliary function for output asm template.  */
+ 
+ /* ------------------------------------------------------------------------ */
+ 
++/* PART 1: Auxiliary static function definitions.  */
++
++/* Map REGNO, which must be one of $r28..$r31 (fp, gp, lp, sp), to the
++   single-bit value 0x8, 0x4, 0x2 or 0x1 respectively.  */
++static int
++nds32_regno_to_enable4 (unsigned regno)
++{
++  if (regno == 28)	/* $r28/fp */
++    return 0x8;
++  if (regno == 29)	/* $r29/gp */
++    return 0x4;
++  if (regno == 30)	/* $r30/lp */
++    return 0x2;
++  if (regno == 31)	/* $r31/sp */
++    return 0x1;
++
++  /* Any other register number must not reach this function.  */
++  gcc_unreachable ();
++}
++
+ /* A helper function to return character based on byte size.  */
+ static char
+ nds32_byte_to_size (int byte)
+@@ -54,796 +111,3825 @@ nds32_byte_to_size (int byte)
+     }
+ }
+ 
+-/* A helper function to return memory format.  */
+-enum nds32_16bit_address_type
+-nds32_mem_format (rtx op)
++static int
++nds32_inverse_cond_code (int code)
+ {
+-  machine_mode mode_test;
+-  int val;
+-  int regno;
+-
+-  if (!TARGET_16_BIT)
+-    return ADDRESS_NOT_16BIT_FORMAT;
+-
+-  mode_test = GET_MODE (op);
+-
+-  op = XEXP (op, 0);
++  switch (code)
++    {
++      case NE:
++	return EQ;
++      case EQ:
++	return NE;
++      case GT:
++	return LE;
++      case LE:
++	return GT;
++      case GE:
++	return LT;
++      case LT:
++	return GE;
++      default:
++	gcc_unreachable ();
++    }
++}
+ 
+-  /* 45 format.  */
+-  if (GET_CODE (op) == REG && (mode_test == SImode))
+-    return ADDRESS_REG;
++/* Return the two-letter condition mnemonic ("ne", "eq", "gt", "le",
++   "ge" or "lt") for comparison CODE.  Any other code is treated as
++   unreachable.  */
++static const char *
++nds32_cond_code_str (int code)
++{
++  if (code == NE)
++    return "ne";
++  if (code == EQ)
++    return "eq";
++  if (code == GT)
++    return "gt";
++  if (code == LE)
++    return "le";
++  if (code == GE)
++    return "ge";
++  if (code == LT)
++    return "lt";
++
++  gcc_unreachable ();
++}
+ 
+-  /* 333 format for QI/HImode.  */
+-  if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM))
+-    return ADDRESS_LO_REG_IMM3U;
++static void
++output_cond_branch (int code, const char *suffix, bool r5_p,
++		    bool long_jump_p, rtx *operands)
++{
++  char pattern[256];
++  const char *cond_code;
++  bool align_p = NDS32_ALIGN_P ();
++  const char *align = align_p ? "\t.align\t2\n" : "";
+ 
+-  /* post_inc 333 format.  */
+-  if ((GET_CODE (op) == POST_INC) && (mode_test == SImode))
++  if (r5_p && REGNO (operands[2]) == 5 && TARGET_16_BIT)
+     {
+-      regno = REGNO(XEXP (op, 0));
+-
+-      if (regno < 8)
+-	return ADDRESS_POST_INC_LO_REG_IMM3U;
++      /* This is a special case for beqs38 and bnes38:
++	 operand 2 can't be $r5, and such a compare is almost meaningless,
++	 however it may occur after copy propagation.  */
++      if (code == EQ)
++	{
++	  /* $r5 == $r5 always taken! */
++	  if (long_jump_p)
++	    snprintf (pattern, sizeof (pattern),
++		      "j\t%%3");
++	  else
++	    snprintf (pattern, sizeof (pattern),
++		      "j8\t%%3");
++	}
++      else
++	/* Don't output anything since $r5 != $r5 never taken! */
++	pattern[0] = '\0';
+     }
+-
+-  /* post_inc 333 format.  */
+-  if ((GET_CODE (op) == POST_MODIFY)
+-      && (mode_test == SImode)
+-      && (REG_P (XEXP (XEXP (op, 1), 0)))
+-      && (CONST_INT_P (XEXP (XEXP (op, 1), 1))))
++  else if (long_jump_p)
+     {
+-      regno = REGNO (XEXP (XEXP (op, 1), 0));
+-      val = INTVAL (XEXP (XEXP (op, 1), 1));
+-      if (regno < 8 && val < 32)
+-	return ADDRESS_POST_INC_LO_REG_IMM3U;
++      int inverse_code = nds32_inverse_cond_code (code);
++      cond_code = nds32_cond_code_str (inverse_code);
++
++      /*      b<cond><suffix>  $r0, $r1, .L0
++	    =>
++	      b<inverse_cond><suffix>  $r0, $r1, .LCB0
++	      j  .L0
++	    .LCB0:
++
++	    or
++
++	      b<cond>s38  $r1, .L0
++	    =>
++	      b<inverse_cond>s38  $r1, .LCB0
++	      j  .L0
++	    .LCB0:
++      */
++      if (r5_p && TARGET_16_BIT)
++	{
++	  snprintf (pattern, sizeof (pattern),
++		    "b%ss38\t %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:",
++		    cond_code, align);
++	}
++      else
++	{
++	  snprintf (pattern, sizeof (pattern),
++		    "b%s%s\t%%1, %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:",
++		    cond_code, suffix, align);
++	}
+     }
+-
+-  if ((GET_CODE (op) == PLUS)
+-      && (GET_CODE (XEXP (op, 0)) == REG)
+-      && (GET_CODE (XEXP (op, 1)) == CONST_INT))
++  else
+     {
+-      val = INTVAL (XEXP (op, 1));
+-
+-      regno = REGNO(XEXP (op, 0));
+-
+-      if (regno > 7
+-	  && regno != SP_REGNUM
+-	  && regno != FP_REGNUM)
+-	return ADDRESS_NOT_16BIT_FORMAT;
+-
+-      switch (mode_test)
++      cond_code = nds32_cond_code_str (code);
++      if (r5_p && TARGET_16_BIT)
+ 	{
+-	case QImode:
+-	  /* 333 format.  */
+-	  if (val >= 0 && val < 8 && regno < 8)
+-	    return ADDRESS_LO_REG_IMM3U;
+-	  break;
+-
+-	case HImode:
+-	  /* 333 format.  */
+-	  if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8)
+-	    return ADDRESS_LO_REG_IMM3U;
+-	  break;
+-
+-	case SImode:
+-	case SFmode:
+-	case DFmode:
+-	  /* fp imply 37 format.  */
+-	  if ((regno == FP_REGNUM) &&
+-	      (val >= 0 && val < 512 && (val % 4 == 0)))
+-	    return ADDRESS_FP_IMM7U;
+-	  /* sp imply 37 format.  */
+-	  else if ((regno == SP_REGNUM) &&
+-		   (val >= 0 && val < 512 && (val % 4 == 0)))
+-	    return ADDRESS_SP_IMM7U;
+-	  /* 333 format.  */
+-	  else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8)
+-	    return ADDRESS_LO_REG_IMM3U;
+-	  break;
+-
+-	default:
+-	  break;
++	  /* b<cond>s38  $r1, .L0   */
++	  snprintf (pattern, sizeof (pattern),
++		    "b%ss38\t %%2, %%3", cond_code);
++	}
++      else
++	{
++	  /* b<cond><suffix>  $r0, $r1, .L0   */
++	  snprintf (pattern, sizeof (pattern),
++		    "b%s%s\t%%1, %%2, %%3", cond_code, suffix);
+ 	}
+     }
+ 
+-  return ADDRESS_NOT_16BIT_FORMAT;
++  output_asm_insn (pattern, operands);
+ }
+ 
+-/* Output 16-bit store.  */
+-const char *
+-nds32_output_16bit_store (rtx *operands, int byte)
++static void
++output_cond_branch_compare_zero (int code, const char *suffix,
++				 bool long_jump_p, rtx *operands,
++				 bool ta_implied_p)
+ {
+-  char pattern[100];
+-  char size;
+-  rtx code = XEXP (operands[0], 0);
+-
+-  size = nds32_byte_to_size (byte);
++  char pattern[256];
++  const char *cond_code;
++  bool align_p = NDS32_ALIGN_P ();
++  const char *align = align_p ? "\t.align\t2\n" : "";
++  if (long_jump_p)
++    {
++      int inverse_code = nds32_inverse_cond_code (code);
++      cond_code = nds32_cond_code_str (inverse_code);
+ 
+-  switch (nds32_mem_format (operands[0]))
++      if (ta_implied_p && TARGET_16_BIT)
++	{
++	  /*    b<cond>z<suffix>  .L0
++	      =>
++		b<inverse_cond>z<suffix>  .LCB0
++		j  .L0
++	      .LCB0:
++	   */
++	  snprintf (pattern, sizeof (pattern),
++		    "b%sz%s\t.LCB%%=\n\tj\t%%2\n%s.LCB%%=:",
++		    cond_code, suffix, align);
++	}
++      else
++	{
++	  /*      b<cond>z<suffix>  $r0, .L0
++		=>
++		  b<inverse_cond>z<suffix>  $r0, .LCB0
++		  j  .L0
++		.LCB0:
++	   */
++	  snprintf (pattern, sizeof (pattern),
++		    "b%sz%s\t%%1, .LCB%%=\n\tj\t%%2\n%s.LCB%%=:",
++		    cond_code, suffix, align);
++	}
++    }
++  else
+     {
+-    case ADDRESS_REG:
+-      operands[0] = code;
+-      output_asm_insn ("swi450\t%1, [%0]", operands);
+-      break;
+-    case ADDRESS_LO_REG_IMM3U:
+-      snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size);
+-      output_asm_insn (pattern, operands);
+-      break;
+-    case ADDRESS_POST_INC_LO_REG_IMM3U:
+-      snprintf (pattern, sizeof (pattern), "s%ci333.bi\t%%1, %%0", size);
+-      output_asm_insn (pattern, operands);
+-      break;
+-    case ADDRESS_FP_IMM7U:
+-      output_asm_insn ("swi37\t%1, %0", operands);
+-      break;
+-    case ADDRESS_SP_IMM7U:
+-      /* Get immediate value and set back to operands[1].  */
+-      operands[0] = XEXP (code, 1);
+-      output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands);
+-      break;
+-    default:
+-      break;
++      cond_code = nds32_cond_code_str (code);
++      if (ta_implied_p && TARGET_16_BIT)
++	{
++	  /* b<cond>z<suffix>  .L0  */
++	  snprintf (pattern, sizeof (pattern),
++		    "b%sz%s\t%%2", cond_code, suffix);
++	}
++      else
++	{
++	  /* b<cond>z<suffix>  $r0, .L0  */
++	  snprintf (pattern, sizeof (pattern),
++		    "b%sz%s\t%%1, %%2", cond_code, suffix);
++	}
+     }
+ 
+-  return "";
++  output_asm_insn (pattern, operands);
+ }
+ 
+-/* Output 16-bit load.  */
+-const char *
+-nds32_output_16bit_load (rtx *operands, int byte)
++static void
++nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p)
+ {
+-  char pattern[100];
+-  unsigned char size;
+-  rtx code = XEXP (operands[1], 0);
++  rtx src_high_part;
++  rtx dst_high_part, dst_low_part;
+ 
+-  size = nds32_byte_to_size (byte);
++  dst_high_part = nds32_di_high_part_subreg (dst);
++  src_high_part = nds32_di_high_part_subreg (src);
++  dst_low_part = nds32_di_low_part_subreg (dst);
+ 
+-  switch (nds32_mem_format (operands[1]))
++  if (CONST_INT_P (shiftamount))
+     {
+-    case ADDRESS_REG:
+-      operands[1] = code;
+-      output_asm_insn ("lwi450\t%0, [%1]", operands);
+-      break;
+-    case ADDRESS_LO_REG_IMM3U:
+-      snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size);
+-      output_asm_insn (pattern, operands);
+-      break;
+-    case ADDRESS_POST_INC_LO_REG_IMM3U:
+-      snprintf (pattern, sizeof (pattern), "l%ci333.bi\t%%0, %%1", size);
+-      output_asm_insn (pattern, operands);
+-      break;
+-    case ADDRESS_FP_IMM7U:
+-      output_asm_insn ("lwi37\t%0, %1", operands);
+-      break;
+-    case ADDRESS_SP_IMM7U:
+-      /* Get immediate value and set back to operands[0].  */
+-      operands[1] = XEXP (code, 1);
+-      output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands);
+-      break;
+-    default:
+-      break;
++      if (INTVAL (shiftamount) < 32)
++	{
++	  if (logic_shift_p)
++	    {
++	      emit_insn (gen_uwext (dst_low_part, src,
++						  shiftamount));
++	      emit_insn (gen_lshrsi3 (dst_high_part, src_high_part,
++						     shiftamount));
++	    }
++	  else
++	    {
++	      emit_insn (gen_wext (dst_low_part, src,
++						 shiftamount));
++	      emit_insn (gen_ashrsi3 (dst_high_part, src_high_part,
++						     shiftamount));
++	    }
++	}
++      else
++	{
++	  rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode);
++
++	  if (logic_shift_p)
++	    {
++	      emit_insn (gen_lshrsi3 (dst_low_part, src_high_part,
++						    new_shift_amout));
++	      emit_move_insn (dst_high_part, const0_rtx);
++	    }
++	  else
++	    {
++	      emit_insn (gen_ashrsi3 (dst_low_part, src_high_part,
++						    new_shift_amout));
++	      emit_insn (gen_ashrsi3 (dst_high_part, src_high_part,
++						     GEN_INT (31)));
++	    }
++	}
+     }
++  else
++    {
++      rtx dst_low_part_l32, dst_high_part_l32;
++      rtx dst_low_part_g32, dst_high_part_g32;
++      rtx new_shift_amout, select_reg;
++      dst_low_part_l32 = gen_reg_rtx (SImode);
++      dst_high_part_l32 = gen_reg_rtx (SImode);
++      dst_low_part_g32 = gen_reg_rtx (SImode);
++      dst_high_part_g32 = gen_reg_rtx (SImode);
++      new_shift_amout = gen_reg_rtx (SImode);
++      select_reg = gen_reg_rtx (SImode);
++
++      emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f)));
++
++      if (logic_shift_p)
++	{
++	  /*
++	     if (shiftamount < 32)
++	       dst_low_part = wext (src, shiftamount)
++	       dst_high_part = src_high_part >> shiftamount
++	     else
++	       dst_low_part = src_high_part >> (shiftamount & 0x1f)
++	       dst_high_part = 0
++	  */
++	  emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount));
++	  emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part,
++						     shiftamount));
++
++	  emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
++	  emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part,
++						    new_shift_amout));
++	  emit_move_insn (dst_high_part_g32, const0_rtx);
++	}
++      else
++	{
++	  /*
++	     if (shiftamount < 32)
++	       dst_low_part = wext (src, shiftamount)
++	       dst_high_part = src_high_part >> shiftamount
++	     else
++	       dst_low_part = src_high_part >> (shiftamount & 0x1f)
++	       # shift 31 for sign extend
++	       dst_high_part = src_high_part >> 31
++	  */
++	  emit_insn (gen_wext (dst_low_part_l32, src, shiftamount));
++	  emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part,
++						     shiftamount));
++
++	  emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
++	  emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part,
++						    new_shift_amout));
++	  emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part,
++						     GEN_INT (31)));
++	}
+ 
+-  return "";
++      emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
++
++      emit_insn (gen_cmovnsi (dst_low_part, select_reg,
++			      dst_low_part_l32, dst_low_part_g32));
++      emit_insn (gen_cmovnsi (dst_high_part, select_reg,
++			      dst_high_part_l32, dst_high_part_g32));
++  }
+ }
+ 
+-/* Output 32-bit store.  */
+-const char *
+-nds32_output_32bit_store (rtx *operands, int byte)
+-{
+-  char pattern[100];
+-  unsigned char size;
+-  rtx code = XEXP (operands[0], 0);
++/* ------------------------------------------------------------------------ */
+ 
+-  size = nds32_byte_to_size (byte);
++/* PART 2: Auxiliary function for expand RTL pattern.  */
+ 
+-  switch (GET_CODE (code))
++enum nds32_expand_result_type
++nds32_expand_cbranch (rtx *operands)
++{
++  rtx tmp_reg;
++  enum rtx_code code;
++
++  code = GET_CODE (operands[0]);
++
++  /* If operands[2] is (const_int 0),
++     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
++     So we have gcc generate original template rtx.  */
++  if (GET_CODE (operands[2]) == CONST_INT)
++    if (INTVAL (operands[2]) == 0)
++      if ((code != GTU)
++	  && (code != GEU)
++	  && (code != LTU)
++	  && (code != LEU))
++	return EXPAND_CREATE_TEMPLATE;
++
++  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
++     behavior for the comparison, we might need to generate other
++     rtx patterns to achieve same semantic.  */
++  switch (code)
+     {
+-    case REG:
+-      /* (mem (reg X))
+-	 => access location by using register,
+-	 use "sbi / shi / swi" */
+-      snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size);
+-      break;
+-
+-    case SYMBOL_REF:
+-    case CONST:
+-      /* (mem (symbol_ref X))
+-	 (mem (const (...)))
+-	 => access global variables,
+-	 use "sbi.gp / shi.gp / swi.gp" */
+-      operands[0] = XEXP (operands[0], 0);
+-      snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size);
+-      break;
++    case GT:
++    case GTU:
++      if (GET_CODE (operands[2]) == CONST_INT)
++	{
++	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
++	  if (optimize_size || optimize == 0)
++	    tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
++	  else
++	    tmp_reg = gen_reg_rtx (SImode);
++
++	  /* We want to plus 1 into the integer value
++	     of operands[2] to create 'slt' instruction.
++	     This calculation is performed on the host machine,
++	     which may be 64-bit integer.
++	     So the meaning of the calculation result may be
++	     different from the 32-bit nds32 target.
++
++	     For example:
++	       0x7fffffff + 0x1 -> 0x80000000,
++	       this value is POSITIVE on 64-bit machine,
++	       but the expected value on 32-bit nds32 target
++	       should be NEGATIVE value.
++
++	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
++	     explicitly create SImode constant rtx.  */
++	  enum rtx_code cmp_code;
++
++	  rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
++	  if (satisfies_constraint_Is15 (plus1))
++	    {
++	      operands[2] = plus1;
++	      cmp_code = EQ;
++	      if (code == GT)
++		{
++		  /* GT, use slts instruction */
++		  emit_insn (
++		    gen_slts_compare (tmp_reg, operands[1], operands[2]));
++		}
++	      else
++		{
++		  /* GTU, use slt instruction */
++		  emit_insn (
++		    gen_slt_compare  (tmp_reg, operands[1], operands[2]));
++		}
++	    }
++	  else
++	    {
++	      cmp_code = NE;
++	      if (code == GT)
++		{
++		  /* GT, use slts instruction */
++		  emit_insn (
++		    gen_slts_compare (tmp_reg, operands[2], operands[1]));
++		}
++	      else
++		{
++		  /* GTU, use slt instruction */
++		  emit_insn (
++		    gen_slt_compare  (tmp_reg, operands[2], operands[1]));
++		}
++	    }
++
++	  PUT_CODE (operands[0], cmp_code);
++	  operands[1] = tmp_reg;
++	  operands[2] = const0_rtx;
++	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
++				     operands[2], operands[3]));
++
++	  return EXPAND_DONE;
++	}
++      else
++	{
++	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
++	  if (optimize_size || optimize == 0)
++	    tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
++	  else
++	    tmp_reg = gen_reg_rtx (SImode);
++
++	  if (code == GT)
++	    {
++	      /* GT, use slts instruction */
++	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
++	    }
++	  else
++	    {
++	      /* GTU, use slt instruction */
++	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
++	    }
++
++	  PUT_CODE (operands[0], NE);
++	  operands[1] = tmp_reg;
++	  operands[2] = const0_rtx;
++	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
++				     operands[2], operands[3]));
++
++	  return EXPAND_DONE;
++	}
+ 
+-    case POST_INC:
+-      /* (mem (post_inc reg))
+-	 => access location by using register which will be post increment,
+-	 use "sbi.bi / shi.bi / swi.bi" */
+-      snprintf (pattern, sizeof (pattern),
+-		"s%ci.bi\t%%1, %%0, %d", size, byte);
+-      break;
++    case GE:
++    case GEU:
++      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
++      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
++      if (optimize_size || optimize == 0)
++	tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
++      else
++	tmp_reg = gen_reg_rtx (SImode);
+ 
+-    case POST_DEC:
+-      /* (mem (post_dec reg))
+-	 => access location by using register which will be post decrement,
+-	 use "sbi.bi / shi.bi / swi.bi" */
+-      snprintf (pattern, sizeof (pattern),
+-		"s%ci.bi\t%%1, %%0, -%d", size, byte);
+-      break;
++      if (code == GE)
++	{
++	  /* GE, use slts instruction */
++	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
++	}
++      else
++	{
++	  /* GEU, use slt instruction */
++	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
++	}
+ 
+-    case POST_MODIFY:
+-      switch (GET_CODE (XEXP (XEXP (code, 1), 1)))
++      PUT_CODE (operands[0], EQ);
++      operands[1] = tmp_reg;
++      operands[2] = const0_rtx;
++      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
++				 operands[2], operands[3]));
++
++      return EXPAND_DONE;
++
++    case LT:
++    case LTU:
++      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
++      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
++      if (optimize_size || optimize == 0)
++	tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
++      else
++	tmp_reg = gen_reg_rtx (SImode);
++
++      if (code == LT)
+ 	{
+-	case REG:
+-	case SUBREG:
+-	  /* (mem (post_modify (reg) (plus (reg) (reg))))
+-	     => access location by using register which will be
+-	     post modified with reg,
+-	     use "sb.bi/ sh.bi / sw.bi" */
+-	  snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size);
+-	  break;
+-	case CONST_INT:
+-	  /* (mem (post_modify (reg) (plus (reg) (const_int))))
+-	     => access location by using register which will be
+-	     post modified with const_int,
+-	     use "sbi.bi/ shi.bi / swi.bi" */
+-	  snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size);
+-	  break;
+-	default:
+-	  abort ();
++	  /* LT, use slts instruction */
++	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
++	}
++      else
++	{
++	  /* LTU, use slt instruction */
++	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+ 	}
+-      break;
+ 
+-    case PLUS:
+-      switch (GET_CODE (XEXP (code, 1)))
++      PUT_CODE (operands[0], NE);
++      operands[1] = tmp_reg;
++      operands[2] = const0_rtx;
++      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
++				 operands[2], operands[3]));
++
++      return EXPAND_DONE;
++
++    case LE:
++    case LEU:
++      if (GET_CODE (operands[2]) == CONST_INT)
+ 	{
+-	case REG:
+-	case SUBREG:
+-	  /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg))
+-	     => access location by adding two registers,
+-	     use "sb / sh / sw" */
+-	  snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size);
+-	  break;
+-	case CONST_INT:
+-	  /* (mem (plus reg const_int))
+-	     => access location by adding one register with const_int,
+-	     use "sbi / shi / swi" */
+-	  snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size);
+-	  break;
+-	default:
+-	  abort ();
++	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
++	  if (optimize_size || optimize == 0)
++	    tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
++	  else
++	    tmp_reg = gen_reg_rtx (SImode);
++
++	  enum rtx_code cmp_code;
++	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
++	     We better have an assert here in case GCC does not properly
++	     optimize it away.  The INT_MAX here is 0x7fffffff for target.  */
++	  rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
++	  if (satisfies_constraint_Is15 (plus1))
++	    {
++	      operands[2] = plus1;
++	      cmp_code = NE;
++	      if (code == LE)
++		{
++		  /* LE, use slts instruction */
++		  emit_insn (
++		    gen_slts_compare (tmp_reg, operands[1], operands[2]));
++		}
++	      else
++		{
++		  /* LEU, use slt instruction */
++		  emit_insn (
++		    gen_slt_compare  (tmp_reg, operands[1], operands[2]));
++		}
++	    }
++	  else
++	    {
++	      cmp_code = EQ;
++	      if (code == LE)
++		{
++		  /* LE, use slts instruction */
++		  emit_insn (
++		    gen_slts_compare (tmp_reg, operands[2], operands[1]));
++		}
++	      else
++		{
++		  /* LEU, use slt instruction */
++		  emit_insn (
++		    gen_slt_compare  (tmp_reg, operands[2], operands[1]));
++		}
++	    }
++
++	  PUT_CODE (operands[0], cmp_code);
++	  operands[1] = tmp_reg;
++	  operands[2] = const0_rtx;
++	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
++				     operands[2], operands[3]));
++
++	  return EXPAND_DONE;
++	}
++      else
++	{
++	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
++	  if (optimize_size || optimize == 0)
++	    tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
++	  else
++	    tmp_reg = gen_reg_rtx (SImode);
++
++	  if (code == LE)
++	    {
++	      /* LE, use slts instruction */
++	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
++	    }
++	  else
++	    {
++	      /* LEU, use slt instruction */
++	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
++	    }
++
++	  PUT_CODE (operands[0], EQ);
++	  operands[1] = tmp_reg;
++	  operands[2] = const0_rtx;
++	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
++				     operands[2], operands[3]));
++
++	  return EXPAND_DONE;
+ 	}
+-      break;
+ 
+-    case LO_SUM:
+-      operands[2] = XEXP (code, 1);
+-      operands[0] = XEXP (code, 0);
+-      snprintf (pattern, sizeof (pattern),
+-		"s%ci\t%%1, [%%0 + lo12(%%2)]", size);
+-      break;
++    case EQ:
++    case NE:
++      /* NDS32 ISA has various form for eq/ne behavior no matter
++	 what kind of the operand is.
++	 So just generate original template rtx.  */
++
++      /* Put operands[2] into register if operands[2] is a large
++	 const_int or ISAv2.  */
++      if (GET_CODE (operands[2]) == CONST_INT
++	  && (!satisfies_constraint_Is11 (operands[2])
++	      || TARGET_ISA_V2))
++	operands[2] = force_reg (SImode, operands[2]);
++
++      return EXPAND_CREATE_TEMPLATE;
+ 
+     default:
+-      abort ();
++      return EXPAND_FAIL;
+     }
+-
+-  output_asm_insn (pattern, operands);
+-  return "";
+ }
+ 
+-/* Output 32-bit load.  */
+-const char *
+-nds32_output_32bit_load (rtx *operands, int byte)
++enum nds32_expand_result_type
++nds32_expand_cstore (rtx *operands)
+ {
+-  char pattern[100];
+-  unsigned char size;
+-  rtx code;
+-
+-  code = XEXP (operands[1], 0);
++  rtx tmp_reg;
++  enum rtx_code code;
+ 
+-  size = nds32_byte_to_size (byte);
++  code = GET_CODE (operands[1]);
+ 
+-  switch (GET_CODE (code))
++  switch (code)
+     {
+-    case REG:
+-      /* (mem (reg X))
+-	 => access location by using register,
+-	 use "lbi / lhi / lwi" */
+-      snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size);
+-      break;
+-
+-    case SYMBOL_REF:
+-    case CONST:
+-      /* (mem (symbol_ref X))
+-	 (mem (const (...)))
+-	 => access global variables,
+-	 use "lbi.gp / lhi.gp / lwi.gp" */
+-      operands[1] = XEXP (operands[1], 0);
+-      snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size);
+-      break;
++    case EQ:
++    case NE:
++      if (GET_CODE (operands[3]) == CONST_INT)
++	{
++	  /* reg_R = (reg_A == const_int_B)
++	     --> xori reg_C, reg_A, const_int_B
++		 slti reg_R, reg_C, const_int_1
++	     reg_R = (reg_A != const_int_B)
++	     --> xori reg_C, reg_A, const_int_B
++		 slti reg_R, const_int0, reg_C */
++	  tmp_reg = gen_reg_rtx (SImode);
++
++	  /* If the integer value is not in the range of imm15s,
++	     we need to force register first because our addsi3 pattern
++	     only accept nds32_rimm15s_operand predicate.  */
++	  rtx new_imm = gen_int_mode (-INTVAL (operands[3]), SImode);
++	  if (satisfies_constraint_Is15 (new_imm))
++	    emit_insn (gen_addsi3 (tmp_reg, operands[2], new_imm));
++	  else
++	    {
++	      if (!(satisfies_constraint_Iu15 (operands[3])
++		    || (TARGET_EXT_PERF
++			&& satisfies_constraint_It15 (operands[3]))))
++		operands[3] = force_reg (SImode, operands[3]);
++	      emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
++	    }
++
++	  if (code == EQ)
++	    emit_insn (gen_slt_eq0 (operands[0], tmp_reg));
++	  else
++	    emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
++
++	  return EXPAND_DONE;
++	}
++      else
++	{
++	  /* reg_R = (reg_A == reg_B)
++	     --> xor  reg_C, reg_A, reg_B
++		 slti reg_R, reg_C, const_int_1
++	     reg_R = (reg_A != reg_B)
++	     --> xor  reg_C, reg_A, reg_B
++		 slti reg_R, const_int0, reg_C */
++	  tmp_reg = gen_reg_rtx (SImode);
++	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
++	  if (code == EQ)
++	    emit_insn (gen_slt_eq0 (operands[0], tmp_reg));
++	  else
++	    emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
++
++	  return EXPAND_DONE;
++	}
++    case GT:
++    case GTU:
++      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
++      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
++      if (code == GT)
++	{
++	  /* GT, use slts instruction */
++	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
++	}
++      else
++	{
++	  /* GTU, use slt instruction */
++	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
++	}
+ 
+-    case POST_INC:
+-      /* (mem (post_inc reg))
+-	 => access location by using register which will be post increment,
+-	 use "lbi.bi / lhi.bi / lwi.bi" */
+-      snprintf (pattern, sizeof (pattern),
+-		"l%ci.bi\t%%0, %%1, %d", size, byte);
+-      break;
++      return EXPAND_DONE;
+ 
+-    case POST_DEC:
+-      /* (mem (post_dec reg))
+-	 => access location by using register which will be post decrement,
+-	 use "lbi.bi / lhi.bi / lwi.bi" */
+-      snprintf (pattern, sizeof (pattern),
+-		"l%ci.bi\t%%0, %%1, -%d", size, byte);
+-      break;
++    case GE:
++    case GEU:
++      if (GET_CODE (operands[3]) == CONST_INT)
++	{
++	  /* reg_R = (reg_A >= const_int_B)
++	     --> movi reg_C, const_int_B - 1
++		 slt  reg_R, reg_C, reg_A */
++	  tmp_reg = gen_reg_rtx (SImode);
++
++	  emit_insn (gen_movsi (tmp_reg,
++				gen_int_mode (INTVAL (operands[3]) - 1,
++					      SImode)));
++	  if (code == GE)
++	    {
++	      /* GE, use slts instruction */
++	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
++	    }
++	  else
++	    {
++	      /* GEU, use slt instruction */
++	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
++	    }
++
++	  return EXPAND_DONE;
++	}
++      else
++	{
++	  /* reg_R = (reg_A >= reg_B)
++	     --> slt  reg_R, reg_A, reg_B
++		 xori reg_R, reg_R, const_int_1 */
++	  if (code == GE)
++	    {
++	      /* GE, use slts instruction */
++	      emit_insn (gen_slts_compare (operands[0],
++					   operands[2], operands[3]));
++	    }
++	  else
++	    {
++	      /* GEU, use slt instruction */
++	      emit_insn (gen_slt_compare  (operands[0],
++					   operands[2], operands[3]));
++	    }
++
++	  /* perform 'not' behavior */
++	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
++
++	  return EXPAND_DONE;
++	}
+ 
+-    case POST_MODIFY:
+-      switch (GET_CODE (XEXP (XEXP (code, 1), 1)))
++    case LT:
++    case LTU:
++      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
++      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
++      if (code == LT)
+ 	{
+-	case REG:
+-	case SUBREG:
+-	  /* (mem (post_modify (reg) (plus (reg) (reg))))
+-	     => access location by using register which will be
+-	     post modified with reg,
+-	     use "lb.bi/ lh.bi / lw.bi" */
+-	  snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size);
+-	  break;
+-	case CONST_INT:
+-	  /* (mem (post_modify (reg) (plus (reg) (const_int))))
+-	     => access location by using register which will be
+-	     post modified with const_int,
+-	     use "lbi.bi/ lhi.bi / lwi.bi" */
+-	  snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size);
+-	  break;
+-	default:
+-	  abort ();
++	  /* LT, use slts instruction */
++	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
++	}
++      else
++	{
++	  /* LTU, use slt instruction */
++	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+ 	}
+-      break;
+ 
+-    case PLUS:
+-      switch (GET_CODE (XEXP (code, 1)))
++      return EXPAND_DONE;
++
++    case LE:
++    case LEU:
++      if (GET_CODE (operands[3]) == CONST_INT)
+ 	{
+-	case REG:
+-	case SUBREG:
+-	  /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg))
+-	     use "lb / lh / lw" */
+-	  snprintf (pattern, sizeof (pattern), "l%c\t%%0, %%1", size);
+-	  break;
+-	case CONST_INT:
+-	  /* (mem (plus reg const_int))
+-	     => access location by adding one register with const_int,
+-	     use "lbi / lhi / lwi" */
+-	  snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size);
+-	  break;
+-	default:
+-	  abort ();
++	  /* reg_R = (reg_A <= const_int_B)
++	     --> movi reg_C, const_int_B + 1
++		 slt  reg_R, reg_A, reg_C */
++	  tmp_reg = gen_reg_rtx (SImode);
++
++	  emit_insn (gen_movsi (tmp_reg,
++				gen_int_mode (INTVAL (operands[3]) + 1,
++						      SImode)));
++	  if (code == LE)
++	    {
++	      /* LE, use slts instruction */
++	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
++	    }
++	  else
++	    {
++	      /* LEU, use slt instruction */
++	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
++	    }
++
++	  return EXPAND_DONE;
++	}
++      else
++	{
++	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
++					  xori reg_R, reg_R, const_int_1 */
++	  if (code == LE)
++	    {
++	      /* LE, use slts instruction */
++	      emit_insn (gen_slts_compare (operands[0],
++					   operands[3], operands[2]));
++	    }
++	  else
++	    {
++	      /* LEU, use slt instruction */
++	      emit_insn (gen_slt_compare  (operands[0],
++					   operands[3], operands[2]));
++	    }
++
++	  /* perform 'not' behavior */
++	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
++
++	  return EXPAND_DONE;
+ 	}
+-      break;
+ 
+-    case LO_SUM:
+-      operands[2] = XEXP (code, 1);
+-      operands[1] = XEXP (code, 0);
+-      snprintf (pattern, sizeof (pattern),
+-		"l%ci\t%%0, [%%1 + lo12(%%2)]", size);
+-      break;
+ 
+     default:
+-      abort ();
++      gcc_unreachable ();
+     }
+-
+-  output_asm_insn (pattern, operands);
+-  return "";
+ }
+ 
+-/* Output 32-bit load with signed extension.  */
+-const char *
+-nds32_output_32bit_load_s (rtx *operands, int byte)
++void
++nds32_expand_float_cbranch (rtx *operands)
+ {
+-  char pattern[100];
+-  unsigned char size;
+-  rtx code;
++  enum rtx_code code = GET_CODE (operands[0]);
++  enum rtx_code new_code = code;
++  rtx cmp_op0 = operands[1];
++  rtx cmp_op1 = operands[2];
++  rtx tmp_reg;
++  rtx tmp;
+ 
+-  code = XEXP (operands[1], 0);
++  int reverse = 0;
+ 
+-  size = nds32_byte_to_size (byte);
++  /* Main Goal: Use compare instruction + branch instruction.
+ 
+-  switch (GET_CODE (code))
++     For example:
++     GT, GE: swap condition and swap operands and generate
++     compare instruction(LT, LE) + branch not equal instruction.
++
++     UNORDERED, LT, LE, EQ: no need to change and generate
++     compare instruction(UNORDERED, LT, LE, EQ) + branch not equal instruction.
++
++     ORDERED, NE: reverse condition and generate
++     compare instruction(UNORDERED, EQ) + branch equal instruction. */
++
++  switch (code)
+     {
+-    case REG:
+-      /* (mem (reg X))
+-         => access location by using register,
+-         use "lbsi / lhsi" */
+-      snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size);
++    case GT:
++    case GE:
++      tmp = cmp_op0;
++      cmp_op0 = cmp_op1;
++      cmp_op1 = tmp;
++      new_code = swap_condition (new_code);
+       break;
+-
+-    case SYMBOL_REF:
+-    case CONST:
+-      /* (mem (symbol_ref X))
+-         (mem (const (...)))
+-         => access global variables,
+-         use "lbsi.gp / lhsi.gp" */
+-      operands[1] = XEXP (operands[1], 0);
+-      snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size);
++    case UNORDERED:
++    case LT:
++    case LE:
++    case EQ:
+       break;
+-
+-    case POST_INC:
+-      /* (mem (post_inc reg))
+-         => access location by using register which will be post increment,
+-         use "lbsi.bi / lhsi.bi" */
+-      snprintf (pattern, sizeof (pattern),
+-		"l%csi.bi\t%%0, %%1, %d", size, byte);
++    case ORDERED:
++    case NE:
++      new_code = reverse_condition (new_code);
++      reverse = 1;
++      break;
++    case UNGT:
++    case UNGE:
++      new_code = reverse_condition_maybe_unordered (new_code);
++      reverse = 1;
+       break;
++    case UNLT:
++    case UNLE:
++      new_code = reverse_condition_maybe_unordered (new_code);
++      tmp = cmp_op0;
++      cmp_op0 = cmp_op1;
++      cmp_op1 = tmp;
++      new_code = swap_condition (new_code);
++      reverse = 1;
++      break;
++    default:
++      return;
++    }
+ 
+-    case POST_DEC:
+-      /* (mem (post_dec reg))
+-         => access location by using register which will be post decrement,
+-         use "lbsi.bi / lhsi.bi" */
+-      snprintf (pattern, sizeof (pattern),
+-		"l%csi.bi\t%%0, %%1, -%d", size, byte);
++  tmp_reg = gen_reg_rtx (SImode);
++  emit_insn (gen_rtx_SET (tmp_reg,
++			  gen_rtx_fmt_ee (new_code, SImode,
++					  cmp_op0, cmp_op1)));
++
++  PUT_CODE (operands[0], reverse ? EQ : NE);
++  emit_insn (gen_cbranchsi4 (operands[0], tmp_reg,
++			     const0_rtx, operands[3]));
++}
++
++void
++nds32_expand_float_cstore (rtx *operands)
++{
++  enum rtx_code code = GET_CODE (operands[1]);
++  enum rtx_code new_code = code;
++  enum machine_mode mode = GET_MODE (operands[2]);
++
++  rtx cmp_op0 = operands[2];
++  rtx cmp_op1 = operands[3];
++  rtx tmp;
++
++  /* Main Goal: Use compare instruction to store value.
++
++     For example:
++     GT, GE: swap condition and swap operands.
++       reg_R = (reg_A >  reg_B) --> fcmplt reg_R, reg_B, reg_A
++       reg_R = (reg_A >= reg_B) --> fcmple reg_R, reg_B, reg_A
++
++     LT, LE, EQ: no need to change, it is already LT, LE, EQ.
++       reg_R = (reg_A <  reg_B) --> fcmplt reg_R, reg_A, reg_B
++       reg_R = (reg_A <= reg_B) --> fcmple reg_R, reg_A, reg_B
++       reg_R = (reg_A == reg_B) --> fcmpeq reg_R, reg_A, reg_B
++
++     ORDERED: reverse condition and using xor instruction to achieve 'ORDERED'.
++       reg_R = ORDERED (reg_A, reg_B) --> fcmpun reg_R, reg_A, reg_B
++					     xor reg_R, reg_R, const1_rtx
++
++     NE: reverse condition and using xor instruction to achieve 'NE'.
++       reg_R = (reg_A != reg_B) --> fcmpeq reg_R, reg_A, reg_B
++				       xor reg_R, reg_R, const1_rtx */
++  switch (code)
++    {
++    case GT:
++    case GE:
++      tmp = cmp_op0;
++      cmp_op0 = cmp_op1;
++      cmp_op1 =tmp;
++      new_code = swap_condition (new_code);
+       break;
++    case UNORDERED:
++    case LT:
++    case LE:
++    case EQ:
++      break;
++    case ORDERED:
++      if (mode == SFmode)
++	emit_insn (gen_cmpsf_un (operands[0], cmp_op0, cmp_op1));
++      else
++	emit_insn (gen_cmpdf_un (operands[0], cmp_op0, cmp_op1));
+ 
+-    case POST_MODIFY:
+-      switch (GET_CODE (XEXP (XEXP (code, 1), 1)))
++      emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
++      return;
++    case NE:
++      if (mode == SFmode)
++	emit_insn (gen_cmpsf_eq (operands[0], cmp_op0, cmp_op1));
++      else
++	emit_insn (gen_cmpdf_eq (operands[0], cmp_op0, cmp_op1));
++
++      emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
++      return;
++    default:
++      return;
++    }
++
++  emit_insn (gen_rtx_SET (operands[0],
++			  gen_rtx_fmt_ee (new_code, SImode,
++					  cmp_op0, cmp_op1)));
++}
++
++enum nds32_expand_result_type
++nds32_expand_movcc (rtx *operands)
++{
++  enum rtx_code code = GET_CODE (operands[1]);
++  enum rtx_code new_code = code;
++  enum machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0));
++  rtx cmp_op0 = XEXP (operands[1], 0);
++  rtx cmp_op1 = XEXP (operands[1], 1);
++  rtx tmp;
++
++  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
++      && XEXP (operands[1], 1) == const0_rtx)
++    {
++      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
++	 we have gcc generate original template rtx.  */
++      return EXPAND_CREATE_TEMPLATE;
++    }
++  else if ((TARGET_FPU_SINGLE && cmp0_mode == SFmode)
++	   || (TARGET_FPU_DOUBLE && cmp0_mode == DFmode))
++    {
++      nds32_expand_float_movcc (operands);
++    }
++  else
++    {
++      /* Since there is only 'slt'(Set when Less Than) instruction for
++	 comparison in Andes ISA, the major strategy we use here is to
++	 convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
++	 We design constraints properly so that the reload phase will assist
++	 to make one source operand to use same register as result operand.
++	 Then we can use cmovz/cmovn to catch the other source operand
++	 which has different register.  */
++      int reverse = 0;
++
++      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
++	 Strategy : Reverse condition and swap comparison operands
++
++	 For example:
++
++	     a <= b ? P : Q   (LE or LEU)
++	 --> a >  b ? Q : P   (reverse condition)
++	 --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
++
++	     a >= b ? P : Q   (GE or GEU)
++	 --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
++
++	     a <  b ? P : Q   (LT or LTU)
++	 --> (NO NEED TO CHANGE, it is already 'LT/LTU')
++
++	     a >  b ? P : Q   (GT or GTU)
++	 --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
++      switch (code)
+ 	{
+-	case REG:
+-	case SUBREG:
+-	  /* (mem (post_modify (reg) (plus (reg) (reg))))
+-	     => access location by using register which will be
+-	     post modified with reg,
+-	     use "lbs.bi/ lhs.bi" */
+-	  snprintf (pattern, sizeof (pattern), "l%cs.bi\t%%0, %%1", size);
++	case GE: case GEU: case LE: case LEU:
++	  new_code = reverse_condition (code);
++	  reverse = 1;
+ 	  break;
+-	case CONST_INT:
+-	  /* (mem (post_modify (reg) (plus (reg) (const_int))))
+-	     => access location by using register which will be
+-	     post modified with const_int,
+-	     use "lbsi.bi/ lhsi.bi" */
+-	  snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size);
++	case EQ:
++	case NE:
++	  /* no need to reverse condition */
+ 	  break;
+ 	default:
+-	  abort ();
++	  return EXPAND_FAIL;
+ 	}
+-      break;
+ 
+-    case PLUS:
+-      switch (GET_CODE (XEXP (code, 1)))
++      /* For '>' comparison operator, we swap operands
++	 so that we can have 'LT/LTU' operator.  */
++      if (new_code == GT || new_code == GTU)
+ 	{
+-	case REG:
+-	case SUBREG:
+-	  /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg))
+-	     use "lbs / lhs" */
+-	  snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size);
++	  tmp     = cmp_op0;
++	  cmp_op0 = cmp_op1;
++	  cmp_op1 = tmp;
++
++	  new_code = swap_condition (new_code);
++	}
++
++      /* Use a temporary register to store slt/slts result.  */
++      tmp = gen_reg_rtx (SImode);
++
++      if (new_code == EQ || new_code == NE)
++	{
++	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
++	  /* tmp == 0 if cmp_op0 == cmp_op1.  */
++	  operands[1] = gen_rtx_fmt_ee (new_code, VOIDmode, tmp, const0_rtx);
++	}
++      else
++	{
++	  /* This emit_insn will create corresponding 'slt/slts'
++	      instruction.  */
++	  if (new_code == LT)
++	    emit_insn (gen_slts_compare (tmp, cmp_op0, cmp_op1));
++	  else if (new_code == LTU)
++	    emit_insn (gen_slt_compare (tmp, cmp_op0, cmp_op1));
++	  else
++	    gcc_unreachable ();
++
++	  /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
++	     so that cmovz or cmovn will be matched later.
++
++	     For reverse condition cases, we want to create a semantic that:
++	       (eq X 0) --> pick up "else" part
++	     For normal cases, we want to create a semantic that:
++	       (ne X 0) --> pick up "then" part
++
++	     Later we will have cmovz/cmovn instruction pattern to
++	     match corresponding behavior and output instruction.  */
++	  operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
++					VOIDmode, tmp, const0_rtx);
++	}
++    }
++  return EXPAND_CREATE_TEMPLATE;
++}
++
++void
++nds32_expand_float_movcc (rtx *operands)
++{
++  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
++      && GET_MODE (XEXP (operands[1], 0)) == SImode
++      && XEXP (operands[1], 1) == const0_rtx)
++    {
++      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
++	 we have gcc generate original template rtx.  */
++      return;
++    }
++  else
++    {
++      enum rtx_code code = GET_CODE (operands[1]);
++      enum rtx_code new_code = code;
++      enum machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0));
++      enum machine_mode cmp1_mode = GET_MODE (XEXP (operands[1], 1));
++      rtx cmp_op0 = XEXP (operands[1], 0);
++      rtx cmp_op1 = XEXP (operands[1], 1);
++      rtx tmp;
++
++      /* Compare instruction Operations: (cmp_op0 condition cmp_op1) ? 1 : 0,
++	 when result is 1, and 'reverse' be set 1 for fcmovzs instruction. */
++      int reverse = 0;
++
++      /* Main Goal: Use compare instruction + conditional move instruction.
++	 Strategy : swap condition and swap comparison operands.
++
++	 For example:
++	     a > b ? P : Q   (GT)
++	 --> b < a ? P : Q   (swap condition and comparison operands,
++			      which yields 'LT')
++
++	     a >= b ? P : Q  (GE)
++	 --> b <= a ? P : Q  (swap condition and comparison operands,
++			      which yields 'LE')
++
++	     a <  b ? P : Q  (LT)
++	 --> (NO NEED TO CHANGE, it is already 'LT')
++
++	     a <= b ? P : Q  (LE)
++	 --> (NO NEED TO CHANGE, it is already 'LE')
++
++	     a == b ? P : Q  (EQ)
++	 --> (NO NEED TO CHANGE, it is already 'EQ') */
++
++      switch (code)
++	{
++	case GT:
++	case GE:
++	  tmp = cmp_op0;
++	  cmp_op0 = cmp_op1;
++	  cmp_op1 =tmp;
++	  new_code = swap_condition (new_code);
+ 	  break;
+-	case CONST_INT:
+-	  /* (mem (plus reg const_int))
+-	     => access location by adding one register with const_int,
+-	     use "lbsi / lhsi" */
+-	  snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size);
++	case UNORDERED:
++	case LT:
++	case LE:
++	case EQ:
++	  break;
++	case ORDERED:
++	case NE:
++	  reverse = 1;
++	  new_code = reverse_condition (new_code);
++	  break;
++	case UNGT:
++	case UNGE:
++	  new_code = reverse_condition_maybe_unordered (new_code);
++	  reverse = 1;
++	  break;
++	case UNLT:
++	case UNLE:
++	  new_code = reverse_condition_maybe_unordered (new_code);
++	  tmp = cmp_op0;
++	  cmp_op0 = cmp_op1;
++	  cmp_op1 = tmp;
++	  new_code = swap_condition (new_code);
++	  reverse = 1;
+ 	  break;
+ 	default:
+-	  abort ();
++	  return;
+ 	}
+-      break;
+ 
+-    case LO_SUM:
+-      operands[2] = XEXP (code, 1);
+-      operands[1] = XEXP (code, 0);
+-      snprintf (pattern, sizeof (pattern),
+-		"l%csi\t%%0, [%%1 + lo12(%%2)]", size);
+-      break;
++      /* Use a temporary register to store fcmpxxs result.  */
++      tmp = gen_reg_rtx (SImode);
++
++      /* Create a float compare instruction for SFmode and DFmode;
++	 other modes use cstoresi to create the compare instruction. */
++      if ((cmp0_mode == DFmode || cmp0_mode == SFmode)
++	  && (cmp1_mode == DFmode || cmp1_mode == SFmode))
++	{
++	  /* This emit_insn create corresponding float compare instruction */
++	  emit_insn (gen_rtx_SET (tmp,
++				  gen_rtx_fmt_ee (new_code, SImode,
++						  cmp_op0, cmp_op1)));
++	}
++      else
++	{
++	  /* This emit_insn using cstoresi create corresponding
++	     compare instruction */
++	  PUT_CODE (operands[1], new_code);
++	  emit_insn (gen_cstoresi4 (tmp, operands[1],
++				    cmp_op0, cmp_op1));
++	}
++      /* operands[1] creates the corresponding conditional move instruction
++	 for fcmovzs and fcmovns.  */
++      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
++				    VOIDmode, tmp, const0_rtx);
++    }
++}
++
++void
++nds32_emit_push_fpr_callee_saved (int base_offset)
++{
++  rtx fpu_insn;
++  rtx reg, mem;
++  unsigned int regno = cfun->machine->callee_saved_first_fpr_regno;
++  unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno;
++
++  while (regno <= last_fpr)
++    {
++      /* Handling two registers, using fsdi instruction.  */
++      reg = gen_rtx_REG (DFmode, regno);
++      mem = gen_frame_mem (DFmode, plus_constant (Pmode,
++						  stack_pointer_rtx,
++						  base_offset));
++      base_offset += 8;
++      regno += 2;
++      fpu_insn = emit_move_insn (mem, reg);
++      RTX_FRAME_RELATED_P (fpu_insn) = 1;
++    }
++}
++
++void
++nds32_emit_pop_fpr_callee_saved (int gpr_padding_size)
++{
++  rtx fpu_insn;
++  rtx reg, mem, addr;
++  rtx dwarf, adjust_sp_rtx;
++  unsigned int regno = cfun->machine->callee_saved_first_fpr_regno;
++  unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno;
++  int padding = 0;
++
++  while (regno <= last_fpr)
++    {
++      /* Handling two registers, using fldi.bi instruction.  */
++      if ((regno + 1) >= last_fpr)
++	padding = gpr_padding_size;
++
++      reg = gen_rtx_REG (DFmode, (regno));
++      addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
++				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
++						GEN_INT (8 + padding)));
++      mem = gen_frame_mem (DFmode, addr);
++      regno += 2;
++      fpu_insn = emit_move_insn (reg, mem);
++
++      adjust_sp_rtx =
++	gen_rtx_SET (stack_pointer_rtx,
++		     plus_constant (Pmode, stack_pointer_rtx,
++				    8 + padding));
++
++      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX);
++      /* Tell gcc we adjust SP in this insn.  */
++      dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx),
++			      dwarf);
++      RTX_FRAME_RELATED_P (fpu_insn) = 1;
++      REG_NOTES (fpu_insn) = dwarf;
++    }
++}
++
++void
++nds32_emit_v3pop_fpr_callee_saved (int base)
++{
++  int fpu_base_addr = base;
++  int regno;
++  rtx fpu_insn;
++  rtx reg, mem;
++  rtx dwarf;
++
++  regno = cfun->machine->callee_saved_first_fpr_regno;
++  while (regno <= cfun->machine->callee_saved_last_fpr_regno)
++    {
++      /* Handling two registers, using fldi instruction.  */
++      reg = gen_rtx_REG (DFmode, regno);
++      mem = gen_frame_mem (DFmode, plus_constant (Pmode,
++						  stack_pointer_rtx,
++						  fpu_base_addr));
++      fpu_base_addr += 8;
++      regno += 2;
++      fpu_insn = emit_move_insn (reg, mem);
++      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX);
++      RTX_FRAME_RELATED_P (fpu_insn) = 1;
++      REG_NOTES (fpu_insn) = dwarf;
++    }
++}
++
++enum nds32_expand_result_type
++nds32_expand_extv (rtx *operands)
++{
++  gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3]));
++  HOST_WIDE_INT width = INTVAL (operands[2]);
++  HOST_WIDE_INT bitpos = INTVAL (operands[3]);
++  rtx dst = operands[0];
++  rtx src = operands[1];
++
++  if (MEM_P (src)
++      && width == 32
++      && (bitpos % BITS_PER_UNIT)  == 0
++      && GET_MODE_BITSIZE (GET_MODE (dst)) == width)
++    {
++      rtx newmem = adjust_address (src, GET_MODE (dst),
++				   bitpos / BITS_PER_UNIT);
++
++      rtx base_addr = force_reg (Pmode, XEXP (newmem, 0));
++
++      emit_insn (gen_unaligned_loadsi (dst, base_addr));
++
++      return EXPAND_DONE;
++    }
++  return EXPAND_FAIL;
++}
++
++enum nds32_expand_result_type
++nds32_expand_insv (rtx *operands)
++{
++  gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]));
++  HOST_WIDE_INT width = INTVAL (operands[1]);
++  HOST_WIDE_INT bitpos = INTVAL (operands[2]);
++  rtx dst = operands[0];
++  rtx src = operands[3];
++
++  if (MEM_P (dst)
++      && width == 32
++      && (bitpos % BITS_PER_UNIT)  == 0
++      && GET_MODE_BITSIZE (GET_MODE (src)) == width)
++    {
++      rtx newmem = adjust_address (dst, GET_MODE (src),
++				      bitpos / BITS_PER_UNIT);
++
++      rtx base_addr = force_reg (Pmode, XEXP (newmem, 0));
++
++      emit_insn (gen_unaligned_storesi (base_addr, src));
++
++      return EXPAND_DONE;
++    }
++  return EXPAND_FAIL;
++}
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 3: Auxiliary function for output asm template. */
++
++/* Function to generate PC relative jump table.
++   Refer to nds32.md for more details.
++
++   The following is the sample for the case that diff value
++   can be presented in '.short' size.
++
++     addi    $r1, $r1, -(case_lower_bound)
++     slti    $ta, $r1, (case_number)
++     beqz    $ta, .L_skip_label
++
++     la      $ta, .L35             ! get jump table address
++     lh      $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry
++     add     $ta, $r1, $ta
++     jr5     $ta
++
++     ! jump table entry
++   L35:
++     .short  .L25-.L35
++     .short  .L26-.L35
++     .short  .L27-.L35
++     .short  .L28-.L35
++     .short  .L29-.L35
++     .short  .L30-.L35
++     .short  .L31-.L35
++     .short  .L32-.L35
++     .short  .L33-.L35
++     .short  .L34-.L35 */
++const char *
++nds32_output_casesi_pc_relative (rtx *operands)
++{
++  enum machine_mode mode;
++  rtx diff_vec;
++
++  diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1])));
++
++  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
++
++  /* Step C: "t <-- operands[1]".  */
++  if (flag_pic)
++    {
++      output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands);
++      output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands);
++      output_asm_insn ("add\t$ta, $ta, $gp", operands);
++    }
++  else
++    output_asm_insn ("la\t$ta, %l1", operands);
++
++  /* Get the mode of each element in the difference vector.  */
++  mode = GET_MODE (diff_vec);
+ 
++  /* Step D: "z <-- (mem (plus (operands[0] << m) t))",
++     where m is 0, 1, or 2 to load address-diff value from table.  */
++  switch (mode)
++    {
++    case QImode:
++      output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands);
++      break;
++    case HImode:
++      output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands);
++      break;
++    case SImode:
++      output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
++      break;
+     default:
+-      abort ();
++      gcc_unreachable ();
+     }
+ 
+-  output_asm_insn (pattern, operands);
+-  return "";
++  /* Step E: "t <-- z + t".
++     Add table label_ref with address-diff value to
++     obtain target case address.  */
++  output_asm_insn ("add\t$ta, %2, $ta", operands);
++
++  /* Step F: jump to target with register t.  */
++  if (TARGET_16_BIT)
++    return "jr5\t$ta";
++  else
++    return "jr\t$ta";
+ }
+ 
+-/* Function to output stack push operation.
+-   We need to deal with normal stack push multiple or stack v3push.  */
++/* Function to generate normal jump table.  */
+ const char *
+-nds32_output_stack_push (rtx par_rtx)
++nds32_output_casesi (rtx *operands)
+ {
+-  /* A string pattern for output_asm_insn().  */
+-  char pattern[100];
+-  /* The operands array which will be used in output_asm_insn().  */
+-  rtx operands[3];
+-  /* Pick up varargs first regno and last regno for further use.  */
+-  int rb_va_args = cfun->machine->va_args_first_regno;
+-  int re_va_args = cfun->machine->va_args_last_regno;
+-  int last_argument_regno = NDS32_FIRST_GPR_REGNUM
+-			    + NDS32_MAX_GPR_REGS_FOR_ARGS
+-			    - 1;
+-  /* Pick up callee-saved first regno and last regno for further use.  */
+-  int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno;
+-  int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno;
++  /* Step C: "t <-- operands[1]".  */
++  if (flag_pic)
++    {
++      output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands);
++      output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands);
++      output_asm_insn ("add\t$ta, $ta, $gp", operands);
++    }
++  else
++    output_asm_insn ("la\t$ta, %l1", operands);
+ 
+-  /* First we need to check if we are pushing argument registers not used
+-     for the named arguments.  If so, we have to create 'smw.adm' (push.s)
+-     instruction.  */
+-  if (reg_mentioned_p (gen_rtx_REG (SImode, last_argument_regno), par_rtx))
++  /* Step D: "z <-- (mem (plus (operands[0] << 2) t))".  */
++  output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
++
++  /* No need to perform Step E, which is only used for
++     pc relative jump table.  */
++
++  /* Step F: jump to target with register z.  */
++  if (TARGET_16_BIT)
++    return "jr5\t%2";
++  else
++    return "jr\t%2";
++}
++
++
++/* Function to return memory format.  */
++enum nds32_16bit_address_type
++nds32_mem_format (rtx op)
++{
++  enum machine_mode mode_test;
++  int val;
++  int regno;
++
++  if (!TARGET_16_BIT)
++    return ADDRESS_NOT_16BIT_FORMAT;
++
++  mode_test = GET_MODE (op);
++
++  op = XEXP (op, 0);
++
++  /* 45 format.  */
++  if (GET_CODE (op) == REG
++      && ((mode_test == SImode) || (mode_test == SFmode)))
++    return ADDRESS_REG;
++
++  /* 333 format for QI/HImode.  */
++  if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM))
++    return ADDRESS_LO_REG_IMM3U;
++
++  /* post_inc 333 format.  */
++  if ((GET_CODE (op) == POST_INC)
++      && ((mode_test == SImode) || (mode_test == SFmode)))
+     {
+-      /* Set operands[0] and operands[1].  */
+-      operands[0] = gen_rtx_REG (SImode, rb_va_args);
+-      operands[1] = gen_rtx_REG (SImode, re_va_args);
+-      /* Create assembly code pattern: "Rb, Re, { }".  */
+-      snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }");
+-      /* We use output_asm_insn() to output assembly code by ourself.  */
+-      output_asm_insn (pattern, operands);
+-      return "";
++      regno = REGNO(XEXP (op, 0));
++
++      if (regno < 8)
++	return ADDRESS_POST_INC_LO_REG_IMM3U;
++    }
++
++  /* post_modify 333 format.  */
++  if ((GET_CODE (op) == POST_MODIFY)
++      && ((mode_test == SImode) || (mode_test == SFmode))
++      && (REG_P (XEXP (XEXP (op, 1), 0)))
++      && (CONST_INT_P (XEXP (XEXP (op, 1), 1))))
++    {
++      regno = REGNO (XEXP (XEXP (op, 1), 0));
++      val = INTVAL (XEXP (XEXP (op, 1), 1));
++      if (regno < 8 && val > 0 && val < 32)
++	return ADDRESS_POST_MODIFY_LO_REG_IMM3U;
+     }
+ 
+-  /* If we step here, we are going to do v3push or multiple push operation.  */
++  if ((GET_CODE (op) == PLUS)
++      && (GET_CODE (XEXP (op, 0)) == REG)
++      && (GET_CODE (XEXP (op, 1)) == CONST_INT))
++    {
++      val = INTVAL (XEXP (op, 1));
++
++      regno = REGNO(XEXP (op, 0));
++
++      if (regno > 8
++	  && regno != SP_REGNUM
++	  && regno != FP_REGNUM)
++	return ADDRESS_NOT_16BIT_FORMAT;
++
++      switch (mode_test)
++	{
++	case QImode:
++	  /* 333 format.  */
++	  if (val >= 0 && val < 8 && regno < 8)
++	    return ADDRESS_LO_REG_IMM3U;
++	  break;
++
++	case HImode:
++	  /* 333 format.  */
++	  if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8)
++	    return ADDRESS_LO_REG_IMM3U;
++	  break;
++
++	case SImode:
++	case SFmode:
++	case DFmode:
++	  /* r8 imply fe format.  */
++	  if ((regno == 8) &&
++	      (val >= -128 && val <= -4 && (val % 4 == 0)))
++	    return ADDRESS_R8_IMM7U;
++	  /* fp imply 37 format.  */
++	  if ((regno == FP_REGNUM) &&
++	      (val >= 0 && val < 512 && (val % 4 == 0)))
++	    return ADDRESS_FP_IMM7U;
++	  /* sp imply 37 format.  */
++	  else if ((regno == SP_REGNUM) &&
++		   (val >= 0 && val < 512 && (val % 4 == 0)))
++	    return ADDRESS_SP_IMM7U;
++	  /* 333 format.  */
++	  else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8)
++	    return ADDRESS_LO_REG_IMM3U;
++	  break;
++
++	default:
++	  break;
++	}
++    }
++
++  return ADDRESS_NOT_16BIT_FORMAT;
++}
++
++/* Output 16-bit store.  */
++const char *
++nds32_output_16bit_store (rtx *operands, int byte)
++{
++  char pattern[100];
++  char size;
++  rtx code = XEXP (operands[0], 0);
++
++  size = nds32_byte_to_size (byte);
++
++  switch (nds32_mem_format (operands[0]))
++    {
++    case ADDRESS_REG:
++      operands[0] = code;
++      output_asm_insn ("swi450\t%1, [%0]", operands);
++      break;
++    case ADDRESS_LO_REG_IMM3U:
++      snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size);
++      output_asm_insn (pattern, operands);
++      break;
++    case ADDRESS_POST_INC_LO_REG_IMM3U:
++      snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0, 4");
++      output_asm_insn (pattern, operands);
++      break;
++    case ADDRESS_POST_MODIFY_LO_REG_IMM3U:
++      snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0");
++      output_asm_insn (pattern, operands);
++      break;
++    case ADDRESS_FP_IMM7U:
++      output_asm_insn ("swi37\t%1, %0", operands);
++      break;
++    case ADDRESS_SP_IMM7U:
++      /* Get immediate value and set back to operands[0].  */
++      operands[0] = XEXP (code, 1);
++      output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands);
++      break;
++    default:
++      break;
++    }
++
++  return "";
++}
++
++/* Output 16-bit load.  */
++const char *
++nds32_output_16bit_load (rtx *operands, int byte)
++{
++  char pattern[100];
++  unsigned char size;
++  rtx code = XEXP (operands[1], 0);
++
++  size = nds32_byte_to_size (byte);
++
++  switch (nds32_mem_format (operands[1]))
++    {
++    case ADDRESS_REG:
++      operands[1] = code;
++      output_asm_insn ("lwi450\t%0, [%1]", operands);
++      break;
++    case ADDRESS_LO_REG_IMM3U:
++      snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size);
++      output_asm_insn (pattern, operands);
++      break;
++    case ADDRESS_POST_INC_LO_REG_IMM3U:
++      snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1, 4");
++      output_asm_insn (pattern, operands);
++      break;
++    case ADDRESS_POST_MODIFY_LO_REG_IMM3U:
++      snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1");
++      output_asm_insn (pattern, operands);
++      break;
++    case ADDRESS_R8_IMM7U:
++      output_asm_insn ("lwi45.fe\t%0, %e1", operands);
++      break;
++    case ADDRESS_FP_IMM7U:
++      output_asm_insn ("lwi37\t%0, %1", operands);
++      break;
++    case ADDRESS_SP_IMM7U:
++      /* Get immediate value and set back to operands[1].  */
++      operands[1] = XEXP (code, 1);
++      output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands);
++      break;
++    default:
++      break;
++    }
++
++  return "";
++}
++
++/* Output 32-bit store.  */
++const char *
++nds32_output_32bit_store (rtx *operands, int byte)
++{
++  char pattern[100];
++  unsigned char size;
++  rtx code = XEXP (operands[0], 0);
++
++  size = nds32_byte_to_size (byte);
++
++  switch (GET_CODE (code))
++    {
++    case REG:
++      /* (mem (reg X))
++	 => access location by using register,
++	 use "sbi / shi / swi" */
++      snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size);
++      break;
++
++    case SYMBOL_REF:
++    case CONST:
++      /* (mem (symbol_ref X))
++	 (mem (const (...)))
++	 => access global variables,
++	 use "sbi.gp / shi.gp / swi.gp" */
++      operands[0] = XEXP (operands[0], 0);
++      snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size);
++      break;
++
++    case POST_INC:
++      /* (mem (post_inc reg))
++	 => access location by using register which will be post increment,
++	 use "sbi.bi / shi.bi / swi.bi" */
++      snprintf (pattern, sizeof (pattern),
++		"s%ci.bi\t%%1, %%0, %d", size, byte);
++      break;
++
++    case POST_DEC:
++      /* (mem (post_dec reg))
++	 => access location by using register which will be post decrement,
++	 use "sbi.bi / shi.bi / swi.bi" */
++      snprintf (pattern, sizeof (pattern),
++		"s%ci.bi\t%%1, %%0, -%d", size, byte);
++      break;
++
++    case POST_MODIFY:
++      switch (GET_CODE (XEXP (XEXP (code, 1), 1)))
++	{
++	case REG:
++	case SUBREG:
++	  /* (mem (post_modify (reg) (plus (reg) (reg))))
++	     => access location by using register which will be
++	     post modified with reg,
++	     use "sb.bi/ sh.bi / sw.bi" */
++	  snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size);
++	  break;
++	case CONST_INT:
++	  /* (mem (post_modify (reg) (plus (reg) (const_int))))
++	     => access location by using register which will be
++	     post modified with const_int,
++	     use "sbi.bi/ shi.bi / swi.bi" */
++	  snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size);
++	  break;
++	default:
++	  abort ();
++	}
++      break;
++
++    case PLUS:
++      switch (GET_CODE (XEXP (code, 1)))
++	{
++	case REG:
++	case SUBREG:
++	  /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg))
++	     => access location by adding two registers,
++	     use "sb / sh / sw" */
++	  snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size);
++	  break;
++	case CONST_INT:
++	  /* (mem (plus reg const_int))
++	     => access location by adding one register with const_int,
++	     use "sbi / shi / swi" */
++	  snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size);
++	  break;
++	default:
++	  abort ();
++	}
++      break;
++
++    case LO_SUM:
++      operands[2] = XEXP (code, 1);
++      operands[0] = XEXP (code, 0);
++      snprintf (pattern, sizeof (pattern),
++		"s%ci\t%%1, [%%0 + lo12(%%2)]", size);
++      break;
++
++    default:
++      abort ();
++    }
++
++  output_asm_insn (pattern, operands);
++  return "";
++}
++
++/* Output 32-bit load.  */
++const char *
++nds32_output_32bit_load (rtx *operands, int byte)
++{
++  char pattern[100];
++  unsigned char size;
++  rtx code;
++
++  code = XEXP (operands[1], 0);
++
++  size = nds32_byte_to_size (byte);
++
++  switch (GET_CODE (code))
++    {
++    case REG:
++      /* (mem (reg X))
++	 => access location by using register,
++	 use "lbi / lhi / lwi" */
++      snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size);
++      break;
++
++    case SYMBOL_REF:
++    case CONST:
++      /* (mem (symbol_ref X))
++	 (mem (const (...)))
++	 => access global variables,
++	 use "lbi.gp / lhi.gp / lwi.gp" */
++      operands[1] = XEXP (operands[1], 0);
++      snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size);
++      break;
++
++    case POST_INC:
++      /* (mem (post_inc reg))
++	 => access location by using register which will be post increment,
++	 use "lbi.bi / lhi.bi / lwi.bi" */
++      snprintf (pattern, sizeof (pattern),
++		"l%ci.bi\t%%0, %%1, %d", size, byte);
++      break;
++
++    case POST_DEC:
++      /* (mem (post_dec reg))
++	 => access location by using register which will be post decrement,
++	 use "lbi.bi / lhi.bi / lwi.bi" */
++      snprintf (pattern, sizeof (pattern),
++		"l%ci.bi\t%%0, %%1, -%d", size, byte);
++      break;
++
++    case POST_MODIFY:
++      switch (GET_CODE (XEXP (XEXP (code, 1), 1)))
++	{
++	case REG:
++	case SUBREG:
++	  /* (mem (post_modify (reg) (plus (reg) (reg))))
++	     => access location by using register which will be
++	     post modified with reg,
++	     use "lb.bi/ lh.bi / lw.bi" */
++	  snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size);
++	  break;
++	case CONST_INT:
++	  /* (mem (post_modify (reg) (plus (reg) (const_int))))
++	     => access location by using register which will be
++	     post modified with const_int,
++	     use "lbi.bi/ lhi.bi / lwi.bi" */
++	  snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size);
++	  break;
++	default:
++	  abort ();
++	}
++      break;
++
++    case PLUS:
++      switch (GET_CODE (XEXP (code, 1)))
++	{
++	case REG:
++	case SUBREG:
++	  /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg))
++	     use "lb / lh / lw" */
++	  snprintf (pattern, sizeof (pattern), "l%c\t%%0, %%1", size);
++	  break;
++	case CONST_INT:
++	  /* (mem (plus reg const_int))
++	     => access location by adding one register with const_int,
++	     use "lbi / lhi / lwi" */
++	  snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size);
++	  break;
++	default:
++	  abort ();
++	}
++      break;
++
++    case LO_SUM:
++      operands[2] = XEXP (code, 1);
++      operands[1] = XEXP (code, 0);
++      snprintf (pattern, sizeof (pattern),
++		"l%ci\t%%0, [%%1 + lo12(%%2)]", size);
++      break;
++
++    default:
++      abort ();
++    }
++
++  output_asm_insn (pattern, operands);
++  return "";
++}
++
++/* Output 32-bit load with signed extension.  */
++const char *
++nds32_output_32bit_load_se (rtx *operands, int byte)
++{
++  char pattern[100];
++  unsigned char size;
++  rtx code;
++
++  code = XEXP (operands[1], 0);
++
++  size = nds32_byte_to_size (byte);
++
++  switch (GET_CODE (code))
++    {
++    case REG:
++      /* (mem (reg X))
++	 => access location by using register,
++	 use "lbsi / lhsi" */
++      snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size);
++      break;
++
++    case SYMBOL_REF:
++    case CONST:
++      /* (mem (symbol_ref X))
++	 (mem (const (...)))
++	 => access global variables,
++	 use "lbsi.gp / lhsi.gp" */
++      operands[1] = XEXP (operands[1], 0);
++      snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size);
++      break;
++
++    case POST_INC:
++      /* (mem (post_inc reg))
++	 => access location by using register which will be post increment,
++	 use "lbsi.bi / lhsi.bi" */
++      snprintf (pattern, sizeof (pattern),
++		"l%csi.bi\t%%0, %%1, %d", size, byte);
++      break;
++
++    case POST_DEC:
++      /* (mem (post_dec reg))
++	 => access location by using register which will be post decrement,
++	 use "lbsi.bi / lhsi.bi" */
++      snprintf (pattern, sizeof (pattern),
++		"l%csi.bi\t%%0, %%1, -%d", size, byte);
++      break;
++
++    case POST_MODIFY:
++      switch (GET_CODE (XEXP (XEXP (code, 1), 1)))
++	{
++	case REG:
++	case SUBREG:
++	  /* (mem (post_modify (reg) (plus (reg) (reg))))
++	     => access location by using register which will be
++	     post modified with reg,
++	     use "lbs.bi/ lhs.bi" */
++	  snprintf (pattern, sizeof (pattern), "l%cs.bi\t%%0, %%1", size);
++	  break;
++	case CONST_INT:
++	  /* (mem (post_modify (reg) (plus (reg) (const_int))))
++	     => access location by using register which will be
++	     post modified with const_int,
++	     use "lbsi.bi/ lhsi.bi" */
++	  snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size);
++	  break;
++	default:
++	  abort ();
++	}
++      break;
++
++    case PLUS:
++      switch (GET_CODE (XEXP (code, 1)))
++	{
++	case REG:
++	case SUBREG:
++	  /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg))
++	     use "lbs / lhs" */
++	  snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size);
++	  break;
++	case CONST_INT:
++	  /* (mem (plus reg const_int))
++	     => access location by adding one register with const_int,
++	     use "lbsi / lhsi" */
++	  snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size);
++	  break;
++	default:
++	  abort ();
++	}
++      break;
++
++    case LO_SUM:
++      operands[2] = XEXP (code, 1);
++      operands[1] = XEXP (code, 0);
++      snprintf (pattern, sizeof (pattern),
++		"l%csi\t%%0, [%%1 + lo12(%%2)]", size);
++      break;
++
++    default:
++      abort ();
++    }
++
++  output_asm_insn (pattern, operands);
++  return "";
++}
++
++/* Function to output stack push operation.
++   We need to deal with normal stack push multiple or stack v3push.  */
++const char *
++nds32_output_stack_push (rtx par_rtx)
++{
++  /* A string pattern for output_asm_insn().  */
++  char pattern[100];
++  /* The operands array which will be used in output_asm_insn().  */
++  rtx operands[3];
++  /* Pick up varargs first regno and last regno for further use.  */
++  int rb_va_args = cfun->machine->va_args_first_regno;
++  int re_va_args = cfun->machine->va_args_last_regno;
++  int last_argument_regno = NDS32_FIRST_GPR_REGNUM
++			    + NDS32_MAX_GPR_REGS_FOR_ARGS
++			    - 1;
++  /* Pick up first and last eh data regno for further use.  */
++  int rb_eh_data = cfun->machine->eh_return_data_first_regno;
++  int re_eh_data = cfun->machine->eh_return_data_last_regno;
++  int first_eh_data_regno = EH_RETURN_DATA_REGNO (0);
++  /* Pick up callee-saved first regno and last regno for further use.  */
++  int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno;
++  int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno;
++
++  /* First we need to check if we are pushing argument registers not used
++     for the named arguments.  If so, we have to create 'smw.adm' (push.s)
++     instruction.  */
++  if (reg_mentioned_p (gen_rtx_REG (SImode, last_argument_regno), par_rtx))
++    {
++      /* Set operands[0] and operands[1].  */
++      operands[0] = gen_rtx_REG (SImode, rb_va_args);
++      operands[1] = gen_rtx_REG (SImode, re_va_args);
++      /* Create assembly code pattern: "Rb, Re, { }".  */
++      snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }");
++      /* We use output_asm_insn() to output assembly code by ourself.  */
++      output_asm_insn (pattern, operands);
++      return "";
++    }
++
++  /* If last_argument_regno is not mentioned in par_rtx, we can confirm that
++     we do not need to push argument registers for variadic function.
++     But we still need to check if we need to push exception handling
++     data registers.  */
++  if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx))
++    {
++      /* Set operands[0] and operands[1].  */
++      operands[0] = gen_rtx_REG (SImode, rb_eh_data);
++      operands[1] = gen_rtx_REG (SImode, re_eh_data);
++      /* Create assembly code pattern: "Rb, Re, { }".  */
++      snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }");
++      /* We use output_asm_insn() to output assembly code by ourself.  */
++      output_asm_insn (pattern, operands);
++      return "";
++    }
++
++  /* If we step here, we are going to do v3push or multiple push operation.  */
++
++  /* Refer to nds32.h, where we comment when push25/pop25 are available.  */
++  if (NDS32_V3PUSH_AVAILABLE_P)
++    {
++      /* For stack v3push:
++	   operands[0]: Re
++	   operands[1]: imm8u */
++
++      /* This variable is to check if 'push25 Re,imm8u' is available.  */
++      int sp_adjust;
++
++      /* Set operands[0].  */
++      operands[0] = gen_rtx_REG (SImode, re_callee_saved);
++
++      /* Check if we can generate 'push25 Re,imm8u',
++	 otherwise, generate 'push25 Re,0'.  */
++      sp_adjust = cfun->machine->local_size
++		  + cfun->machine->out_args_size
++		  + cfun->machine->callee_saved_area_gpr_padding_bytes
++		  + cfun->machine->callee_saved_fpr_regs_size;
++      if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
++	  && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust))
++	operands[1] = GEN_INT (sp_adjust);
++      else
++	{
++	  /* Allocate callee saved fpr space.  */
++	  if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	    {
++	      sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes
++			  + cfun->machine->callee_saved_fpr_regs_size;
++	      operands[1] = GEN_INT (sp_adjust);
++	    }
++	  else
++	    {
++	      operands[1] = GEN_INT (0);
++	    }
++	}
++
++      /* Create assembly code pattern.  */
++      snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1");
++    }
++  else
++    {
++      /* For normal stack push multiple:
++	 operands[0]: Rb
++	 operands[1]: Re
++	 operands[2]: En4 */
++
++      /* This variable is used to check if we only need to generate En4 field.
++	 As long as Rb==Re=SP_REGNUM, we set this variable to 1.  */
++      int push_en4_only_p = 0;
++
++      /* Set operands[0] and operands[1].  */
++      operands[0] = gen_rtx_REG (SImode, rb_callee_saved);
++      operands[1] = gen_rtx_REG (SImode, re_callee_saved);
++
++      /* 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
++      if (!cfun->machine->fp_size
++	  && !cfun->machine->gp_size
++	  && !cfun->machine->lp_size
++	  && REGNO (operands[0]) == SP_REGNUM
++	  && REGNO (operands[1]) == SP_REGNUM)
++	{
++	  /* No need to generate instruction.  */
++	  return "";
++	}
++      else
++	{
++	  /* If Rb==Re=SP_REGNUM, we only need to generate En4 field.  */
++	  if (REGNO (operands[0]) == SP_REGNUM
++	      && REGNO (operands[1]) == SP_REGNUM)
++	    push_en4_only_p = 1;
++
++	  /* Create assembly code pattern.
++	     We need to handle the form: "Rb, Re, { $fp $gp $lp }".  */
++	  snprintf (pattern, sizeof (pattern),
++		    "push.s\t%s{%s%s%s }",
++		    push_en4_only_p ? "" : "%0, %1, ",
++		    cfun->machine->fp_size ? " $fp" : "",
++		    cfun->machine->gp_size ? " $gp" : "",
++		    cfun->machine->lp_size ? " $lp" : "");
++	}
++    }
++
++  /* We use output_asm_insn() to output assembly code by ourself.  */
++  output_asm_insn (pattern, operands);
++  return "";
++}
++
++/* Function to output stack pop operation.
++   We need to deal with normal stack pop multiple or stack v3pop.  */
++const char *
++nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED)
++{
++  /* A string pattern for output_asm_insn().  */
++  char pattern[100];
++  /* The operands array which will be used in output_asm_insn().  */
++  rtx operands[3];
++  /* Pick up first and last eh data regno for further use.  */
++  int rb_eh_data = cfun->machine->eh_return_data_first_regno;
++  int re_eh_data = cfun->machine->eh_return_data_last_regno;
++  int first_eh_data_regno = EH_RETURN_DATA_REGNO (0);
++  /* Pick up callee-saved first regno and last regno for further use.  */
++  int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno;
++  int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno;
++
++  /* We need to check if we need to pop exception handling
++     data registers.  */
++  if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx))
++    {
++      /* Set operands[0] and operands[1].  */
++      operands[0] = gen_rtx_REG (SImode, rb_eh_data);
++      operands[1] = gen_rtx_REG (SImode, re_eh_data);
++      /* Create assembly code pattern: "Rb, Re, { }".  */
++      snprintf (pattern, sizeof (pattern), "pop.s\t%s", "%0, %1, { }");
++      /* We use output_asm_insn() to output assembly code by ourself.  */
++      output_asm_insn (pattern, operands);
++      return "";
++    }
++
++  /* If we step here, we are going to do v3pop or multiple pop operation.  */
++
++  /* Refer to nds32.h, where we comment when push25/pop25 are available.  */
++  if (NDS32_V3PUSH_AVAILABLE_P)
++    {
++      /* For stack v3pop:
++	   operands[0]: Re
++	   operands[1]: imm8u */
++
++      /* This variable is to check if 'pop25 Re,imm8u' is available.  */
++      int sp_adjust;
++
++      /* Set operands[0].  */
++      operands[0] = gen_rtx_REG (SImode, re_callee_saved);
++
++      /* Check if we can generate 'pop25 Re,imm8u',
++	 otherwise, generate 'pop25 Re,0'.
++	 We have to consider alloca issue as well.
++	 If the function does call alloca(), the stack pointer is not fixed.
++	 In that case, we cannot use 'pop25 Re,imm8u' directly.
++	 We have to calculate stack pointer from frame pointer
++	 and then use 'pop25 Re,0'.  */
++      sp_adjust = cfun->machine->local_size
++		  + cfun->machine->out_args_size
++		  + cfun->machine->callee_saved_area_gpr_padding_bytes
++		  + cfun->machine->callee_saved_fpr_regs_size;
++      if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
++	  && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)
++	  && !cfun->calls_alloca)
++	operands[1] = GEN_INT (sp_adjust);
++      else
++	{
++	  if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	    {
++	      /* If FPRs need to be restored, $sp is at the callee-saved FPR
++		 position, so we need to consider the GPR padding bytes and
++		 callee-saved FPR size.  */
++	      sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes
++			  + cfun->machine->callee_saved_fpr_regs_size;
++	      operands[1] = GEN_INT (sp_adjust);
++	    }
++	  else
++	    {
++	      operands[1] = GEN_INT (0);
++	    }
++	}
++
++      /* Create assembly code pattern.  */
++      snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1");
++    }
++  else
++    {
++      /* For normal stack pop multiple:
++	 operands[0]: Rb
++	 operands[1]: Re
++	 operands[2]: En4 */
++
++      /* This variable is used to check if we only need to generate En4 field.
++	 As long as Rb==Re=SP_REGNUM, we set this variable to 1.  */
++      int pop_en4_only_p = 0;
++
++      /* Set operands[0] and operands[1].  */
++      operands[0] = gen_rtx_REG (SImode, rb_callee_saved);
++      operands[1] = gen_rtx_REG (SImode, re_callee_saved);
++
++      /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
++      if (!cfun->machine->fp_size
++	  && !cfun->machine->gp_size
++	  && !cfun->machine->lp_size
++	  && REGNO (operands[0]) == SP_REGNUM
++	  && REGNO (operands[1]) == SP_REGNUM)
++	{
++	  /* No need to generate instruction.  */
++	  return "";
++	}
++      else
++	{
++	  /* If Rb==Re=SP_REGNUM, we only need to generate En4 field.  */
++	  if (REGNO (operands[0]) == SP_REGNUM
++	      && REGNO (operands[1]) == SP_REGNUM)
++	    pop_en4_only_p = 1;
++
++	  /* Create assembly code pattern.
++	     We need to handle the form: "Rb, Re, { $fp $gp $lp }".  */
++	  snprintf (pattern, sizeof (pattern),
++		    "pop.s\t%s{%s%s%s }",
++		    pop_en4_only_p ? "" : "%0, %1, ",
++		    cfun->machine->fp_size ? " $fp" : "",
++		    cfun->machine->gp_size ? " $gp" : "",
++		    cfun->machine->lp_size ? " $lp" : "");
++	}
++    }
++
++  /* We use output_asm_insn() to output assembly code by ourself.  */
++  output_asm_insn (pattern, operands);
++  return "";
++}
++
++/* Function to output return operation.  */
++const char *
++nds32_output_return (void)
++{
++  /* A string pattern for output_asm_insn().  */
++  char pattern[100];
++  /* The operands array which will be used in output_asm_insn().  */
++  rtx operands[2];
++  /* For stack v3pop:
++     operands[0]: Re
++     operands[1]: imm8u */
++  int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno;
++  int sp_adjust;
++
++  /* Set operands[0].  */
++  operands[0] = gen_rtx_REG (SImode, re_callee_saved);
++
++  /* Check if we can generate 'pop25 Re,imm8u',
++     otherwise, generate 'pop25 Re,0'.
++     We have to consider alloca issue as well.
++     If the function does call alloca(), the stack pointer is not fixed.
++     In that case, we cannot use 'pop25 Re,imm8u' directly.
++     We have to calculate stack pointer from frame pointer
++     and then use 'pop25 Re,0'.  */
++  sp_adjust = cfun->machine->local_size
++    + cfun->machine->out_args_size
++    + cfun->machine->callee_saved_area_gpr_padding_bytes
++    + cfun->machine->callee_saved_fpr_regs_size;
++  if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
++      && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)
++      && !cfun->calls_alloca)
++    operands[1] = GEN_INT (sp_adjust);
++  else
++    operands[1] = GEN_INT (0);
++
++  /* Create assembly code pattern.  */
++  snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1");
++  /* We use output_asm_insn() to output assembly code by ourself.  */
++  output_asm_insn (pattern, operands);
++  return "";
++}
++
++
++/* Output a float load instruction.  */
++const char *
++nds32_output_float_load (rtx *operands)
++{
++  char buff[100];
++  const char *pattern;
++  rtx addr, addr_op0, addr_op1;
++  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
++  addr = XEXP (operands[1], 0);
++  switch (GET_CODE (addr))
++    {
++    case REG:
++      pattern = "fl%ci\t%%0, %%1";
++      break;
++
++    case PLUS:
++      addr_op0 = XEXP (addr, 0);
++      addr_op1 = XEXP (addr, 1);
++
++      if (REG_P (addr_op0) && REG_P (addr_op1))
++	pattern = "fl%c\t%%0, %%1";
++      else if (REG_P (addr_op0) && CONST_INT_P (addr_op1))
++	pattern = "fl%ci\t%%0, %%1";
++      else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1)
++	       && REG_P (XEXP (addr_op0, 0))
++	       && CONST_INT_P (XEXP (addr_op0, 1)))
++	pattern = "fl%c\t%%0, %%1";
++      else
++	gcc_unreachable ();
++      break;
++
++    case POST_MODIFY:
++      addr_op0 = XEXP (addr, 0);
++      addr_op1 = XEXP (addr, 1);
++
++      if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS
++	  && REG_P (XEXP (addr_op1, 1)))
++	pattern = "fl%c.bi\t%%0, %%1";
++      else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS
++	       && CONST_INT_P (XEXP (addr_op1, 1)))
++	pattern = "fl%ci.bi\t%%0, %%1";
++      else
++	gcc_unreachable ();
++      break;
++
++    case POST_INC:
++      if (REG_P (XEXP (addr, 0)))
++	{
++	  if (dp)
++	    pattern = "fl%ci.bi\t%%0, %%1, 8";
++	  else
++	    pattern = "fl%ci.bi\t%%0, %%1, 4";
++	}
++      else
++	gcc_unreachable ();
++      break;
++
++    case POST_DEC:
++      if (REG_P (XEXP (addr, 0)))
++	{
++	  if (dp)
++	    pattern = "fl%ci.bi\t%%0, %%1, -8";
++	  else
++	    pattern = "fl%ci.bi\t%%0, %%1, -4";
++	}
++      else
++	gcc_unreachable ();
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  sprintf (buff, pattern, dp ? 'd' : 's');
++  output_asm_insn (buff, operands);
++  return "";
++}
++
++/* Output a float store instruction.  */
++const char *
++nds32_output_float_store (rtx *operands)
++{
++  char buff[100];
++  const char *pattern;
++  rtx addr, addr_op0, addr_op1;
++  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
++  addr = XEXP (operands[0], 0);
++  switch (GET_CODE (addr))
++    {
++    case REG:
++      pattern = "fs%ci\t%%1, %%0";
++      break;
++
++    case PLUS:
++      addr_op0 = XEXP (addr, 0);
++      addr_op1 = XEXP (addr, 1);
++
++      if (REG_P (addr_op0) && REG_P (addr_op1))
++	pattern = "fs%c\t%%1, %%0";
++      else if (REG_P (addr_op0) && CONST_INT_P (addr_op1))
++	pattern = "fs%ci\t%%1, %%0";
++      else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1)
++	       && REG_P (XEXP (addr_op0, 0))
++	       && CONST_INT_P (XEXP (addr_op0, 1)))
++	pattern = "fs%c\t%%1, %%0";
++      else
++	gcc_unreachable ();
++      break;
++
++    case POST_MODIFY:
++      addr_op0 = XEXP (addr, 0);
++      addr_op1 = XEXP (addr, 1);
++
++      if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS
++	  && REG_P (XEXP (addr_op1, 1)))
++	pattern = "fs%c.bi\t%%1, %%0";
++      else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS
++	       && CONST_INT_P (XEXP (addr_op1, 1)))
++	pattern = "fs%ci.bi\t%%1, %%0";
++      else
++	gcc_unreachable ();
++      break;
++
++    case POST_INC:
++      if (REG_P (XEXP (addr, 0)))
++	{
++	  if (dp)
++	    pattern = "fs%ci.bi\t%%1, %%0, 8";
++	  else
++	    pattern = "fs%ci.bi\t%%1, %%0, 4";
++	}
++      else
++	gcc_unreachable ();
++      break;
++
++    case POST_DEC:
++      if (REG_P (XEXP (addr, 0)))
++	{
++	  if (dp)
++	    pattern = "fs%ci.bi\t%%1, %%0, -8";
++	  else
++	    pattern = "fs%ci.bi\t%%1, %%0, -4";
++	}
++      else
++	gcc_unreachable ();
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  sprintf (buff, pattern, dp ? 'd' : 's');
++  output_asm_insn (buff, operands);
++  return "";
++}
++/* Output smw.bi[m] storing register operands[1] through MEM operands[0].  */
++const char *
++nds32_output_smw_single_word (rtx *operands)
++{
++  char buff[100];
++  unsigned regno;
++  int enable4;
++  bool update_base_p;
++  rtx base_addr = operands[0];
++  rtx base_reg;
++  rtx otherops[2];
++
++  if (REG_P (XEXP (base_addr, 0)))
++    {
++      update_base_p = false;
++      base_reg = XEXP (base_addr, 0);
++    }
++  else
++    {
++      update_base_p = true;
++      base_reg = XEXP (XEXP (base_addr, 0), 0); /* Base inside the address.  */
++    }
++
++  const char *update_base = update_base_p ? "m" : ""; /* Mnemonic suffix.  */
++
++  regno = REGNO (operands[1]);
++
++  otherops[0] = base_reg;
++  otherops[1] = operands[1];
++
++  if (regno >= 28) /* Selected through the enable4 operand instead.  */
++    {
++      enable4 = nds32_regno_to_enable4 (regno);
++      sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4);
++    }
++  else
++    {
++      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base);
++    }
++  output_asm_insn (buff, otherops);
++  return ""; /* The instruction was already emitted above.  */
++}
++/* Output smw.bi[m] storing the register pair at operands[1] to operands[0].  */
++const char *
++nds32_output_smw_double_word (rtx *operands)
++{
++  char buff[100];
++  unsigned regno;
++  int enable4;
++  bool update_base_p;
++  rtx base_addr = operands[0];
++  rtx base_reg;
++  rtx otherops[3];
++
++  if (REG_P (XEXP (base_addr, 0)))
++    {
++      update_base_p = false;
++      base_reg = XEXP (base_addr, 0);
++    }
++  else
++    {
++      update_base_p = true;
++      base_reg = XEXP (XEXP (base_addr, 0), 0); /* Base inside the address.  */
++    }
++
++  const char *update_base = update_base_p ? "m" : ""; /* Mnemonic suffix.  */
++
++  regno = REGNO (operands[1]);
++
++  otherops[0] = base_reg;
++  otherops[1] = operands[1];
++  otherops[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
++
++  if (regno >= 28) /* Both registers go through the enable4 operand.  */
++    {
++      enable4 = nds32_regno_to_enable4 (regno)
++		| nds32_regno_to_enable4 (regno + 1);
++      sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4);
++    }
++  else if (regno == 27) /* Only the second register needs enable4.  */
++    {
++      enable4 = nds32_regno_to_enable4 (regno + 1);
++      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1, %x", update_base, enable4);
++    }
++  else
++    {
++      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%2", update_base);
++    }
++  output_asm_insn (buff, otherops);
++  return ""; /* The instruction was already emitted above.  */
++}
++
++/* Output lmw.bi[m] loading register operands[0] through MEM operands[1].  */
++const char *
++nds32_output_lmw_single_word (rtx *operands)
++{
++  char buff[100];
++  unsigned regno;
++  bool update_base_p;
++  int enable4;
++  rtx base_addr = operands[1];
++  rtx base_reg;
++  rtx otherops[2];
++
++  if (REG_P (XEXP (base_addr, 0)))
++    {
++      update_base_p = false;
++      base_reg = XEXP (base_addr, 0);
++    }
++  else
++    {
++      update_base_p = true;
++      base_reg = XEXP (XEXP (base_addr, 0), 0); /* Base inside the address.  */
++    }
++
++  const char *update_base = update_base_p ? "m" : ""; /* Mnemonic suffix.  */
++
++  regno = REGNO (operands[0]);
++
++  otherops[0] = operands[0];
++  otherops[1] = base_reg;
++
++  if (regno >= 28) /* Selected through the enable4 operand instead.  */
++    {
++      enable4 = nds32_regno_to_enable4 (regno);
++      sprintf (buff, "lmw.bi%s\t$sp, [%%1], $sp, %x", update_base, enable4);
++    }
++  else
++    {
++      sprintf (buff, "lmw.bi%s\t%%0, [%%1], %%0", update_base);
++    }
++  output_asm_insn (buff, otherops);
++  return ""; /* The instruction was already emitted above.  */
++}
++/* Expand a MODE load into operands[0] as byte loads, shifts and ORs.  */
++void
++nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode)
++{
++  /* Initial memory offset.  */
++  int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0;
++  int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1;
++  /* Initial register shift byte.  */
++  int shift = 0;
++  /* Bytes still to load after the first one; the first is handled apart.  */
++  int width = GET_MODE_SIZE (mode) - 1;
++  rtx mem[2];
++  rtx reg[2];
++  rtx sub_reg;
++  rtx temp_reg, temp_sub_reg;
++  int num_reg;
++
++  /* Generating a series of load byte instructions.
++     The first load byte instruction differs from
++     the following load byte instructions, like:
++     First:
++       lbi reg0, [mem]
++       zeh reg0, reg0
++     Second:
++       lbi temp_reg, [mem + offset]
++       sll temp_reg, (8 * shift)
++       ior reg0, temp_reg
++
++       lbi temp_reg, [mem + (offset + 1)]
++       sll temp_reg, (8 * (shift + 1))
++       ior reg0, temp_reg  */
++
++  temp_reg = gen_reg_rtx (SImode);
++  temp_sub_reg = gen_lowpart (QImode, temp_reg);
++
++  if (mode == DImode)
++    {
++      /* Load doubleword, we need two registers to access.  */
++      reg[0] = nds32_di_low_part_subreg (operands[0]);
++      reg[1] = nds32_di_high_part_subreg (operands[0]);
++      /* A register only holds 4 bytes.  */
++      width = GET_MODE_SIZE (SImode) - 1;
++    }
++  else
++    {
++      if (VECTOR_MODE_P (mode))
++	reg[0] = gen_reg_rtx (SImode); /* Assemble in a scratch register.  */
++      else
++	reg[0] = operands[0];
++    }
++
++  for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--)
++    {
++      sub_reg = gen_lowpart (QImode, reg[0]);
++      mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[1], offset));
++
++      /* Generating the first part instructions.
++	   lbi reg0, [mem]
++	   zeh reg0, reg0 */
++      emit_move_insn (sub_reg, mem[0]);
++      emit_insn (gen_zero_extendqisi2 (reg[0], sub_reg));
++
++      while (width > 0)
++	{
++	  offset = offset + offset_adj;
++	  shift++;
++	  width--;
++
++	  mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode,
++						       operands[1],
++						       offset));
++	  /* Generating the second part instructions.
++	       lbi temp_reg, [mem + offset]
++	       sll temp_reg, (8 * shift)
++	       ior reg0, temp_reg  */
++	  emit_move_insn (temp_sub_reg, mem[1]);
++	  emit_insn (gen_ashlsi3 (temp_reg, temp_reg,
++				  GEN_INT (shift * 8)));
++	  emit_insn (gen_iorsi3 (reg[0], reg[0], temp_reg));
++	}
++
++      if (mode == DImode)
++	{
++	  /* Continue with the second register for the remaining bytes.  */
++	  reg[0] = reg[1];
++	  shift = 0;
++	  width = GET_MODE_SIZE (SImode) - 1;
++	  offset = offset + offset_adj;
++	}
++    }
++    if (VECTOR_MODE_P (mode))
++      convert_move (operands[0], reg[0], false); /* Scratch to destination.  */
++}
++/* Expand a MODE store of operands[1] as byte stores of shifted copies.  */
++void
++nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode)
++{
++  /* Initial memory offset.  */
++  int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0;
++  int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1;
++  /* Initial register shift byte.  */
++  int shift = 0;
++  /* Bytes still to store after the first one; the first is handled apart.  */
++  int width = GET_MODE_SIZE (mode) - 1;
++  rtx mem[2];
++  rtx reg[2];
++  rtx sub_reg;
++  rtx temp_reg, temp_sub_reg;
++  int num_reg;
++
++  /* Generating a series of store byte instructions.
++     The first store byte instruction differs from
++     the following store byte instructions, like:
++     First:
++	sbi  reg0, [mem + 0]
++     Second:
++	srli    temp_reg, reg0, (8 * shift)
++	sbi	temp_reg, [mem + offset]  */
++
++  temp_reg = gen_reg_rtx (SImode);
++  temp_sub_reg = gen_lowpart (QImode, temp_reg);
++
++  if (mode == DImode)
++    {
++      /* Store doubleword, we need two registers to access.  */
++      reg[0] = nds32_di_low_part_subreg (operands[1]);
++      reg[1] = nds32_di_high_part_subreg (operands[1]);
++      /* A register only holds 4 bytes.  */
++      width = GET_MODE_SIZE (SImode) - 1;
++    }
++  else
++    {
++      if (VECTOR_MODE_P (mode))
++	{
++	  reg[0] = gen_reg_rtx (SImode);
++	  convert_move (reg[0], operands[1], false); /* Source to scratch.  */
++	}
++      else
++	reg[0] = operands[1];
++    }
++
++  for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--)
++    {
++      sub_reg = gen_lowpart (QImode, reg[0]);
++      mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[0], offset));
++
++      /* Generating the first part instructions.
++	   sbi reg0, [mem + 0] */
++      emit_move_insn (mem[0], sub_reg);
++
++      while (width > 0)
++	{
++	  offset = offset + offset_adj;
++	  shift++;
++	  width--;
++
++	  mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode,
++						       operands[0],
++						       offset));
++	  /* Generating the second part instructions.
++	       srli  temp_reg, reg0, (8 * shift)
++	       sbi   temp_reg, [mem + offset]  */
++	  emit_insn (gen_lshrsi3 (temp_reg, reg[0],
++				  GEN_INT (shift * 8)));
++	  emit_move_insn (mem[1], temp_sub_reg);
++	}
++
++      if (mode == DImode)
++	{
++	  /* Continue with the second register for the remaining bytes.  */
++	  reg[0] = reg[1];
++	  shift = 0;
++	  width = GET_MODE_SIZE (SImode) - 1;
++	  offset = offset + offset_adj;
++	}
++    }
++}
++
++/* Output a doubleword load or store as one lmw.bi[m]/smw.bi[m] instruction.  */
++const char *
++nds32_output_double (rtx *operands, bool load_p)
++{
++  char pattern[100];
++  int reg = load_p ? 0 : 1; /* Index of the register operand.  */
++  int mem = load_p ? 1 : 0; /* Index of the memory operand.  */
++  rtx otherops[3];
++  rtx addr = XEXP (operands[mem], 0);
++
++  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[reg]));
++  otherops[1] = gen_rtx_REG (SImode, REGNO (operands[reg]) + 1);
++
++  if (GET_CODE (addr)  == POST_INC)
++    {
++      /* (mem (post_inc (reg))) */
++      otherops[2] = XEXP (addr, 0);
++      snprintf (pattern, sizeof (pattern),
++		"%cmw.bim\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's');
++    }
++  else
++    {
++      /* (mem (reg)) */
++      otherops[2] = addr;
++      snprintf (pattern, sizeof (pattern),
++		"%cmw.bi\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's');
++
++    }
++
++  output_asm_insn (pattern, otherops);
++  return ""; /* The instruction was already emitted above.  */
++}
++/* Output an eq/ne-against-zero branch chosen by insn length and alternative.  */
++const char *
++nds32_output_cbranchsi4_equality_zero (rtx_insn *insn, rtx *operands)
++{
++  enum rtx_code code;
++  bool long_jump_p = false;
++
++  code = GET_CODE (operands[0]);
++
++  /* This zero-comparison conditional branch has two forms:
++       32-bit instruction =>          beqz/bnez           imm16s << 1
++       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
++
++     For 32-bit case,
++     we assume it is always reachable. (but check range -65500 ~ 65500)
++
++     For 16-bit case,
++     it must satisfy { 255 >= (label - pc) >= -256 } condition.
++     However, since the $pc for nds32 is at the beginning of the instruction,
++     we should leave some length space for current insn.
++     So we use range -250 ~ 250.  */
++
++  switch (get_attr_length (insn))
++    {
++    case 8:
++      long_jump_p = true;
++      /* fall through  */
++    case 2:
++      if (which_alternative == 0)
++	{
++	  /* constraint: t */
++	  /*    b<cond>zs8  .L0
++	      or
++		b<inverse_cond>zs8  .LCB0
++		j  .L0
++	      .LCB0:
++	   */
++	  output_cond_branch_compare_zero (code, "s8", long_jump_p,
++					   operands, true);
++	  return "";
++	}
++      else if (which_alternative == 1)
++	{
++	  /* constraint: l */
++	  /*    b<cond>z38  $r0, .L0
++	      or
++		b<inverse_cond>z38  $r0, .LCB0
++		j  .L0
++	      .LCB0:
++	   */
++	  output_cond_branch_compare_zero (code, "38", long_jump_p,
++					   operands, false);
++	  return "";
++	}
++      else
++	{
++	  /* constraint: r */
++	  /* which_alternative == 2 must never have length 2 or 8.  */
++	  gcc_unreachable ();
++	}
++    case 10:
++      /* including constraints: t, l, and r */
++      long_jump_p = true;
++      /* fall through  */
++    case 4:
++      /* including constraints: t, l, and r */
++      output_cond_branch_compare_zero (code, "", long_jump_p, operands, false);
++      return "";
++
++    default:
++      gcc_unreachable ();
++    }
++}
++/* Output an EQ/NE register-register branch; form is picked from insn length.  */
++const char *
++nds32_output_cbranchsi4_equality_reg (rtx_insn *insn, rtx *operands)
++{
++  enum rtx_code code;
++  bool long_jump_p, r5_p;
++  int insn_length;
++
++  insn_length = get_attr_length (insn);
++
++  long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false;
++  r5_p = (insn_length == 2 || insn_length == 8) ? true : false;
++
++  code = GET_CODE (operands[0]);
++
++  /* This register-comparison conditional branch has one form:
++       32-bit instruction =>          beq/bne           imm14s << 1
++
++     For 32-bit case,
++     we assume it is always reachable. (but check range -16350 ~ 16350).  */
++
++  switch (code)
++    {
++    case EQ:
++    case NE:
++      output_cond_branch (code, "", r5_p, long_jump_p, operands);
++      return ""; /* The branch was already emitted above.  */
++
++    default:
++      gcc_unreachable ();
++    }
++}
++/* Output an EQ/NE branch against a register or, for alternative 2, a constant.  */
++const char *
++nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *insn,
++						   rtx *operands)
++{
++  enum rtx_code code;
++  bool long_jump_p, r5_p;
++  int insn_length;
++
++  insn_length = get_attr_length (insn);
++
++  long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false;
++  r5_p = (insn_length == 2 || insn_length == 8) ? true : false;
++
++  code = GET_CODE (operands[0]);
++
++  /* This register-comparison conditional branch has two forms:
++       32-bit instruction =>          beq/bne           imm14s << 1
++       32-bit instruction =>         beqc/bnec          imm8s << 1
++
++     For 32-bit case, we assume it is always reachable.
++     (but check range -16350 ~ 16350 and -250 ~ 250).  */
++
++  switch (code)
++    {
++    case EQ:
++    case NE:
++      if (which_alternative == 2)
++	{
++	  /* r, Is11 */
++	  /* b<cond>c */
++	  output_cond_branch (code, "c", r5_p, long_jump_p, operands);
++	}
++      else
++	{
++	  /* r, r */
++	  /* v, r */
++	  output_cond_branch (code, "", r5_p, long_jump_p, operands);
++	}
++      return ""; /* The branch was already emitted above.  */
++    default:
++      gcc_unreachable ();
++    }
++}
++/* Output a GT/GE/LT/LE-against-zero branch; length 10 means a long jump.  */
++const char *
++nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands)
++{
++  enum rtx_code code;
++  bool long_jump_p;
++  int insn_length;
++
++  insn_length = get_attr_length (insn);
++
++  gcc_assert (insn_length == 4 || insn_length == 10);
++
++  long_jump_p = (insn_length == 10) ? true : false;
++
++  code = GET_CODE (operands[0]);
++
++  /* This zero-greater-less-comparison conditional branch has one form:
++       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
++
++     For 32-bit case, we assume it is always reachable.
++     (but check range -65500 ~ 65500).  */
++
++  switch (code)
++    {
++    case GT:
++    case GE:
++    case LT:
++    case LE:
++      output_cond_branch_compare_zero (code, "", long_jump_p, operands, false);
++      break;
++    default:
++      gcc_unreachable ();
++    }
++  return ""; /* The branch was already emitted above.  */
++}
++/* Output [sz]unpkd8<high><low> of INPUT into OUTPUT, or return "#" if no such form.  */
++const char *
++nds32_output_unpkd8 (rtx output, rtx input,
++		     rtx high_idx_rtx, rtx low_idx_rtx,
++		     bool signed_p)
++{
++  char pattern[100];
++  rtx output_operands[2];
++  HOST_WIDE_INT high_idx, low_idx;
++  high_idx = INTVAL (high_idx_rtx);
++  low_idx = INTVAL (low_idx_rtx);
++
++  gcc_assert (high_idx >= 0 && high_idx <= 3);
++  gcc_assert (low_idx >= 0 && low_idx <= 3);
++
++  /* Only the (high, low) pairs 10, 20, 30 and 31 exist; return "#" otherwise.  */
++  if ((low_idx != 0 || high_idx == 0) &&
++      !(low_idx == 1 && high_idx == 3))
++    return "#";
++
++  char sign_char = signed_p ? 's' : 'z'; /* Mnemonic prefix.  */
++
++  sprintf (pattern,
++	   "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1",
++	   sign_char, high_idx, low_idx);
++  output_operands[0] = output;
++  output_operands[1] = input;
++  output_asm_insn (pattern, output_operands);
++  return ""; /* The instruction was already emitted above.  */
++}
++
++/* Return true if SYMBOL_REF X binds locally (asking its decl when it has one).  */
++
++static bool
++nds32_symbol_binds_local_p (const_rtx x)
++{
++  return (SYMBOL_REF_DECL (x)
++	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
++	  : SYMBOL_REF_LOCAL_P (x)); /* No decl: use the symbol's own flag.  */
++}
++/* Output the call template, plus "@PLT", ".align 2" and a nop when required.  */
++const char *
++nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call,
++		   const char *call, bool align_p)
++{
++  char pattern[100]; /* LONG_CALL/CALL plus the suffixes below must fit.  */
++  bool noreturn_p;
++
++  if (nds32_long_call_p (symbol))
++    strcpy (pattern, long_call);
++  else
++    strcpy (pattern, call);
++
++  if (flag_pic && CONSTANT_P (symbol)
++      && !nds32_symbol_binds_local_p (symbol))
++    strcat (pattern, "@PLT");
++
++  if (align_p)
++    strcat (pattern, "\n\t.align 2");
++
++  noreturn_p = find_reg_note (insn, REG_NORETURN, NULL_RTX) != NULL_RTX;
++
++  if (noreturn_p) /* A nop is placed after a noreturn call.  */
++    {
++      if (TARGET_16_BIT)
++	strcat (pattern, "\n\tnop16");
++      else
++	strcat (pattern, "\n\tnop");
++    }
++
++  output_asm_insn (pattern, operands);
++  return ""; /* The call was already emitted above.  */
++}
++/* Return true unless the four indices match the smds, smdrs or smxds forms.  */
++bool
++nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0,
++			rtx in0_idx1, rtx in1_idx1)
++{
++  /* smds or smdrs.  */
++  if (INTVAL (in0_idx0) == INTVAL (in1_idx0)
++      && INTVAL (in0_idx1) == INTVAL (in1_idx1)
++      && INTVAL (in0_idx0) != INTVAL (in0_idx1))
++    return false;
++
++  /* smxds.  */
++  if (INTVAL (in0_idx0) != INTVAL (in0_idx1)
++      && INTVAL (in1_idx0) != INTVAL (in1_idx1))
++    return false;
++
++  return true; /* No single instruction matches.  */
++}
++/* Return the smds/smdrs/smxds template for the given indices, or "#" to split.  */
++const char *
++nds32_output_sms (rtx in0_idx0, rtx in1_idx0,
++		  rtx in0_idx1, rtx in1_idx1)
++{
++  if (nds32_need_split_sms_p (in0_idx0, in1_idx0,
++			      in0_idx1, in1_idx1))
++    return "#";
++  /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */
++
++  /* smds or smdrs.  */
++  if (INTVAL (in0_idx0) == INTVAL (in1_idx0)
++      && INTVAL (in0_idx1) == INTVAL (in1_idx1)
++      && INTVAL (in0_idx0) != INTVAL (in0_idx1))
++    {
++      if (INTVAL (in0_idx0) == 0)
++	{
++	  if (TARGET_BIG_ENDIAN)
++	    return "smds\t%0, %1, %2";
++	  else
++	    return "smdrs\t%0, %1, %2";
++	}
++      else
++	{
++	  if (TARGET_BIG_ENDIAN)
++	    return "smdrs\t%0, %1, %2";
++	  else
++	    return "smds\t%0, %1, %2";
++	}
++    }
++
++  if (INTVAL (in0_idx0) != INTVAL (in0_idx1)
++      && INTVAL (in1_idx0) != INTVAL (in1_idx1)) /* smxds.  */
++    {
++      if (INTVAL (in0_idx0) == 1)
++	{
++	  if (TARGET_BIG_ENDIAN)
++	    return "smxds\t%0, %2, %1";
++	  else
++	    return "smxds\t%0, %1, %2";
++	}
++      else
++	{
++	  if (TARGET_BIG_ENDIAN)
++	    return "smxds\t%0, %1, %2";
++	  else
++	    return "smxds\t%0, %2, %1";
++	}
++    }
++
++  gcc_unreachable ();
++  return "";
++}
++/* Emit two mulhisi3v products of IN0 and IN1 and store their difference in OUT.  */
++void
++nds32_split_sms (rtx out, rtx in0, rtx in1,
++		 rtx in0_idx0, rtx in1_idx0,
++		 rtx in0_idx1, rtx in1_idx1)
++{
++  rtx result0 = gen_reg_rtx (SImode);
++  rtx result1 = gen_reg_rtx (SImode);
++  emit_insn (gen_mulhisi3v (result0, in0, in1,
++			    in0_idx0, in1_idx0));
++  emit_insn (gen_mulhisi3v (result1, in0, in1,
++			    in0_idx1, in1_idx1));
++  emit_insn (gen_subsi3 (out, result0, result1)); /* out = result0 - result1.  */
++}
++
++/* Split a doubleword load/store into two single-word moves in operands[2..5].  */
++void
++nds32_spilt_doubleword (rtx *operands, bool load_p)
++{
++  int reg = load_p ? 0 : 1; /* Index of the register operand.  */
++  int mem = load_p ? 1 : 0; /* Index of the memory operand.  */
++  rtx reg_rtx = load_p ? operands[0] : operands[1];
++  rtx mem_rtx = load_p ? operands[1] : operands[0];
++  rtx low_part[2], high_part[2];
++  rtx sub_mem = XEXP (mem_rtx, 0);
++
++  /* Generate low_part and high_part register pattern.
++     i.e. register pattern like:
++     (reg:DI) -> (subreg:SI (reg:DI))
++		 (subreg:SI (reg:DI)) */
++  low_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 0);
++  high_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 4);
++
++  /* Generate low_part and high_part memory pattern.
++     A (post_dec) memory format will generate:
++       low_part:  lwi.bi reg, [mem], 4
++       high_part: lwi.bi reg, [mem], -12 */
++  if (GET_CODE (sub_mem) == POST_DEC)
++    {
++      /* Memory format is (post_dec (reg)),
++	 so extract (reg) from the (post_dec (reg)) pattern.  */
++      sub_mem = XEXP (sub_mem, 0);
++
++      /* Generate low_part and high_part memory format:
++	   low_part:  (post_modify ((reg) (plus (reg) (const 4)))
++	   high_part: (post_modify ((reg) (plus (reg) (const -12))) */
++      low_part[mem] = gen_frame_mem (SImode,
++				     gen_rtx_POST_MODIFY (Pmode, sub_mem,
++							  gen_rtx_PLUS (Pmode,
++							  sub_mem,
++							  GEN_INT (4))));
++      high_part[mem] = gen_frame_mem (SImode,
++				      gen_rtx_POST_MODIFY (Pmode, sub_mem,
++							   gen_rtx_PLUS (Pmode,
++							   sub_mem,
++							   GEN_INT (-12))));
++    }
++  else if (GET_CODE (sub_mem) == POST_MODIFY)
++    {
++      /* Memory format is (post_modify (reg) (plus (reg) (const))),
++	 so extract (reg) from the post_modify pattern.  */
++      rtx post_mem = XEXP (sub_mem, 0);
++
++      /* Extract (const) from the (post_modify (reg) (plus (reg) (const)))
++	 pattern.  */
++
++      rtx plus_op = XEXP (sub_mem, 1);
++      rtx post_val = XEXP (plus_op, 1);
++
++      /* Generate low_part and high_part memory format:
++	   low_part:  (post_modify ((reg) (plus (reg) (const)))
++	   high_part: ((plus (reg) (const 4))) */
++      low_part[mem] = gen_frame_mem (SImode,
++				     gen_rtx_POST_MODIFY (Pmode, post_mem,
++							  gen_rtx_PLUS (Pmode,
++							  post_mem,
++							  post_val)));
++      high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode,
++							     post_mem,
++							     4));
++    }
++  else
++    {
++      /* Memory format: (symbol_ref), (const), (reg + const_int).  */
++      low_part[mem] = adjust_address (mem_rtx, SImode, 0);
++      high_part[mem] = adjust_address (mem_rtx, SImode, 4);
++    }
++
++  /* After reload completed, the low part register may overlap the
++     high part memory address, i.e. we cannot split a sequence
++     like:
++	load $r0, [%r1]
++     split to
++	lw  $r0, [%r0]
++	lwi $r1, [%r0 + 4]
++     swap position
++	lwi $r1, [%r0 + 4]
++	lw  $r0, [%r0]
++     For store instruction we don't have a problem.
++
++     When memory format is [post_modify], we need to emit high part instruction,
++     before low part instruction.
++     example:
++       load $r0, [%r2], post_val
++     split to
++       load $r1, [%r2 + 4]
++       load $r0, [$r2], post_val.  */
++  if ((load_p && reg_overlap_mentioned_p (low_part[0], high_part[1]))
++      || GET_CODE (sub_mem) == POST_MODIFY)
++    {
++      operands[2] = high_part[0];
++      operands[3] = high_part[1];
++      operands[4] = low_part[0];
++      operands[5] = low_part[1];
++    }
++  else
++    {
++      operands[2] = low_part[0];
++      operands[3] = low_part[1];
++      operands[4] = high_part[0];
++      operands[5] = high_part[1];
++    }
++}
++/* Emit a DImode left shift of SRC by SHIFTAMOUNT into DST using SImode halves.  */
++void
++nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount)
++{
++  rtx src_high_part, src_low_part;
++  rtx dst_high_part, dst_low_part;
++
++  dst_high_part = nds32_di_high_part_subreg (dst);
++  dst_low_part = nds32_di_low_part_subreg (dst);
++
++  src_high_part = nds32_di_high_part_subreg (src);
++  src_low_part = nds32_di_low_part_subreg (src);
++
++  /* Shift amounts of 32 bits or more need separate handling.  */
++  if (CONST_INT_P (shiftamount))
++    {
++      if (INTVAL (shiftamount) < 32)
++	{
++	  rtx ext_start;
++	  ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode);
++
++	  emit_insn (gen_wext (dst_high_part, src, ext_start));
++	  emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount));
++	}
++      else
++	{
++	  rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode);
++
++	  emit_insn (gen_ashlsi3 (dst_high_part, src_low_part,
++						 new_shift_amout));
++
++	  emit_move_insn (dst_low_part, GEN_INT (0));
++	}
++    }
++  else
++    {
++      rtx dst_low_part_l32, dst_high_part_l32;
++      rtx dst_low_part_g32, dst_high_part_g32;
++      rtx new_shift_amout, select_reg;
++      dst_low_part_l32 = gen_reg_rtx (SImode);
++      dst_high_part_l32 = gen_reg_rtx (SImode);
++      dst_low_part_g32 = gen_reg_rtx (SImode);
++      dst_high_part_g32 = gen_reg_rtx (SImode);
++      new_shift_amout = gen_reg_rtx (SImode);
++      select_reg = gen_reg_rtx (SImode);
++
++      rtx ext_start;
++      ext_start = gen_reg_rtx (SImode);
++
++      /*
++	 if (shiftamount < 32)
++	   dst_low_part = src_low_part << shiftamount
++	   dst_high_part = wext (src, 32 - shiftamount)
++	   # wext can't handle wext (src, 32) since it only takes rb[0:4]
++	   # for extract.
++	   dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
++	 else
++	   dst_low_part = 0
++	   dst_high_part = src_low_part << (shiftamount & 0x1f)
++      */
++
++      emit_insn (gen_subsi3 (ext_start,
++			     gen_int_mode (32, SImode),
++			     shiftamount));
++      emit_insn (gen_wext (dst_high_part_l32, src, ext_start));
++
++      /* Handle shiftamount == 0.  */
++      emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount,
++			      src_high_part, dst_high_part_l32));
++
++      emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount));
++
++      emit_move_insn (dst_low_part_g32, const0_rtx);
++      emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
++      emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part,
++						 new_shift_amout));
++
++      emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
++
++      emit_insn (gen_cmovnsi (dst_low_part, select_reg,
++			      dst_low_part_l32, dst_low_part_g32));
++      emit_insn (gen_cmovnsi (dst_high_part, select_reg,
++			      dst_high_part_l32, dst_high_part_g32));
++    }
++}
++/* Split an ashiftrt of SRC: defer to nds32_split_shiftrtdi3 with false.  */
++void
++nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount)
++{
++  nds32_split_shiftrtdi3 (dst, src, shiftamount, false);
++}
++/* Split an lshiftrt of SRC: defer to nds32_split_shiftrtdi3 with true.  */
++void
++nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount)
++{
++  nds32_split_shiftrtdi3 (dst, src, shiftamount, true);
++}
++/* Emit a rotate right of SRC by SHIFTAMOUNT (masked with 0x3f) into DST.  */
++void
++nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount)
++{
++  rtx dst_low_part_l32, dst_high_part_l32;
++  rtx dst_low_part_g32, dst_high_part_g32;
++  rtx select_reg, low5bit, low5bit_inv, minus32sa;
++  rtx dst_low_part_g32_tmph;
++  rtx dst_low_part_g32_tmpl;
++  rtx dst_high_part_l32_tmph;
++  rtx dst_high_part_l32_tmpl;
++
++  rtx src_low_part, src_high_part;
++  rtx dst_high_part, dst_low_part;
++
++  shiftamount = force_reg (SImode, shiftamount);
++
++  emit_insn (gen_andsi3 (shiftamount,
++			 shiftamount,
++			 gen_int_mode (0x3f, SImode))); /* Count modulo 64.  */
++
++  dst_high_part = nds32_di_high_part_subreg (dst);
++  dst_low_part = nds32_di_low_part_subreg (dst);
++
++  src_high_part = nds32_di_high_part_subreg (src);
++  src_low_part = nds32_di_low_part_subreg (src);
++
++  dst_low_part_l32 = gen_reg_rtx (SImode);
++  dst_high_part_l32 = gen_reg_rtx (SImode);
++  dst_low_part_g32 = gen_reg_rtx (SImode);
++  dst_high_part_g32 = gen_reg_rtx (SImode);
++  low5bit = gen_reg_rtx (SImode);
++  low5bit_inv = gen_reg_rtx (SImode);
++  minus32sa = gen_reg_rtx (SImode);
++  select_reg = gen_reg_rtx (SImode);
++
++  dst_low_part_g32_tmph = gen_reg_rtx (SImode);
++  dst_low_part_g32_tmpl = gen_reg_rtx (SImode);
++
++  dst_high_part_l32_tmph = gen_reg_rtx (SImode);
++  dst_high_part_l32_tmpl = gen_reg_rtx (SImode);
++
++  emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
++
++  /* if shiftamount < 32
++       dst_low_part = wext(src, shiftamount)
++     else
++       dst_low_part = ((src_high_part >> (shiftamount & 0x1f))
++		       | (src_low_part << (32 - (shiftamount & 0x1f))))
++  */
++  emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode)));
++  emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit));
++
++  emit_insn (gen_wext (dst_low_part_l32, src, shiftamount));
++
++  emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit));
++  emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv));
++
++  emit_insn (gen_iorsi3 (dst_low_part_g32,
++			 dst_low_part_g32_tmpl,
++			 dst_low_part_g32_tmph));
++
++  emit_insn (gen_cmovnsi (dst_low_part, select_reg,
++			  dst_low_part_l32, dst_low_part_g32));
++
++  /* if shiftamount < 32
++       dst_high_part = ((src_high_part >> shiftamount)
++			| (src_low_part << (32 - shiftamount)))
++       dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
++     else
++       dst_high_part = wext(src, shiftamount & 0x1f)
++  */
++
++  emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount));
++
++  emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount));
++  emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa));
++
++  emit_insn (gen_iorsi3 (dst_high_part_l32,
++			 dst_high_part_l32_tmpl,
++			 dst_high_part_l32_tmph));
++
++  emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount,
++			  src_high_part, dst_high_part_l32));
++
++  emit_insn (gen_wext (dst_high_part_g32, src, low5bit));
++
++  emit_insn (gen_cmovnsi (dst_high_part, select_reg,
++			  dst_high_part_l32, dst_high_part_g32));
++}
++
++/* Return true if OP is or contains a SYMBOL_REF or LABEL_REF.  */
++bool
++symbolic_reference_mentioned_p (rtx op)
++{
++  const char *fmt;
++  int i;
+ 
+-  /* The v3push/v3pop instruction should only be applied on
+-     none-isr and none-variadic function.  */
+-  if (TARGET_V3PUSH
+-      && !nds32_isr_function_p (current_function_decl)
+-      && (cfun->machine->va_args_size == 0))
++  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
++    return true;
++
++  fmt = GET_RTX_FORMAT (GET_CODE (op));
++  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+     {
+-      /* For stack v3push:
+-           operands[0]: Re
+-           operands[1]: imm8u */
++      if (fmt[i] == 'E') /* Operand I is a vector of expressions.  */
++	{
++	  int j;
+ 
+-      /* This variable is to check if 'push25 Re,imm8u' is available.  */
+-      int sp_adjust;
++	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
++	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
++	      return true;
++	}
+ 
+-      /* Set operands[0].  */
+-      operands[0] = gen_rtx_REG (SImode, re_callee_saved);
++      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
++	return true;
++    }
+ 
+-      /* Check if we can generate 'push25 Re,imm8u',
+-         otherwise, generate 'push25 Re,0'.  */
+-      sp_adjust = cfun->machine->local_size
+-		  + cfun->machine->out_args_size
+-		  + cfun->machine->callee_saved_area_gpr_padding_bytes;
+-      if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
+-	  && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust))
+-	operands[1] = GEN_INT (sp_adjust);
+-      else
+-	operands[1] = GEN_INT (0);
++  return false; /* No operand referenced a symbol or label.  */
++}
+ 
+-      /* Create assembly code pattern.  */
+-      snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1");
+-    }
+-  else
+-    {
+-      /* For normal stack push multiple:
+-         operands[0]: Rb
+-         operands[1]: Re
+-         operands[2]: En4 */
++/* Expand PIC address X using @GOTOFF (local) or @GOT (non-local).
+ 
+-      /* This variable is used to check if we only need to generate En4 field.
+-         As long as Rb==Re=SP_REGNUM, we set this variable to 1.  */
+-      int push_en4_only_p = 0;
++  Example for @GOTOFF:
+ 
+-      /* Set operands[0] and operands[1].  */
+-      operands[0] = gen_rtx_REG (SImode, rb_callee_saved);
+-      operands[1] = gen_rtx_REG (SImode, re_callee_saved);
++    la $r0, symbol@GOTOFF
++      -> sethi $ta, hi20(symbol@GOTOFF)
++	 ori $ta, $ta, lo12(symbol@GOTOFF)
++	 add $r0, $ta, $gp
+ 
+-      /* 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+-      if (!cfun->machine->fp_size
+-	  && !cfun->machine->gp_size
+-	  && !cfun->machine->lp_size
+-	  && REGNO (operands[0]) == SP_REGNUM
+-	  && REGNO (operands[1]) == SP_REGNUM)
++  Example for @GOT:
++
++    la $r0, symbol@GOT
++      -> sethi $ta, hi20(symbol@GOT)
++	 ori $ta, $ta, lo12(symbol@GOT)
++	 lw  $r0, [$ta + $gp]
++*/
++rtx
++nds32_legitimize_pic_address (rtx x)
++{
++  rtx addr = x;
++  rtx reg = gen_reg_rtx (Pmode);  /* Receives the sethi/lo_sum result.  */
++  rtx pat;
++
++  if (GET_CODE (x) == LABEL_REF
++      || (GET_CODE (x) == SYMBOL_REF
++	  && (CONSTANT_POOL_ADDRESS_P (x)
++	      || SYMBOL_REF_LOCAL_P (x))))
++    {
++      addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF);
++      addr = gen_rtx_CONST (SImode, addr);
++      emit_insn (gen_sethi (reg, addr));
++      emit_insn (gen_lo_sum (reg, reg, addr));
++      x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx);
++    }
++  else if (GET_CODE (x) == SYMBOL_REF)
++    {
++      addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT);
++      addr = gen_rtx_CONST (SImode, addr);
++      emit_insn (gen_sethi (reg, addr));
++      emit_insn (gen_lo_sum (reg, reg, addr));
++
++      x = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
++					       reg));
++    }
++  else if (GET_CODE (x) == CONST)
++    {
++      /* We don't split constant in expand_pic_move because GOTOFF can combine
++	 the addend with the symbol.  */
++      addr = XEXP (x, 0);
++      gcc_assert (GET_CODE (addr) == PLUS);
++
++      rtx op0 = XEXP (addr, 0);
++      rtx op1 = XEXP (addr, 1);
++
++      if ((GET_CODE (op0) == LABEL_REF
++	   || (GET_CODE (op0) == SYMBOL_REF
++	       && (CONSTANT_POOL_ADDRESS_P (op0)
++		   || SYMBOL_REF_LOCAL_P (op0))))
++	  && GET_CODE (op1) == CONST_INT)
+ 	{
+-	  /* No need to generate instruction.  */
+-	  return "";
++	  pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF);
++	  pat = gen_rtx_PLUS (Pmode, pat, op1);
++	  pat = gen_rtx_CONST (Pmode, pat);
++	  emit_insn (gen_sethi (reg, pat));
++	  emit_insn (gen_lo_sum (reg, reg, pat));
++	  x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx);
++	}
++      else if (GET_CODE (op0) == SYMBOL_REF
++	       && GET_CODE (op1) == CONST_INT)
++	{
++	  /* This is a constant offset from a @GOT symbol reference.  */
++	  addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT);
++	  addr = gen_rtx_CONST (SImode, addr);
++	  emit_insn (gen_sethi (reg, addr));
++	  emit_insn (gen_lo_sum (reg, reg, addr));
++	  addr = gen_const_mem (SImode, gen_rtx_PLUS (Pmode,
++						      pic_offset_table_rtx,
++						      reg));
++	  emit_move_insn (reg, addr);  /* REG = the loaded GOT entry.  */
++	  if (satisfies_constraint_Is15 (op1))
++	    x = gen_rtx_PLUS (Pmode, reg, op1);
++	  else
++	    {
++	      rtx tmp_reg = gen_reg_rtx (SImode);
++	      emit_insn (gen_movsi (tmp_reg, op1));
++	      x = gen_rtx_PLUS (Pmode, reg, tmp_reg);
++	    }
+ 	}
+       else
+ 	{
+-	  /* If Rb==Re=SP_REGNUM, we only need to generate En4 field.  */
+-	  if (REGNO (operands[0]) == SP_REGNUM
+-	      && REGNO (operands[1]) == SP_REGNUM)
+-	    push_en4_only_p = 1;
+-
+-	  /* Create assembly code pattern.
+-	     We need to handle the form: "Rb, Re, { $fp $gp $lp }".  */
+-	  snprintf (pattern, sizeof (pattern),
+-		    "push.s\t%s{%s%s%s }",
+-		    push_en4_only_p ? "" : "%0, %1, ",
+-		    cfun->machine->fp_size ? " $fp" : "",
+-		    cfun->machine->gp_size ? " $gp" : "",
+-		    cfun->machine->lp_size ? " $lp" : "");
++	  /* Any other CONST form is unsupported: dump X and abort.  */
++	  debug_rtx (x);
++	  gcc_unreachable ();
+ 	}
+     }
++  return x;
++}
+ 
+-  /* We use output_asm_insn() to output assembly code by ourself.  */
+-  output_asm_insn (pattern, operands);
+-  return "";
++void
++nds32_expand_pic_move (rtx *operands)
++{
++  rtx src;  /* operands[1] after PIC legitimization.  */
++
++  src = nds32_legitimize_pic_address (operands[1]);
++  emit_move_insn (operands[0], src);
+ }
+ 
+-/* Function to output stack pop operation.
+-   We need to deal with normal stack pop multiple or stack v3pop.  */
+-const char *
+-nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED)
++/* Expand ICT symbol X, which must be a SYMBOL_REF marked indirect_call.
++    Example for @ICT and ICT model=large:
++
++    la $r0, symbol@ICT
++      -> sethi $rt, hi20(symbol@ICT)
++	 lwi $r0, [$rt + lo12(symbol@ICT)]
++
++   Emits the sethi and returns a MEM of (lo_sum reg addr) for the load.  */
++rtx
++nds32_legitimize_ict_address (rtx x)
+ {
+-  /* A string pattern for output_asm_insn().  */
+-  char pattern[100];
+-  /* The operands array which will be used in output_asm_insn().  */
+-  rtx operands[3];
+-  /* Pick up callee-saved first regno and last regno for further use.  */
+-  int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno;
+-  int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno;
++  rtx symbol = x;
++  rtx addr = x;
++  rtx reg = gen_reg_rtx (Pmode);
++  gcc_assert (GET_CODE (x) == SYMBOL_REF
++	      && nds32_indirect_call_referenced_p (x));
+ 
+-  /* If we step here, we are going to do v3pop or multiple pop operation.  */
++  addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, symbol), UNSPEC_ICT);
++  addr = gen_rtx_CONST (SImode, addr);
++  emit_insn (gen_sethi (reg, addr));  /* REG = hi20 (symbol@ICT).  */
+ 
+-  /* The v3push/v3pop instruction should only be applied on
+-     none-isr and none-variadic function.  */
+-  if (TARGET_V3PUSH
+-      && !nds32_isr_function_p (current_function_decl)
+-      && (cfun->machine->va_args_size == 0))
+-    {
+-      /* For stack v3pop:
+-           operands[0]: Re
+-           operands[1]: imm8u */
++  x = gen_const_mem (SImode, gen_rtx_LO_SUM (Pmode, reg, addr));
+ 
+-      /* This variable is to check if 'pop25 Re,imm8u' is available.  */
+-      int sp_adjust;
++  return x;
++}
+ 
+-      /* Set operands[0].  */
+-      operands[0] = gen_rtx_REG (SImode, re_callee_saved);
++void
++nds32_expand_ict_move (rtx *operands)
++{
++  rtx src = operands[1];  /* Symbol to be loaded through @ICT.  */
+ 
+-      /* Check if we can generate 'pop25 Re,imm8u',
+-         otherwise, generate 'pop25 Re,0'.
+-         We have to consider alloca issue as well.
+-         If the function does call alloca(), the stack pointer is not fixed.
+-         In that case, we cannot use 'pop25 Re,imm8u' directly.
+-         We have to caculate stack pointer from frame pointer
+-         and then use 'pop25 Re,0'.  */
+-      sp_adjust = cfun->machine->local_size
+-		  + cfun->machine->out_args_size
+-		  + cfun->machine->callee_saved_area_gpr_padding_bytes;
+-      if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
+-	  && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)
+-	  && !cfun->calls_alloca)
+-	operands[1] = GEN_INT (sp_adjust);
+-      else
+-	operands[1] = GEN_INT (0);
++  src = nds32_legitimize_ict_address (src);  /* Now a MEM to load from.  */
+ 
+-      /* Create assembly code pattern.  */
+-      snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1");
++  emit_move_insn (operands[0], src);
++}
++
++/* Return true if X (maybe inside UNSPEC_ICT) is an indirect_call symbol.  */
++bool
++nds32_indirect_call_referenced_p (rtx x)
++{
++  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_ICT)
++    x = XVECEXP (x, 0, 0);
++
++  if (GET_CODE (x) == SYMBOL_REF)
++    {
++      tree decl = SYMBOL_REF_DECL (x);
++
++      return decl
++	     && (lookup_attribute("indirect_call",
++				  DECL_ATTRIBUTES(decl))
++		 != NULL);
+     }
++
++  return false;
++}
++
++/* Return true if calling SYMBOL needs a long call (large ICT/code model).  */
++bool
++nds32_long_call_p (rtx symbol)
++{
++  if (nds32_indirect_call_referenced_p (symbol))
++    return TARGET_ICT_MODEL_LARGE;
+   else
+-    {
+-      /* For normal stack pop multiple:
+-         operands[0]: Rb
+-         operands[1]: Re
+-         operands[2]: En4 */
++    return TARGET_CMODEL_LARGE;
++}
+ 
+-      /* This variable is used to check if we only need to generate En4 field.
+-         As long as Rb==Re=SP_REGNUM, we set this variable to 1.  */
+-      int pop_en4_only_p = 0;
++/* Return true if X is a TLS SYMBOL_REF, bare or as (const (plus SYM ...)).  */
++bool
++nds32_tls_referenced_p (rtx x)
++{
++  if (!targetm.have_tls)
++   return false;
+ 
+-      /* Set operands[0] and operands[1].  */
+-      operands[0] = gen_rtx_REG (SImode, rb_callee_saved);
+-      operands[1] = gen_rtx_REG (SImode, re_callee_saved);
++  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
++    x = XEXP (XEXP (x, 0), 0);  /* Look at the first PLUS operand.  */
+ 
+-      /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+-      if (!cfun->machine->fp_size
+-	  && !cfun->machine->gp_size
+-	  && !cfun->machine->lp_size
+-	  && REGNO (operands[0]) == SP_REGNUM
+-	  && REGNO (operands[1]) == SP_REGNUM)
+-	{
+-	  /* No need to generate instruction.  */
+-	  return "";
+-	}
+-      else
+-	{
+-	  /* If Rb==Re=SP_REGNUM, we only need to generate En4 field.  */
+-	  if (REGNO (operands[0]) == SP_REGNUM
+-	      && REGNO (operands[1]) == SP_REGNUM)
+-	    pop_en4_only_p = 1;
++  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
++    return true;
+ 
+-	  /* Create assembly code pattern.
+-	     We need to handle the form: "Rb, Re, { $fp $gp $lp }".  */
+-	  snprintf (pattern, sizeof (pattern),
+-		    "pop.s\t%s{%s%s%s }",
+-		    pop_en4_only_p ? "" : "%0, %1, ",
+-		    cfun->machine->fp_size ? " $fp" : "",
+-		    cfun->machine->gp_size ? " $gp" : "",
+-		    cfun->machine->lp_size ? " $lp" : "");
++  return false;
++}
++
++/* X contains a thread-local SYMBOL_REF.  Generate code to compute
++   this (thread-local) address.  */
++rtx
++nds32_legitimize_tls_address (rtx x)
++{
++  rtx tmp_reg;
++  rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM);
++  rtx pat, insns, reg0;
++
++  if (GET_CODE (x) == SYMBOL_REF)
++    switch (SYMBOL_REF_TLS_MODEL (x))
++      {
++      case TLS_MODEL_GLOBAL_DYNAMIC:
++      case TLS_MODEL_LOCAL_DYNAMIC:
++	/* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill
++	   may destroy the define-use chain analysis to insert relax_hint.  */
++	if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC)
++	  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD);
++	else
++	  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD);
++
++	pat = gen_rtx_CONST (SImode, pat);
++	reg0 = gen_rtx_REG (Pmode, 0);
++	/* If we can confirm all clobbered registers, it doesn't have to use
++	   a call instruction.  */
++	insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0)));
++	use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx);
++	RTL_CONST_CALL_P (insns) = 1;
++	tmp_reg = gen_reg_rtx (SImode);
++	emit_move_insn (tmp_reg, reg0);
++	x = tmp_reg;
++	break;
++
++      case TLS_MODEL_INITIAL_EXEC:
++	pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE);
++	tmp_reg  = gen_reg_rtx (SImode);
++	pat = gen_rtx_CONST (SImode, pat);
++	emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0)));
++	if (flag_pic)
++	  emit_use (pic_offset_table_rtx);
++	x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg);
++	break;
++
++      case TLS_MODEL_LOCAL_EXEC:
++	/* Expand symbol_ref@TPOFF:
++	     sethi $ta, hi20(symbol_ref@TPOFF)
++	     ori   $ta, $ta, lo12(symbol_ref@TPOFF)
++	     add   $r0, $ta, $tp */
++	tmp_reg  = gen_reg_rtx (SImode);
++	pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE);
++	pat = gen_rtx_CONST (SImode, pat);
++	emit_insn (gen_sethi (tmp_reg, pat));
++	emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat));
++	x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg);
++	break;
++
++      default:
++	gcc_unreachable ();
++      }
++  else if (GET_CODE (x) == CONST)
++    {
++      rtx base, addend;
++      split_const (x, &base, &addend);
++
++      if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
++	{
++	  /* Expand symbol_ref@TPOFF with the addend folded in:
++	     sethi $ta, hi20(symbol_ref@TPOFF + addend)
++	     ori   $ta, $ta, lo12(symbol_ref@TPOFF + addend)
++	     add   $r0, $ta, $tp */
++	  tmp_reg  = gen_reg_rtx (SImode);
++	  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, base), UNSPEC_TLSLE);
++	  pat = gen_rtx_PLUS (SImode, pat, addend);
++	  pat = gen_rtx_CONST (SImode, pat);
++	  emit_insn (gen_sethi (tmp_reg, pat));
++	  emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat));
++	  x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg);
+ 	}
+     }
+ 
+-  /* We use output_asm_insn() to output assembly code by ourself.  */
+-  output_asm_insn (pattern, operands);
+-  return "";
++  return x;
+ }
+ 
+-/* Function to generate PC relative jump table.
+-   Refer to nds32.md for more details.
++void
++nds32_expand_tls_move (rtx *operands)
++{
++  rtx src = operands[1];  /* TLS symbol, possibly plus an addend.  */
++  rtx base, addend;
+ 
+-   The following is the sample for the case that diff value
+-   can be presented in '.short' size.
++  gcc_assert (CONSTANT_P (src));  /* Else BASE/ADDEND would be unset.  */
++  split_const (src, &base, &addend);
+ 
+-     addi    $r1, $r1, -(case_lower_bound)
+-     slti    $ta, $r1, (case_number)
+-     beqz    $ta, .L_skip_label
++  if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
++    src = nds32_legitimize_tls_address (src);  /* Addend folded into @TPOFF.  */
++  else
++    {
++      src = nds32_legitimize_tls_address (base);
++      if (addend != const0_rtx)
++	{
++	  src = gen_rtx_PLUS (SImode, src, addend);
++	  src = force_operand (src, operands[0]);
++	}
++    }
+ 
+-     la      $ta, .L35             ! get jump table address
+-     lh      $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry
+-     addi    $ta, $r1, $ta
+-     jr5     $ta
++  emit_move_insn (operands[0], src);
++}
+ 
+-     ! jump table entry
+-   L35:
+-     .short  .L25-.L35
+-     .short  .L26-.L35
+-     .short  .L27-.L35
+-     .short  .L28-.L35
+-     .short  .L29-.L35
+-     .short  .L30-.L35
+-     .short  .L31-.L35
+-     .short  .L32-.L35
+-     .short  .L33-.L35
+-     .short  .L34-.L35 */
+-const char *
+-nds32_output_casesi_pc_relative (rtx *operands)
++void
++nds32_expand_constant (enum machine_mode mode, HOST_WIDE_INT val,
++		       rtx target, rtx source)
+ {
+-  machine_mode mode;
+-  rtx diff_vec;
++  rtx temp = gen_reg_rtx (mode);
++  int clear_sign_bit_copies = 0;
++  int clear_zero_bit_copies = 0;
++  unsigned HOST_WIDE_INT remainder = val & 0xffffffffUL;
++  /* Expand TARGET = SOURCE & VAL; suitable masks become a shift pair.  */
++  /* Count number of leading zeros (32 when REMAINDER is zero).  */
++  clear_sign_bit_copies = remainder ? __builtin_clz (remainder) : 32;
++  /* Count number of trailing zeros (32 when REMAINDER is zero).  */
++  clear_zero_bit_copies = remainder ? __builtin_ctz (remainder) : 32;
++
++  HOST_WIDE_INT sign_shift_mask = ((HOST_WIDE_INT_UC (0xffffffff)
++				    << (32 - clear_sign_bit_copies))
++				   & 0xffffffffUL);
++  HOST_WIDE_INT zero_shift_mask = (HOST_WIDE_INT_1 << clear_zero_bit_copies) - 1;
++
++  if (clear_sign_bit_copies > 0 && clear_sign_bit_copies < 17
++      && (remainder | sign_shift_mask) == 0xffffffffUL)
++    {
++      /* Transfer AND to two shifts, example:
++	 a = b & 0x7fffffff => (b << 1) >> 1 */
++      rtx shift = GEN_INT (clear_sign_bit_copies);
+ 
+-  diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1])));
++      emit_insn (gen_ashlsi3 (temp, source, shift));
++      emit_insn (gen_lshrsi3 (target, temp, shift));
++    }
++  else if (clear_zero_bit_copies > 0 && clear_sign_bit_copies < 17
++	   && (remainder | zero_shift_mask) == 0xffffffffUL)
++    {
++      /* Transfer AND to two shifts, example:
++	 a = b & 0xfff00000 => (b >> 20) << 20 */
++      rtx shift = GEN_INT (clear_zero_bit_copies);
+ 
+-  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
++      emit_insn (gen_lshrsi3 (temp, source, shift));
++      emit_insn (gen_ashlsi3 (target, temp, shift));
++    }
++  else
++    {
++      emit_move_insn (temp, GEN_INT (val));
++      emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp));
++    }
++}
+ 
+-  /* Step C: "t <-- operands[1]".  */
+-  output_asm_insn ("la\t$ta, %l1", operands);
++/* Return true if OP is a MEM addressed by a REG or a POST_INC of a REG.  */
++bool
++nds32_valid_smw_lwm_base_p (rtx op)
++{
++  rtx base_addr;
+ 
+-  /* Get the mode of each element in the difference vector.  */
+-  mode = GET_MODE (diff_vec);
++  if (!MEM_P (op))
++    return false;
+ 
+-  /* Step D: "z <-- (mem (plus (operands[0] << m) t))",
+-     where m is 0, 1, or 2 to load address-diff value from table.  */
+-  switch (mode)
++  base_addr = XEXP (op, 0);
++
++  if (REG_P (base_addr))
++    return true;
++  else
+     {
+-    case QImode:
+-      output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands);
+-      break;
+-    case HImode:
+-      output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands);
+-      break;
+-    case SImode:
+-      output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
+-      break;
+-    default:
+-      gcc_unreachable ();
++      if (GET_CODE (base_addr) == POST_INC
++	  && REG_P (XEXP (base_addr, 0)))
++	return true;
+     }
+ 
+-  /* Step E: "t <-- z + t".
+-     Add table label_ref with address-diff value to
+-     obtain target case address.  */
+-  output_asm_insn ("add\t$ta, %2, $ta", operands);
++  return false;
++}
+ 
+-  /* Step F: jump to target with register t.  */
+-  if (TARGET_16_BIT)
+-    return "jr5\t$ta";
+-  else
+-    return "jr\t$ta";
++/* Return the SImode high-part subreg of the DImode value REG.  */
++rtx nds32_di_high_part_subreg(rtx reg)
++{
++  unsigned high_part_offset = subreg_highpart_offset (SImode, DImode);
++
++  return simplify_gen_subreg (
++	   SImode, reg,
++	   DImode, high_part_offset);
+ }
+ 
+-/* Function to generate normal jump table.  */
+-const char *
+-nds32_output_casesi (rtx *operands)
++rtx nds32_di_low_part_subreg(rtx reg)
+ {
+-  /* Step C: "t <-- operands[1]".  */
+-  output_asm_insn ("la\t$ta, %l1", operands);
++  unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode);
+ 
+-  /* Step D: "z <-- (mem (plus (operands[0] << 2) t))".  */
+-  output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
++  return simplify_gen_subreg (  /* SImode low part of DImode REG.  */
++	   SImode, reg,
++	   DImode, low_part_offset);
++}
+ 
+-  /* No need to perform Step E, which is only used for
+-     pc relative jump table.  */
++/* ------------------------------------------------------------------------ */
+ 
+-  /* Step F: jump to target with register z.  */
+-  if (TARGET_16_BIT)
+-    return "jr5\t%2";
++/* Auxiliary functions for outputting TLS patterns.  */
++
++const char *
++nds32_output_tls_desc (rtx *operands)
++{
++  char pattern[1000];  /* Assembly template for output_asm_insn.  */
++
++  if (TARGET_RELAX_HINT)
++    snprintf (pattern, sizeof (pattern),
++	      ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t"
++	      ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t"
++	      ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t"
++	      ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t"
++	      ".relax_hint %%1\n\tjral $r15");
+   else
+-    return "jr\t%2";
++    snprintf (pattern, sizeof (pattern),
++	      "sethi $r0, hi20(%%0)\n\t"
++	      "ori $r0, $r0, lo12(%%0)\n\t"
++	      "lw $r15, [$r0 + $gp]\n\t"
++	      "add $r0, $r0, $gp\n\t"
++	      "jral $r15");
++  output_asm_insn (pattern, operands);
++  return "";  /* The text was already emitted by output_asm_insn.  */
+ }
+ 
+-/* ------------------------------------------------------------------------ */
++const char *
++nds32_output_tls_ie (rtx *operands)
++{
++  char pattern[1000];  /* Assembly template for output_asm_insn.  */
++
++  if (flag_pic)  /* PIC: sethi/ori, then lw indexed by $gp.  */
++  {
++      if (TARGET_RELAX_HINT)
++	snprintf (pattern, sizeof (pattern),
++		  ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t"
++		  ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t"
++		  ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]");
++      else
++	snprintf (pattern, sizeof (pattern),
++		  "sethi %%0, hi20(%%1)\n\t"
++		  "ori %%0, %%0, lo12(%%1)\n\t"
++		  "lw %%0, [%%0 + $gp]");
++  }
++  else  /* Non-PIC: sethi, then lwi with the lo12 offset.  */
++    {
++      if (TARGET_RELAX_HINT)
++	snprintf (pattern, sizeof (pattern),
++		  ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t"
++		  ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]");
++      else
++	snprintf (pattern, sizeof (pattern),
++		  "sethi %%0, hi20(%%1)\n\t"
++		  "lwi %%0, [%%0 + lo12(%%1)]");
++    }
++  output_asm_insn (pattern, operands);
++  return "";  /* The text was already emitted by output_asm_insn.  */
++}
+diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c
+index 4c26dcc..c46ac8f 100644
+--- a/gcc/config/nds32/nds32-memory-manipulation.c
++++ b/gcc/config/nds32/nds32-memory-manipulation.c
+@@ -25,28 +25,1255 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
++#include "tree.h"
+ #include "rtl.h"
+-#include "emit-rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
+ #include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++
++/* ------------------------------------------------------------------------ */
++
++/* This file is divided into six parts:
++
++     PART 1: Auxiliary static function definitions.
++
++     PART 2: Auxiliary function for expand movmem pattern.
++
++     PART 3: Auxiliary function for expand setmem pattern.
++
++     PART 4: Auxiliary function for expand movstr pattern.
++
++     PART 5: Auxiliary function for expand strlen pattern.
++
++     PART 6: Auxiliary function for expand load_multiple/store_multiple
++	     pattern.  */
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 1: Auxiliary static function definitions.  */
++
++/* Move REG from/to MEM + OFFSET in MODE; LOAD_P picks the direction.  */
++static void
++nds32_emit_load_store (rtx reg, rtx mem, enum machine_mode mode,
++		       int offset, bool load_p)
++{
++  rtx displaced = adjust_address (mem, mode, offset);
++
++  if (load_p)
++    emit_move_insn (reg, displaced);
++  else
++    emit_move_insn (displaced, reg);
++}
++
++/* Move REG from/to the MODE-sized MEM at BASE_REG (LOAD_P picks the
++   direction), then advance BASE_REG by GET_MODE_SIZE (MODE).  */
++static void
++nds32_emit_post_inc_load_store (rtx reg, rtx base_reg,
++				enum machine_mode mode, bool load_p)
++{
++  rtx slot;
++
++  gcc_assert (GET_MODE (reg) == mode);
++  gcc_assert (GET_MODE (base_reg) == Pmode);
++
++  /* A (mem (post_inc (reg))) move is not generated directly since it may
++     not be recognized; the auto_inc_dec pass can combine the two moves.  */
++  slot = gen_rtx_MEM (mode, base_reg);
++  if (load_p)
++    emit_move_insn (reg, slot);
++  else
++    emit_move_insn (slot, reg);
++
++  emit_move_insn (base_reg,
++		  plus_constant (Pmode, base_reg, GET_MODE_SIZE (mode)));
++}
++
++/* Copy one MODE-sized item from SRC + ADDR_OFFSET to DST + ADDR_OFFSET.  */
++static void
++nds32_emit_mem_move (rtx src, rtx dst, enum machine_mode mode,
++		     int addr_offset)
++{
++  rtx carrier;
++
++  gcc_assert (MEM_P (src) && MEM_P (dst));
++  carrier = gen_reg_rtx (mode);
++  nds32_emit_load_store (carrier, src, mode, addr_offset, true);
++  nds32_emit_load_store (carrier, dst, mode, addr_offset, false);
++}
++
++/* Emit a load-multiple from *SRC_MEM, then a store-multiple to *DST_MEM,
++   of COUNT registers from BASE_REGNO; optionally track the updated bases.  */
++static void
++nds32_emit_mem_move_block (int base_regno, int count,
++			   rtx *dst_base_reg, rtx *dst_mem,
++			   rtx *src_base_reg, rtx *src_mem,
++			   bool update_base_reg_p)
++{
++  rtx updated_base;
++
++  emit_insn (nds32_expand_load_multiple (base_regno, count,
++					 *src_base_reg, *src_mem,
++					 update_base_reg_p, &updated_base));
++  if (update_base_reg_p)
++    {
++      *src_mem = gen_rtx_MEM (SImode, updated_base);
++      *src_base_reg = updated_base;
++    }
++  emit_insn (nds32_expand_store_multiple (base_regno, count,
++					  *dst_base_reg, *dst_mem,
++					  update_base_reg_p, &updated_base));
++  if (update_base_reg_p)
++    {
++      *dst_mem = gen_rtx_MEM (SImode, updated_base);
++      *dst_base_reg = updated_base;
++    }
++}
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 2: Auxiliary function for expand movmem pattern.  */
++
++static bool
++nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem,
++					 rtx size,
++					 rtx alignment, bool use_zol_p)
++{
++  /* Emit loop version of movmem.
++     Returns 0 without emitting code if V3M and not 4-byte aligned.
++       andi    $size_least_3_bit, $size, #~7
++       add     $dst_end, $dst, $size
++       move    $dst_itr, $dst
++       move    $src_itr, $src
++       beqz    $size_least_3_bit, .Lbyte_mode_entry ! Not large enough.
++       add     $double_word_end, $dst, $size_least_3_bit
++
++     .Ldouble_word_mode_loop:
++       lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
++       smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
++       ! moves will be deleted after register allocation
++       move    $src_itr, $src_itr'
++       move    $dst_itr, $dst_itr'
++       ! Upper bound not reached. Loop.
++       bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop
++
++     .Lbyte_mode_entry:
++       beq     $dst_itr, $dst_end, .Lend_label
++     .Lbyte_mode_loop:
++       lbi.bi  $tmp, [$src_itr], #1
++       sbi.bi  $tmp, [$dst_itr], #1
++       ! Upper bound not reached. Loop.
++       bne     $dst_itr, $dst_end, .Lbyte_mode_loop
++     .Lend_label:
++  */
++  rtx dst_base_reg, src_base_reg;
++  rtx dst_itr, src_itr;
++  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
++  rtx dst_end;
++  rtx size_least_3_bit;
++  rtx double_word_end = NULL;
++  rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label;
++  rtx tmp;
++  rtx mask_least_3_bit;
++  int start_regno;
++  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
++  int hwloop_id = cfun->machine->hwloop_group_id;
++
++  if (TARGET_ISA_V3M && !align_to_4_bytes)
++    return 0;
++
++  if (TARGET_REDUCED_REGS)
++    start_regno = 2;
++  else
++    start_regno = 16;
++
++  dst_itr = gen_reg_rtx (Pmode);
++  src_itr = gen_reg_rtx (Pmode);
++  dst_end = gen_reg_rtx (Pmode);
++  tmp = gen_reg_rtx (QImode);
++  mask_least_3_bit = GEN_INT (~7);
++
++  double_word_mode_loop = gen_label_rtx ();
++  byte_mode_entry = gen_label_rtx ();
++  byte_mode_loop = gen_label_rtx ();
++  end_label = gen_label_rtx ();
++
++  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
++  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
++  /* andi   $size_least_3_bit, $size, #~7 */
++  size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit,
++				   NULL_RTX, 0, OPTAB_WIDEN);
++  /* add     $dst_end, $dst, $size */
++  dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
++			  NULL_RTX, 0, OPTAB_WIDEN);
++
++  /* move    $dst_itr, $dst
++     move    $src_itr, $src */
++  emit_move_insn (dst_itr, dst_base_reg);
++  emit_move_insn (src_itr, src_base_reg);
++
++  /* beqz    $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */
++  emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL,
++			   SImode, 1, byte_mode_entry);
++  if (TARGET_HWLOOP && use_zol_p)
++    {
++      rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop);
++      /* Each multiple-load/store pair moves 8 bytes per iteration, so
++	 divide the byte count by 8 to get the loop count:
++	 srli $size_least_3_bit, $size_least_3_bit, 3.  */
++      emit_insn (gen_lshrsi3 (size_least_3_bit, size_least_3_bit, GEN_INT (3)));
++      /* mtlbi .Ldouble_word_mode_loop */
++      emit_insn (gen_mtlbi_hint (start_label, GEN_INT (hwloop_id)));
++      emit_insn (gen_init_lc (size_least_3_bit, GEN_INT (hwloop_id)));
++      emit_insn (gen_no_hwloop ());
++    }
++  else
++    {
++      /* add     $double_word_end, $dst, $size_least_3_bit */
++      double_word_end = expand_binop (Pmode, add_optab,
++				      dst_base_reg, size_least_3_bit,
++				      NULL_RTX, 0, OPTAB_WIDEN);
++    }
++
++  /* .Ldouble_word_mode_loop: */
++  emit_label (double_word_mode_loop);
++  /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
++     smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
++  src_itr_m = src_itr;
++  dst_itr_m = dst_itr;
++  srcmem_m = srcmem;
++  dstmem_m = dstmem;
++  nds32_emit_mem_move_block (start_regno, 2,
++			     &dst_itr_m, &dstmem_m,
++			     &src_itr_m, &srcmem_m,
++			     true);
++  /* move    $src_itr, $src_itr'
++     move    $dst_itr, $dst_itr' */
++  emit_move_insn (dst_itr, dst_itr_m);
++  emit_move_insn (src_itr, src_itr_m);
++
++  if (TARGET_HWLOOP && use_zol_p)
++    {
++      rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop);
++      /* Hwloop pseudo instruction to handle CFG.  */
++      rtx cfg_insn = emit_jump_insn (gen_hwloop_cfg (GEN_INT (hwloop_id),
++				     start_label));
++      JUMP_LABEL (cfg_insn) = double_word_mode_loop;
++      cfun->machine->hwloop_group_id++;
++    }
++  else
++    {
++      /* ! Upper bound not reached. Loop.
++	 bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
++      emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL,
++			       Pmode, 1, double_word_mode_loop);
++    }
++
++  /* .Lbyte_mode_entry: */
++  emit_label (byte_mode_entry);
++
++  /* beq     $dst_itr, $dst_end, .Lend_label */
++  emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL,
++			   Pmode, 1, end_label);
++  /* .Lbyte_mode_loop: */
++  emit_label (byte_mode_loop);
++
++  emit_insn (gen_no_hwloop ());
++  /* lbi.bi  $tmp, [$src_itr], #1 */
++  nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
++
++  /* sbi.bi  $tmp, [$dst_itr], #1 */
++  nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
++  /* ! Upper bound not reached. Loop.
++     bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
++  emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
++			   SImode, 1, byte_mode_loop);
++
++  /* .Lend_label: */
++  emit_label (end_label);
++
++  return true;
++}
++
++static bool
++nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
++				       rtx size, rtx alignment)
++{
++  rtx dst_base_reg, src_base_reg;
++  rtx dst_itr, src_itr;
++  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
++  rtx dst_end;
++  rtx double_word_mode_loop, byte_mode_loop;
++  rtx tmp;
++  int start_regno;
++  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
++  int hwloop_id = cfun->machine->hwloop_group_id;
++  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);
++
++  if (TARGET_ISA_V3M && !align_to_4_bytes)
++    return 0;
++
++  if (TARGET_REDUCED_REGS)
++    start_regno = 2;
++  else
++    start_regno = 16;
++
++  dst_itr = gen_reg_rtx (Pmode);
++  src_itr = gen_reg_rtx (Pmode);
++  dst_end = gen_reg_rtx (Pmode);
++  tmp = gen_reg_rtx (QImode);
++
++  double_word_mode_loop = gen_label_rtx ();
++  byte_mode_loop = gen_label_rtx ();
++
++  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
++  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
++
++  if (total_bytes < 8)
++    {
++      /* Emit total_bytes less than 8 loop version of movmem.
++	add     $dst_end, $dst, $size
++	move    $dst_itr, $dst
++	.Lbyte_mode_loop:
++	lbi.bi  $tmp, [$src_itr], #1
++	sbi.bi  $tmp, [$dst_itr], #1
++	! Not readch upper bound. Loop.
++	bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
++
++      /* add     $dst_end, $dst, $size */
++      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
++			      NULL_RTX, 0, OPTAB_WIDEN);
++      /* move    $dst_itr, $dst
++	 move    $src_itr, $src */
++      emit_move_insn (dst_itr, dst_base_reg);
++      emit_move_insn (src_itr, src_base_reg);
++
++      /* .Lbyte_mode_loop: */
++      emit_label (byte_mode_loop);
++
++      emit_insn (gen_no_hwloop ());
++      /* lbi.bi  $tmp, [$src_itr], #1 */
++      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
++
++      /* sbi.bi  $tmp, [$dst_itr], #1 */
++      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
++      /* ! Not readch upper bound. Loop.
++	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
++      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
++			       SImode, 1, byte_mode_loop);
++      return true;
++    }
++  else if (total_bytes % 8 == 0)
++    {
++      /* Emit multiple of 8 loop version of movmem.
++
++	 add     $dst_end, $dst, $size
++	 move    $dst_itr, $dst
++	 move    $src_itr, $src
++
++	.Ldouble_word_mode_loop:
++	lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
++	smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
++	! move will delete after register allocation
++	move    $src_itr, $src_itr'
++	move    $dst_itr, $dst_itr'
++	! Not readch upper bound. Loop.
++	bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
++
++      if (TARGET_HWLOOP)
++	{
++	  rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop);
++
++	  rtx loop_count_reg = gen_reg_rtx (Pmode);
++	  /* movi $loop_count_reg, total_bytes / 8 */
++	  emit_move_insn (loop_count_reg, GEN_INT (total_bytes / 8));
++	  /* mtlbi .Ldouble_word_mode_loop */
++	  emit_insn (gen_mtlbi_hint (start_label, GEN_INT (hwloop_id)));
++	  /* mtusr  $loop_count_reg, LC */
++	  emit_insn (gen_init_lc (loop_count_reg, GEN_INT (hwloop_id)));
++	  emit_insn (gen_no_hwloop ());
++	}
++      else
++	{
++	  /* add     $dst_end, $dst, $size */
++	  dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
++				  NULL_RTX, 0, OPTAB_WIDEN);
++	}
++
++      /* move    $dst_itr, $dst
++	 move    $src_itr, $src */
++      emit_move_insn (dst_itr, dst_base_reg);
++      emit_move_insn (src_itr, src_base_reg);
++
++      /* .Ldouble_word_mode_loop: */
++      emit_label (double_word_mode_loop);
++      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
++	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
++      src_itr_m = src_itr;
++      dst_itr_m = dst_itr;
++      srcmem_m = srcmem;
++      dstmem_m = dstmem;
++      nds32_emit_mem_move_block (start_regno, 2,
++				 &dst_itr_m, &dstmem_m,
++				 &src_itr_m, &srcmem_m,
++				 true);
++      /* move    $src_itr, $src_itr'
++	 move    $dst_itr, $dst_itr' */
++      emit_move_insn (dst_itr, dst_itr_m);
++      emit_move_insn (src_itr, src_itr_m);
++
++      if (TARGET_HWLOOP)
++	{
++	  rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop);
++	  /* Hwloop pseudo instruction to handle CFG.  */
++	  rtx cfg_insn = emit_jump_insn (gen_hwloop_cfg (GEN_INT (hwloop_id),
++					 start_label));
++	  JUMP_LABEL (cfg_insn) = double_word_mode_loop;
++	  cfun->machine->hwloop_group_id++;
++	}
++      else
++	{
++	  /* ! Upper bound not reached yet. Loop.
++	     bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
++	  emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
++				   Pmode, 1, double_word_mode_loop);
++	}
++    }
++  else
++    {
++      /* Handle size greater than 8, and not a multiple of 8.  */
++      return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
++						      size, alignment,
++						      true);
++    }
++
++  return true;
++}
++
++static bool
++nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem,
++			    rtx size, rtx alignment)
++{
++  if (CONST_INT_P (size))
++    return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem,
++						  size, alignment);
++  else
++    return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
++						    size, alignment, false);
++}
++
++static bool
++nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem,
++			      rtx total_bytes, rtx alignment)
++{
++  rtx dst_base_reg, src_base_reg;
++  rtx tmp_reg;
++  int maximum_bytes;
++  int maximum_bytes_per_inst;
++  int maximum_regs;
++  int start_regno;
++  int i, inst_num;
++  HOST_WIDE_INT remain_bytes, remain_words;
++  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
++  bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0;
++
++  /* Because the reduced register set has few registers
++     (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31'
++      cannot be used for register allocation),
++     using 8 registers (32 bytes) for moving memory block
++     may easily consume all of them.
++     It makes register allocation/spilling hard to work.
++     So we only allow maximum=4 registers (16 bytes) for
++     moving memory block under reduced-set registers.  */
++  if (TARGET_REDUCED_REGS)
++    {
++      maximum_regs  = 4;
++      maximum_bytes = 64;
++      start_regno   = 2;
++    }
++  else
++    {
++      if (TARGET_LINUX_ABI)
++	{
++	  /* $r25 is $tp so we use up to 8 registers if using Linux ABI.  */
++	  maximum_regs  = 8;
++	  maximum_bytes = 160;
++	  start_regno   = 16;
++	}
++      else
++	{
++	  maximum_regs  = 10;
++	  maximum_bytes = 160;
++	  start_regno   = 16;
++	}
++    }
++  maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD;
++
++  /* 1. Total_bytes is integer for sure.
++     2. Alignment is integer for sure.
++     3. Maximum 4 or 10 registers and up to 4 instructions,
++	4 * 4 * 4 = 64 bytes, 4 * 4 * 10 = 160 bytes.
++     4. The dstmem cannot be volatile memory access.
++     5. The srcmem cannot be volatile memory access.
++     6. On v3m the known shared alignment must be a multiple of 4 bytes,
++	since lmw/smw do *NOT* support unaligned access on v3m.  */
++  if (GET_CODE (total_bytes) != CONST_INT
++      || GET_CODE (alignment) != CONST_INT
++      || INTVAL (total_bytes) > maximum_bytes
++      || MEM_VOLATILE_P (dstmem)
++      || MEM_VOLATILE_P (srcmem)
++      || (TARGET_ISA_V3M && !align_to_4_bytes))
++    return false;
++
++  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
++  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
++  remain_bytes = INTVAL (total_bytes);
++
++  /* Do not update base address for last lmw/smw pair.  */
++  inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1))
++	      / maximum_bytes_per_inst) - 1;
++
++  for (i = 0; i < inst_num; i++)
++    {
++      nds32_emit_mem_move_block (start_regno, maximum_regs,
++				 &dst_base_reg, &dstmem,
++				 &src_base_reg, &srcmem,
++				 true);
++    }
++  remain_bytes -= maximum_bytes_per_inst * inst_num;
++
++  remain_words = remain_bytes / UNITS_PER_WORD;
++  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);
++
++  if (remain_words != 0)
++    {
++      if (remain_bytes != 0)
++	nds32_emit_mem_move_block (start_regno, remain_words,
++				   &dst_base_reg, &dstmem,
++				   &src_base_reg, &srcmem,
++				   true);
++      else
++	{
++	  /* Do not update address if no further byte to move.  */
++	  if (remain_words == 1)
++	   {
++	      /* emit move instruction if align to 4 byte and only 1
++		 word to move.  */
++	      if (align_to_4_bytes)
++		nds32_emit_mem_move (srcmem, dstmem, SImode, 0);
++	      else
++		{
++		  tmp_reg = gen_reg_rtx (SImode);
++		  emit_insn (
++		    gen_unaligned_load_w (tmp_reg,
++					  gen_rtx_MEM (SImode, src_base_reg)));
++		  emit_insn (
++		    gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg),
++					   tmp_reg));
++		}
++	    }
++	  else
++	    nds32_emit_mem_move_block (start_regno, remain_words,
++				       &dst_base_reg, &dstmem,
++				       &src_base_reg, &srcmem,
++				       false);
++	}
++    }
++
++  switch (remain_bytes)
++    {
++    case 3:
++    case 2:
++      {
++	if (align_to_2_bytes)
++	  nds32_emit_mem_move (srcmem, dstmem, HImode, 0);
++	else
++	  {
++	    nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
++	    nds32_emit_mem_move (srcmem, dstmem, QImode, 1);
++	  }
++
++	if (remain_bytes == 3)
++	  nds32_emit_mem_move (srcmem, dstmem, QImode, 2);
++	break;
++      }
++    case 1:
++      nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
++      break;
++    case 0:
++      break;
++    default:
++      gcc_unreachable ();
++    }
++
++  /* Successfully create patterns, return true.  */
++  return true;
++}
++
++/* Function to move block memory content by
++   using load_multiple and store_multiple.
++   This is auxiliary extern function to help create rtx template.
++   Check nds32-multiple.md file for the patterns.  */
++bool
++nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
++{
++  if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment))
++    return true;
++
++  if (!optimize_size && optimize > 2)
++    return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment);
++
++  return false;
++}
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 3: Auxiliary function for expand setmem pattern.  */
++
++static rtx
++nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
++{
++  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
++
++  if (CONST_INT_P (value))
++    {
++      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK(QImode);
++      rtx new_val = gen_int_mode (val | (val << 8)
++				  | (val << 16) | (val << 24), SImode);
++      /* Just calculate at here if it's constant value.  */
++      emit_move_insn (value4word, new_val);
++    }
++  else
++    {
++      if (NDS32_EXT_DSP_P ())
++	{
++	  /* ! prepare word
++	     insb    $tmp, $value, 1         ! $tmp  <- 0x0000abab
++	     pkbb16  $tmp6, $tmp2, $tmp2   ! $value4word  <- 0xabababab */
++	  rtx tmp = gen_reg_rtx (SImode);
++
++	  convert_move (tmp, value, true);
++
++	  emit_insn (
++	    gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));
++
++	  emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
++	}
++      else
++	{
++	  /* ! prepare word
++	     andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
++	     slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
++	     or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
++	     slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
++	     or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */
++
++	  rtx tmp1, tmp2, tmp3, tmp4;
++	  tmp1 = expand_binop (SImode, and_optab, value,
++			       gen_int_mode (0xff, SImode),
++			       NULL_RTX, 0, OPTAB_WIDEN);
++	  tmp2 = expand_binop (SImode, ashl_optab, tmp1,
++			       gen_int_mode (8, SImode),
++			       NULL_RTX, 0, OPTAB_WIDEN);
++	  tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
++			       NULL_RTX, 0, OPTAB_WIDEN);
++	  tmp4 = expand_binop (SImode, ashl_optab, tmp3,
++			       gen_int_mode (16, SImode),
++			       NULL_RTX, 0, OPTAB_WIDEN);
++
++	  emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
++	}
++    }
++
++  return value4word;
++}
++
++static rtx
++nds32_gen_dup_4_byte_to_word_value (rtx value)
++{
++  rtx value4word = gen_reg_rtx (SImode);
++  nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);
++
++  return value4word;
++}
++
++static rtx
++nds32_gen_dup_8_byte_to_double_word_value (rtx value)
++{
++  rtx value4doubleword = gen_reg_rtx (DImode);
++
++  nds32_gen_dup_4_byte_to_word_value_aux (
++    value, nds32_di_low_part_subreg(value4doubleword));
++
++  emit_move_insn (nds32_di_high_part_subreg(value4doubleword),
++		  nds32_di_low_part_subreg(value4doubleword));
++  return value4doubleword;
++}
++
++
++static rtx
++emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
++{
++  rtx word_mode_label = gen_label_rtx ();
++  rtx word_mode_end_label = gen_label_rtx ();
++  rtx byte_mode_size = gen_reg_rtx (SImode);
++  rtx byte_mode_size_tmp = gen_reg_rtx (SImode);
++  rtx word_mode_end = gen_reg_rtx (SImode);
++  rtx size_for_word = gen_reg_rtx (SImode);
++
++  /* and     $size_for_word, $size, #~0x7  */
++  size_for_word = expand_binop (SImode, and_optab, size,
++				gen_int_mode (~0x7, SImode),
++				NULL_RTX, 0, OPTAB_WIDEN);
++
++  emit_move_insn (byte_mode_size, size);
++
++  /* beqz    $size_for_word, .Lbyte_mode_entry  */
++  emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL,
++			   SImode, 1, word_mode_end_label);
++  /* add     $word_mode_end, $dst, $size_for_word  */
++  word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
++				NULL_RTX, 0, OPTAB_WIDEN);
++
++  /* andi    $byte_mode_size, $size, 0x7  */
++  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
++				     NULL_RTX, 0, OPTAB_WIDEN);
++
++  emit_move_insn (byte_mode_size, byte_mode_size_tmp);
++
++  /* .Lword_mode:  */
++  emit_label (word_mode_label);
++  /*   ! word-mode set loop
++       smw.bim $value4word, [$dst_itr], $value4word, 0
++       bne     $word_mode_end, $dst_itr, .Lword_mode  */
++  emit_insn (gen_unaligned_store_update_base_dw (itr,
++						 itr,
++						 value));
++  emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
++			   Pmode, 1, word_mode_label);
++
++  emit_label (word_mode_end_label);
++
++  return byte_mode_size;
++}
++
++static rtx
++emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
++{
++  rtx end  = gen_reg_rtx (Pmode);
++  rtx byte_mode_label = gen_label_rtx ();
++  rtx end_label = gen_label_rtx ();
++
++  value = force_reg (QImode, value);
++
++  if (need_end)
++    end = expand_binop (Pmode, add_optab, itr, size,
++			NULL_RTX, 0, OPTAB_WIDEN);
++  /*   beqz    $byte_mode_size, .Lend
++       add     $byte_mode_end, $dst_itr, $byte_mode_size  */
++  emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL,
++			   SImode, 1, end_label);
++
++  if (!need_end)
++    end = expand_binop (Pmode, add_optab, itr, size,
++			NULL_RTX, 0, OPTAB_WIDEN);
++
++  /* .Lbyte_mode:  */
++  emit_label (byte_mode_label);
++
++  emit_insn (gen_no_hwloop ());
++  /*   ! byte-mode set loop
++       sbi.bi  $value, [$dst_itr] ,1
++       bne     $byte_mode_end, $dst_itr, .Lbyte_mode */
++  nds32_emit_post_inc_load_store (value, itr, QImode, false);
++
++  emit_cmp_and_jump_insns (end, itr, NE, NULL,
++			   Pmode, 1, byte_mode_label);
++  /* .Lend: */
++  emit_label (end_label);
++
++  if (need_end)
++    return end;
++  else
++    return NULL_RTX;
++}
++
++static bool
++nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
++{
++  rtx value4doubleword;
++  rtx value4byte;
++  rtx dst;
++  rtx byte_mode_size;
++
++  /* Emit loop version of setmem.
++     memset:
++       ! prepare word
++       andi    $tmp1, $val, 0xff               ! $tmp1  <- 0x000000ab
++       slli    $tmp2, $tmp1, 8                 ! $tmp2  <- 0x0000ab00
++       or      $tmp3, $val, $tmp2              ! $tmp3  <- 0x0000abab
++       slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
++       or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab
++
++       and     $size_for_word, $size, #-8
++       beqz    $size_for_word, .Lword_mode_end
++
++       add     $word_mode_end, $dst, $size_for_word
++       andi    $byte_mode_size, $size, 7
++
++     .Lword_mode:
++       ! word-mode set loop
++       smw.bim $value4word, [$dst], $value4word, 0
++       bne     $word_mode_end, $dst, .Lword_mode
++
++     .Lword_mode_end:
++       beqz    $byte_mode_size, .Lend
++       add     $byte_mode_end, $dst, $byte_mode_size
++
++     .Lbyte_mode:
++       ! byte-mode set loop
++       sbi.bi  $value4word, [$dst] ,1
++       bne     $byte_mode_end, $dst, .Lbyte_mode
++     .Lend: */
++
++  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
++
++  /* ! prepare word
++     andi    $tmp1, $value, 0xff             ! $tmp1  <- 0x000000ab
++     slli    $tmp2, $tmp1, 8                 ! $tmp2  <- 0x0000ab00
++     or      $tmp3, $tmp1, $tmp2             ! $tmp3  <- 0x0000abab
++     slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
++     or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab  */
++  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
++
++  /*   and     $size_for_word, $size, #-8
++       beqz    $size_for_word, .Lword_mode_end
++
++       add     $word_mode_end, $dst, $size_for_word
++       andi    $byte_mode_size, $size, 7
++
++     .Lword_mode:
++       ! word-mode set loop
++       smw.bim $value4word, [$dst], $value4word, 0
++       bne     $word_mode_end, $dst, .Lword_mode
++     .Lword_mode_end:  */
++  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);
++
++  /*   beqz    $byte_mode_size, .Lend
++       add     $byte_mode_end, $dst, $byte_mode_size
++
++     .Lbyte_mode:
++       ! byte-mode set loop
++       sbi.bi  $value, [$dst] ,1
++       bne     $byte_mode_end, $dst, .Lbyte_mode
++     .Lend: */
++
++  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
++				    subreg_lowpart_offset (QImode, DImode));
++
++  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);
++
++  return true;
++}
++
++static bool
++nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
++{
++  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
++  rtx need_align_bytes = gen_reg_rtx (SImode);
++  rtx last_2_bit = gen_reg_rtx (SImode);
++  rtx byte_loop_base = gen_reg_rtx (SImode);
++  rtx byte_loop_size = gen_reg_rtx (SImode);
++  rtx remain_size = gen_reg_rtx (SImode);
++  rtx new_base_reg;
++  rtx value4byte, value4doubleword;
++  rtx byte_mode_size;
++  rtx last_byte_loop_label = gen_label_rtx ();
++
++  size = force_reg (SImode, size);
++
++  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
++  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
++				    subreg_lowpart_offset (QImode, DImode));
++
++  emit_move_insn (byte_loop_size, size);
++  emit_move_insn (byte_loop_base, base_reg);
++
++  /* Jump to last byte loop if size is not greater than 16.  */
++  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
++			   SImode, 1, last_byte_loop_label);
++
++  /* Align to 4 bytes first, since v3m cannot do unaligned accesses.  */
++  emit_insn (gen_andsi3 (last_2_bit,
++			 base_reg,
++			 gen_int_mode (0x3, SImode)));
++
++  emit_insn (gen_subsi3 (need_align_bytes,
++			 gen_int_mode (4, SImode),
++			 last_2_bit));
++
++  /* Align to 4 byte. */
++  new_base_reg = emit_setmem_byte_loop (base_reg,
++					need_align_bytes,
++					value4byte,
++					true);
++
++  /* Calculate remain size. */
++  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));
++
++  /* Set memory double word by double word. */
++  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
++						remain_size,
++						value4doubleword);
++
++  emit_move_insn (byte_loop_base, new_base_reg);
++  emit_move_insn (byte_loop_size, byte_mode_size);
++
++  emit_label (last_byte_loop_label);
++
++  /* And set memory for remain bytes. */
++  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);
++  return true;
++}
++
++static bool
++nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
++			    rtx align ATTRIBUTE_UNUSED,
++			    rtx expected_align ATTRIBUTE_UNUSED,
++			    rtx expected_size ATTRIBUTE_UNUSED)
++{
++  unsigned maximum_regs, maximum_bytes, start_regno, regno;
++  rtx value4word;
++  rtx dst_base_reg, new_base_reg;
++  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
++  unsigned HOST_WIDE_INT real_size;
++
++  if (TARGET_REDUCED_REGS)
++    {
++      maximum_regs  = 4;
++      maximum_bytes = 64;
++      start_regno   = 2;
++    }
++  else
++    {
++      maximum_regs  = 8;
++      maximum_bytes = 128;
++      start_regno   = 16;
++    }
++
++  real_size = UINTVAL (size) & GET_MODE_MASK(SImode);
++
++  if (!(CONST_INT_P (size) && real_size <= maximum_bytes))
++    return false;
++
++  remain_bytes = real_size;
++
++  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
++
++  value4word = nds32_gen_dup_4_byte_to_word_value (value);
++
++  prepare_regs = remain_bytes / UNITS_PER_WORD;
++
++  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
++
++  if (prepare_regs > maximum_regs)
++    prepare_regs = maximum_regs;
++
++  fill_per_smw = prepare_regs * UNITS_PER_WORD;
++
++  regno = start_regno;
++  switch (prepare_regs)
++    {
++    case 2:
++    default:
++      {
++	rtx reg0 = gen_rtx_REG (SImode, regno);
++	rtx reg1 = gen_rtx_REG (SImode, regno+1);
++	unsigned last_regno = start_regno + prepare_regs - 1;
++
++	emit_move_insn (reg0, value4word);
++	emit_move_insn (reg1, value4word);
++	rtx regd = gen_rtx_REG (DImode, regno);
++	regno += 2;
++
++	/* Try to utilize movd44!  */
++	while (regno <= last_regno)
++	  {
++	    if ((regno + 1) <=last_regno)
++	      {
++		rtx reg = gen_rtx_REG (DImode, regno);
++		emit_move_insn (reg, regd);
++		regno += 2;
++	      }
++	    else
++	      {
++		rtx reg = gen_rtx_REG (SImode, regno);
++		emit_move_insn (reg, reg0);
++		regno += 1;
++	      }
++	  }
++	break;
++      }
++    case 1:
++      {
++	rtx reg = gen_rtx_REG (SImode, regno++);
++	emit_move_insn (reg, value4word);
++      }
++      break;
++    case 0:
++      break;
++    }
++
++  if (fill_per_smw)
++    for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw)
++      {
++	emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
++						dst_base_reg, dstmem,
++						true, &new_base_reg));
++	dst_base_reg = new_base_reg;
++	dstmem = gen_rtx_MEM (SImode, dst_base_reg);
++      }
++
++  remain_words = remain_bytes / UNITS_PER_WORD;
++
++  if (remain_words)
++    {
++      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
++					      dst_base_reg, dstmem,
++					      true, &new_base_reg));
++      dst_base_reg = new_base_reg;
++      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
++    }
++
++  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);
++
++  if (remain_bytes)
++    {
++      value = simplify_gen_subreg (QImode, value4word, SImode,
++				   subreg_lowpart_offset(QImode, SImode));
++      int offset = 0;
++      for (;remain_bytes;--remain_bytes, ++offset)
++	{
++	  nds32_emit_load_store (value, dstmem, QImode, offset, false);
++	}
++    }
++
++  return true;
++}
++
++bool
++nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align,
++		     rtx expected_align,
++		     rtx expected_size)
++{
++  bool align_to_4_bytes = (INTVAL (align) & 3) == 0;
++
++  /* Only expand at O3 */
++  if (optimize_size || optimize < 3)
++    return false;
++
++  if (TARGET_ISA_V3M && !align_to_4_bytes)
++    return nds32_expand_setmem_loop_v3m (dstmem, size, value);
++
++  if (nds32_expand_setmem_unroll (dstmem, size, value,
++				  align, expected_align, expected_size))
++    return true;
++
++  return nds32_expand_setmem_loop (dstmem, size, value);
++}
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 4: Auxiliary function for expand movstr pattern.  */
++
++bool
++nds32_expand_movstr (rtx dst_end_ptr,
++		     rtx dstmem,
++		     rtx srcmem)
++{
++  rtx tmp;
++  rtx dst_base_reg, src_base_reg;
++  rtx new_dst_base_reg, new_src_base_reg;
++  rtx last_non_null_char_ptr;
++  rtx ffbi_result;
++  rtx loop_label;
++
++  if (optimize_size || optimize < 3)
++    return false;
++
++  tmp = gen_reg_rtx (SImode);
++  ffbi_result = gen_reg_rtx (Pmode);
++  new_dst_base_reg = gen_reg_rtx (Pmode);
++  new_src_base_reg = gen_reg_rtx (Pmode);
++  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
++  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
++  loop_label = gen_label_rtx ();
++
++  emit_label (loop_label);
++  emit_insn (gen_lmwzb (new_src_base_reg, src_base_reg, tmp));
++  emit_insn (gen_smwzb (new_dst_base_reg, dst_base_reg, tmp));
++  emit_insn (gen_unspec_ffb (ffbi_result, tmp, const0_rtx));
++
++  emit_move_insn (src_base_reg, new_src_base_reg);
++  emit_move_insn (dst_base_reg, new_dst_base_reg);
++
++  emit_cmp_and_jump_insns (ffbi_result, const0_rtx, EQ, NULL,
++			   SImode, 1, loop_label);
++
++  last_non_null_char_ptr = expand_binop (Pmode, add_optab, dst_base_reg,
++					 ffbi_result, NULL_RTX, 0, OPTAB_WIDEN);
++
++  emit_move_insn (dst_end_ptr, last_non_null_char_ptr);
++
++  return true;
++}
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 5: Auxiliary function for expand strlen pattern.  */
++
++bool
++nds32_expand_strlen (rtx result, rtx str,
++		     rtx target_char, rtx align ATTRIBUTE_UNUSED)
++{
++  rtx base_reg, backup_base_reg;
++  rtx ffb_result;
++  rtx target_char_ptr, length;
++  rtx loop_label, tmp;
++
++  if (optimize_size || optimize < 3)
++    return false;
++
++  gcc_assert (MEM_P (str));
++  gcc_assert (CONST_INT_P (target_char) || REG_P (target_char));
++
++  base_reg = copy_to_mode_reg (SImode, XEXP (str, 0));
++  loop_label = gen_label_rtx ();
++
++  ffb_result = gen_reg_rtx (Pmode);
++  tmp = gen_reg_rtx (SImode);
++  backup_base_reg = gen_reg_rtx (SImode);
++
++  /* Emit loop version of strlen.
++       move  $backup_base, $base
++     .Lloop:
++       lmw.bim $tmp, [$base], $tmp, 0
++       ffb   $ffb_result, $tmp, $target_char   ! is there $target_char?
++       beqz  $ffb_result, .Lloop
++       add   $last_char_ptr, $base, $ffb_result
++       sub   $length, $last_char_ptr, $backup_base  */
++
++  /* move  $backup_base, $base  */
++  emit_move_insn (backup_base_reg, base_reg);
++
++  /* .Lloop:  */
++  emit_label (loop_label);
++  /* lmw.bim $tmp, [$base], $tmp, 0  */
++  emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg));
++
++  /*  ffb   $ffb_result, $tmp, $target_char   ! is there $target_char?  */
++  emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char));
++
++  /* beqz  $ffb_result, .Lloop  */
++  emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL,
++			   SImode, 1, loop_label);
++
++  /* add   $target_char_ptr, $base, $ffb_result   */
++  target_char_ptr = expand_binop (Pmode, add_optab, base_reg,
++				ffb_result, NULL_RTX, 0, OPTAB_WIDEN);
++
++  /* sub   $length, $target_char_ptr, $backup_base  */
++  length = expand_binop (Pmode, sub_optab, target_char_ptr,
++			 backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN);
++
++  emit_move_insn (result, length);
++
++  return true;
++}
+ 
+ /* ------------------------------------------------------------------------ */
+ 
++/* PART 6: Auxiliary function for expand load_multiple/store_multiple
++	   pattern.  */
++
+ /* Functions to expand load_multiple and store_multiple.
+    They are auxiliary extern functions to help create rtx template.
+    Check nds32-multiple.md file for the patterns.  */
+ rtx
+ nds32_expand_load_multiple (int base_regno, int count,
+-			    rtx base_addr, rtx basemem)
++			    rtx base_addr, rtx basemem,
++			    bool update_base_reg_p,
++			    rtx *update_base_reg)
+ {
+   int par_index;
+   int offset;
++  int start_idx;
+   rtx result;
+   rtx new_addr, mem, reg;
+ 
++  /* Generate an unaligned load so that the load is not pulled out of the
++     parallel and turned into lwi, which would lose unaligned access.  */
++  if (count == 1)
++    {
++      reg = gen_rtx_REG (SImode, base_regno);
++      if (update_base_reg_p)
++	{
++	  *update_base_reg = gen_reg_rtx (SImode);
++	  return gen_unaligned_load_update_base_w (*update_base_reg, reg, base_addr);
++	}
++      else
++	return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr));
++    }
++
+   /* Create the pattern that is presented in nds32-multiple.md.  */
++  if (update_base_reg_p)
++    {
++      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
++      start_idx = 1;
++    }
++  else
++    {
++      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
++      start_idx = 0;
++    }
++
++  if (update_base_reg_p)
++    {
++      offset           = count * 4;
++      new_addr         = plus_constant (Pmode, base_addr, offset);
++      *update_base_reg = gen_reg_rtx (SImode);
+ 
+-  result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
++      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
++    }
+ 
+   for (par_index = 0; par_index < count; par_index++)
+     {
+@@ -57,7 +1284,7 @@ nds32_expand_load_multiple (int base_regno, int count,
+ 					       new_addr, offset);
+       reg      = gen_rtx_REG (SImode, base_regno + par_index);
+ 
+-      XVECEXP (result, 0, par_index) = gen_rtx_SET (reg, mem);
++      XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem);
+     }
+ 
+   return result;
+@@ -65,16 +1292,49 @@ nds32_expand_load_multiple (int base_regno, int count,
+ 
+ rtx
+ nds32_expand_store_multiple (int base_regno, int count,
+-			     rtx base_addr, rtx basemem)
++			     rtx base_addr, rtx basemem,
++			     bool update_base_reg_p,
++			     rtx *update_base_reg)
+ {
+   int par_index;
+   int offset;
++  int start_idx;
+   rtx result;
+   rtx new_addr, mem, reg;
+ 
++  if (count == 1)
++    {
++      reg = gen_rtx_REG (SImode, base_regno);
++      if (update_base_reg_p)
++	{
++	  *update_base_reg = gen_reg_rtx (SImode);
++	  return gen_unaligned_store_update_base_w (*update_base_reg, base_addr, reg);
++	}
++      else
++	return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg);
++    }
++
+   /* Create the pattern that is presented in nds32-multiple.md.  */
+ 
+-  result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
++  if (update_base_reg_p)
++    {
++      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
++      start_idx = 1;
++    }
++  else
++    {
++      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
++      start_idx = 0;
++    }
++
++  if (update_base_reg_p)
++    {
++      offset           = count * 4;
++      new_addr         = plus_constant (Pmode, base_addr, offset);
++      *update_base_reg = gen_reg_rtx (SImode);
++
++      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
++    }
+ 
+   for (par_index = 0; par_index < count; par_index++)
+     {
+@@ -85,58 +1345,11 @@ nds32_expand_store_multiple (int base_regno, int count,
+ 					       new_addr, offset);
+       reg      = gen_rtx_REG (SImode, base_regno + par_index);
+ 
+-      XVECEXP (result, 0, par_index) = gen_rtx_SET (mem, reg);
++      XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg);
+     }
+ 
+-  return result;
+-}
+-
+-/* Function to move block memory content by
+-   using load_multiple and store_multiple.
+-   This is auxiliary extern function to help create rtx template.
+-   Check nds32-multiple.md file for the patterns.  */
+-int
+-nds32_expand_movmemqi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
+-{
+-  HOST_WIDE_INT in_words, out_words;
+-  rtx dst_base_reg, src_base_reg;
+-  int maximum_bytes;
+-
+-  /* Because reduced-set regsiters has few registers
+-     (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31'
+-      cannot be used for register allocation),
+-     using 8 registers (32 bytes) for moving memory block
+-     may easily consume all of them.
+-     It makes register allocation/spilling hard to work.
+-     So we only allow maximum=4 registers (16 bytes) for
+-     moving memory block under reduced-set registers.  */
+-  if (TARGET_REDUCED_REGS)
+-    maximum_bytes = 16;
+-  else
+-    maximum_bytes = 32;
+-
+-  /* 1. Total_bytes is integer for sure.
+-     2. Alignment is integer for sure.
+-     3. Maximum 4 or 8 registers, 4 * 4 = 16 bytes, 8 * 4 = 32 bytes.
+-     4. Requires (n * 4) block size.
+-     5. Requires 4-byte alignment.  */
+-  if (GET_CODE (total_bytes) != CONST_INT
+-      || GET_CODE (alignment) != CONST_INT
+-      || INTVAL (total_bytes) > maximum_bytes
+-      || INTVAL (total_bytes) & 3
+-      || INTVAL (alignment) & 3)
+-    return 0;
+ 
+-  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
+-  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
+-
+-  out_words = in_words = INTVAL (total_bytes) / UNITS_PER_WORD;
+-
+-  emit_insn (nds32_expand_load_multiple (0, in_words, src_base_reg, srcmem));
+-  emit_insn (nds32_expand_store_multiple (0, out_words, dst_base_reg, dstmem));
+-
+-  /* Successfully create patterns, return 1.  */
+-  return 1;
++  return result;
+ }
+ 
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-modes.def b/gcc/config/nds32/nds32-modes.def
+index f2d0e6c..7a6f953 100644
+--- a/gcc/config/nds32/nds32-modes.def
++++ b/gcc/config/nds32/nds32-modes.def
+@@ -18,4 +18,6 @@
+    along with GCC; see the file COPYING3.  If not see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-/* So far, there is no need to define any modes for nds32 target.  */
++/* Vector modes.  */
++VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
++VECTOR_MODES (INT, 8);        /*            V8QI V4HI V2SI */
+diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md
+index babc7f2..500a1c6 100644
+--- a/gcc/config/nds32/nds32-multiple.md
++++ b/gcc/config/nds32/nds32-multiple.md
+@@ -49,17 +49,19 @@
+      otherwise we have to FAIL this rtx generation:
+        1. The number of consecutive registers must be integer.
+        2. Maximum 4 or 8 registers for lmw.bi instruction
+-          (based on this nds32-multiple.md design).
++	  (based on this nds32-multiple.md design).
+        3. Minimum 2 registers for lmw.bi instruction
+-          (based on this nds32-multiple.md design).
++	  (based on this nds32-multiple.md design).
+        4. operands[0] must be register for sure.
+        5. operands[1] must be memory for sure.
+-       6. Do not cross $r15 register because it is not allocatable.  */
++       6. operands[1] is not volatile memory access.
++       7. Do not cross $r15 register because it is not allocatable.  */
+   if (GET_CODE (operands[2]) != CONST_INT
+       || INTVAL (operands[2]) > maximum
+       || INTVAL (operands[2]) < 2
+       || GET_CODE (operands[0]) != REG
+       || GET_CODE (operands[1]) != MEM
++      || MEM_VOLATILE_P (operands[1])
+       || REGNO (operands[0]) + INTVAL (operands[2]) > TA_REGNUM)
+     FAIL;
+ 
+@@ -69,12 +71,943 @@
+ 					    INTVAL (operands[2]),
+ 					    force_reg (SImode,
+ 						       XEXP (operands[1], 0)),
+-					    operands[1]);
++					    operands[1],
++					    false, NULL);
+ })
+ 
+ ;; Ordinary Load Multiple.
++(define_insn "*lmw_bim_si25" ;; Loads 25 SImode words from [operand 2 + 0 .. + 96] and sets operand 1 = operand 2 + 100.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 100))) ;; 100 = 25 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 76))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 80))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 84))))
++     (set (match_operand:SI 25 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 88))))
++     (set (match_operand:SI 26 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 92))))
++     (set (match_operand:SI 27 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 96))))])]
++  "(XVECLEN (operands[0], 0) == 26)" ;; 26 = 1 address-update set + 25 load sets.
++  "lmw.bim\t%3, [%1], %27, 0x0" ;; %3 = first destination register, %27 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "25")
++   (set_attr "length"             "4")]
++)
+ 
+-(define_insn "*lmwsi8"
++(define_insn "*lmw_bim_si24" ;; Loads 24 SImode words from [operand 2 + 0 .. + 92] and sets operand 1 = operand 2 + 96.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 96))) ;; 96 = 24 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 76))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 80))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 84))))
++     (set (match_operand:SI 25 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 88))))
++     (set (match_operand:SI 26 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 92))))])]
++  "(XVECLEN (operands[0], 0) == 25)" ;; 25 = 1 address-update set + 24 load sets.
++  "lmw.bim\t%3, [%1], %26, 0x0" ;; %3 = first destination register, %26 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "24")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si23" ;; Loads 23 SImode words from [operand 2 + 0 .. + 88] and sets operand 1 = operand 2 + 92.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 92))) ;; 92 = 23 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 76))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 80))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 84))))
++     (set (match_operand:SI 25 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 88))))])]
++  "(XVECLEN (operands[0], 0) == 24)" ;; 24 = 1 address-update set + 23 load sets.
++  "lmw.bim\t%3, [%1], %25, 0x0" ;; %3 = first destination register, %25 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "23")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si22" ;; Loads 22 SImode words from [operand 2 + 0 .. + 84] and sets operand 1 = operand 2 + 88.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 88))) ;; 88 = 22 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 76))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 80))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 84))))])]
++  "(XVECLEN (operands[0], 0) == 23)" ;; 23 = 1 address-update set + 22 load sets.
++  "lmw.bim\t%3, [%1], %24, 0x0" ;; %3 = first destination register, %24 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "22")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si21" ;; Loads 21 SImode words from [operand 2 + 0 .. + 80] and sets operand 1 = operand 2 + 84.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 84))) ;; 84 = 21 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 76))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 80))))])]
++  "(XVECLEN (operands[0], 0) == 22)" ;; 22 = 1 address-update set + 21 load sets.
++  "lmw.bim\t%3, [%1], %23, 0x0" ;; %3 = first destination register, %23 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "21")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si20" ;; Loads 20 SImode words from [operand 2 + 0 .. + 76] and sets operand 1 = operand 2 + 80.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 80))) ;; 80 = 20 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 76))))])]
++  "(XVECLEN (operands[0], 0) == 21)" ;; 21 = 1 address-update set + 20 load sets.
++  "lmw.bim\t%3, [%1], %22, 0x0" ;; %3 = first destination register, %22 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "20")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si19" ;; Loads 19 SImode words from [operand 2 + 0 .. + 72] and sets operand 1 = operand 2 + 76.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 76))) ;; 76 = 19 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 72))))])]
++  "(XVECLEN (operands[0], 0) == 20)" ;; 20 = 1 address-update set + 19 load sets.
++  "lmw.bim\t%3, [%1], %21, 0x0" ;; %3 = first destination register, %21 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "19")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si18" ;; Loads 18 SImode words from [operand 2 + 0 .. + 68] and sets operand 1 = operand 2 + 72.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 72))) ;; 72 = 18 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 68))))])]
++  "(XVECLEN (operands[0], 0) == 19)" ;; 19 = 1 address-update set + 18 load sets.
++  "lmw.bim\t%3, [%1], %20, 0x0" ;; %3 = first destination register, %20 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "18")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si17" ;; Loads 17 SImode words from [operand 2 + 0 .. + 64] and sets operand 1 = operand 2 + 68.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 68))) ;; 68 = 17 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 64))))])]
++  "(XVECLEN (operands[0], 0) == 18)" ;; 18 = 1 address-update set + 17 load sets.
++  "lmw.bim\t%3, [%1], %19, 0x0" ;; %3 = first destination register, %19 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "17")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si16" ;; Loads 16 SImode words from [operand 2 + 0 .. + 60] and sets operand 1 = operand 2 + 64.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 64))) ;; 64 = 16 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 60))))])]
++  "(XVECLEN (operands[0], 0) == 17)" ;; 17 = 1 address-update set + 16 load sets.
++  "lmw.bim\t%3, [%1], %18, 0x0" ;; %3 = first destination register, %18 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "16")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si15" ;; Loads 15 SImode words from [operand 2 + 0 .. + 56] and sets operand 1 = operand 2 + 60.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 60))) ;; 60 = 15 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 56))))])]
++  "(XVECLEN (operands[0], 0) == 16)" ;; 16 = 1 address-update set + 15 load sets.
++  "lmw.bim\t%3, [%1], %17, 0x0" ;; %3 = first destination register, %17 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "15")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si14" ;; Loads 14 SImode words from [operand 2 + 0 .. + 52] and sets operand 1 = operand 2 + 56.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 56))) ;; 56 = 14 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 52))))])]
++  "(XVECLEN (operands[0], 0) == 15)" ;; 15 = 1 address-update set + 14 load sets.
++  "lmw.bim\t%3, [%1], %16, 0x0" ;; %3 = first destination register, %16 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "14")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si13" ;; Loads 13 SImode words from [operand 2 + 0 .. + 48] and sets operand 1 = operand 2 + 52.
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 52))) ;; 52 = 13 words * 4 bytes; constraint "1" ties operand 2 to operand 1.
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 48))))])]
++  "(XVECLEN (operands[0], 0) == 14)" ;; 14 = 1 address-update set + 13 load sets.
++  "lmw.bim\t%3, [%1], %15, 0x0" ;; %3 = first destination register, %15 = last, %1 = base register.
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "13")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si12"  /* Loads 12 consecutive SImode words starting at the address in operand 2 into operands 3..14 and sets operand 1 to operand 2 + 48. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 48)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 44))))])]
++  "(XVECLEN (operands[0], 0) == 13)"  /* 13 = 12 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %14, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "12")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si11"  /* Loads 11 consecutive SImode words starting at the address in operand 2 into operands 3..13 and sets operand 1 to operand 2 + 44. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 44)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 40))))])]
++  "(XVECLEN (operands[0], 0) == 12)"  /* 12 = 11 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %13, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "11")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si10"  /* Loads 10 consecutive SImode words starting at the address in operand 2 into operands 3..12 and sets operand 1 to operand 2 + 40. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 40)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 36))))])]
++  "(XVECLEN (operands[0], 0) == 11)"  /* 11 = 10 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %12, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "10")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si9"  /* Loads 9 consecutive SImode words starting at the address in operand 2 into operands 3..11 and sets operand 1 to operand 2 + 36. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 36)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 32))))])]
++  "(XVECLEN (operands[0], 0) == 10)"  /* 10 = 9 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %11, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "9")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si8"  /* Loads 8 consecutive SImode words starting at the address in operand 2 into operands 3..10 and sets operand 1 to operand 2 + 32. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 32)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 28))))])]
++  "(XVECLEN (operands[0], 0) == 9)"  /* 9 = 8 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %10, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "8")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si7"  /* Loads 7 consecutive SImode words starting at the address in operand 2 into operands 3..9 and sets operand 1 to operand 2 + 28. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 28)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 24))))])]
++  "(XVECLEN (operands[0], 0) == 8)"  /* 8 = 7 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %9, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "7")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si6"  /* Loads 6 consecutive SImode words starting at the address in operand 2 into operands 3..8 and sets operand 1 to operand 2 + 24. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 24)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 20))))])]
++  "(XVECLEN (operands[0], 0) == 7)"  /* 7 = 6 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %8, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "6")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si5"  /* Loads 5 consecutive SImode words starting at the address in operand 2 into operands 3..7 and sets operand 1 to operand 2 + 20. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 20)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 16))))])]
++  "(XVECLEN (operands[0], 0) == 6)"  /* 6 = 5 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %7, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "5")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si4"  /* Loads 4 consecutive SImode words starting at the address in operand 2 into operands 3..6 and sets operand 1 to operand 2 + 16. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 16)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))])]
++  "(XVECLEN (operands[0], 0) == 5)"  /* 5 = 4 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %6, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "4")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si3"  /* Loads 3 consecutive SImode words starting at the address in operand 2 into operands 3..5 and sets operand 1 to operand 2 + 12. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 12)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))])]
++  "(XVECLEN (operands[0], 0) == 4)"  /* 4 = 3 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %5, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "3")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmw_bim_si2"  /* Loads 2 consecutive SImode words starting at the address in operand 2 into operands 3..4 and sets operand 1 to operand 2 + 8. */
++  [(match_parallel 0 "nds32_load_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 8)))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (match_dup 2)))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))])]
++  "(XVECLEN (operands[0], 0) == 3)"  /* 3 = 2 loads + the base-address update */
++  "lmw.bim\t%3, [%1], %4, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "2")
++   (set_attr "length"             "4")]
++)
++
++(define_expand "unaligned_load_update_base_w"  /* Expands into two insns: unaligned_load_w of the SImode MEM at operand 2 into operand 1, then operand 0 = operand 2 + 4. */
++  [(parallel [(set (match_operand:SI 0 "register_operand" "")
++		   (plus:SI (match_operand:SI 2 "register_operand" "") (const_int 4)))
++	      (set (match_operand:SI 1 "register_operand" "")
++		   (unspec:SI [(mem:SI (match_dup 2))] UNSPEC_UALOAD_W))])]
++  ""
++{
++  /* DO NOT emit unaligned_load_w_m immediately since the web pass doesn't
++     recognize post_inc; try it again after GCC 5.0.
++     REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156  */
++  emit_insn (gen_unaligned_load_w (operands[1], gen_rtx_MEM (SImode, operands[2])));
++  emit_insn (gen_addsi3 (operands[0], operands[2], gen_int_mode (4, Pmode)));
++  DONE;
++}
++  [(set_attr "type"   "load_multiple")  /* NOTE(review): attribute list attached to a define_expand -- confirm the md reader accepts it and whether it has any effect. */
++   (set_attr "combo"              "1")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi25"
+   [(match_parallel 0 "nds32_load_multiple_operation"
+     [(set (match_operand:SI 2 "register_operand" "")
+ 	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+@@ -91,14 +1024,49 @@
+      (set (match_operand:SI 8 "register_operand" "")
+ 	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
+      (set (match_operand:SI 9 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))])]
+-  "(XVECLEN (operands[0], 0) == 8)"
+-  "lmw.bi\t%2, [%1], %9, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 76))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 80))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 84))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 88))))
++     (set (match_operand:SI 25 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 92))))
++     (set (match_operand:SI 26 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 96))))])]
++  "(XVECLEN (operands[0], 0) == 25)"
++  "lmw.bi\t%2, [%1], %26, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "25")
++   (set_attr "length"             "4")]
+ )
+ 
+-(define_insn "*lmwsi7"
++(define_insn "*lmwsi24"
+   [(match_parallel 0 "nds32_load_multiple_operation"
+     [(set (match_operand:SI 2 "register_operand" "")
+ 	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+@@ -113,14 +1081,49 @@
+      (set (match_operand:SI 7 "register_operand" "")
+ 	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
+      (set (match_operand:SI 8 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))])]
+-  "(XVECLEN (operands[0], 0) == 7)"
+-  "lmw.bi\t%2, [%1], %8, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 76))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 80))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 84))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 88))))
++     (set (match_operand:SI 25 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 92))))])]
++  "(XVECLEN (operands[0], 0) == 24)"
++  "lmw.bi\t%2, [%1], %25, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "24")
++   (set_attr "length"             "4")]
+ )
+ 
+-(define_insn "*lmwsi6"
++(define_insn "*lmwsi23"
+   [(match_parallel 0 "nds32_load_multiple_operation"
+     [(set (match_operand:SI 2 "register_operand" "")
+ 	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+@@ -133,14 +1136,49 @@
+      (set (match_operand:SI 6 "register_operand" "")
+ 	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
+      (set (match_operand:SI 7 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))])]
+-  "(XVECLEN (operands[0], 0) == 6)"
+-  "lmw.bi\t%2, [%1], %7, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 76))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 80))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 84))))
++     (set (match_operand:SI 24 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 88))))])]
++  "(XVECLEN (operands[0], 0) == 23)"
++  "lmw.bi\t%2, [%1], %24, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "23")
++   (set_attr "length"             "4")]
+ )
+ 
+-(define_insn "*lmwsi5"
++(define_insn "*lmwsi22"
+   [(match_parallel 0 "nds32_load_multiple_operation"
+     [(set (match_operand:SI 2 "register_operand" "")
+ 	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+@@ -151,110 +1189,2430 @@
+      (set (match_operand:SI 5 "register_operand" "")
+ 	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
+      (set (match_operand:SI 6 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))])]
+-  "(XVECLEN (operands[0], 0) == 5)"
+-  "lmw.bi\t%2, [%1], %6, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 76))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 80))))
++     (set (match_operand:SI 23 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 84))))])]
++  "(XVECLEN (operands[0], 0) == 22)"
++  "lmw.bi\t%2, [%1], %23, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "22")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi21"  /* Loads 21 consecutive SImode words starting at the address in operand 1 into operands 2..22; the pattern contains no base-register update. */
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 76))))
++     (set (match_operand:SI 22 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 80))))])]
++  "(XVECLEN (operands[0], 0) == 21)"  /* one set per loaded word */
++  "lmw.bi\t%2, [%1], %22, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "21")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi20"  /* Loads 20 consecutive SImode words starting at the address in operand 1 into operands 2..21; the pattern contains no base-register update. */
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))
++     (set (match_operand:SI 21 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 76))))])]
++  "(XVECLEN (operands[0], 0) == 20)"  /* one set per loaded word */
++  "lmw.bi\t%2, [%1], %21, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "20")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi19"  /* Loads 19 consecutive SImode words starting at the address in operand 1 into operands 2..20; the pattern contains no base-register update. */
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))
++     (set (match_operand:SI 20 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 72))))])]
++  "(XVECLEN (operands[0], 0) == 19)"  /* one set per loaded word */
++  "lmw.bi\t%2, [%1], %20, 0x0"
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "19")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi18"  ; 18 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))
++     (set (match_operand:SI 19 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 68))))])]
++  "(XVECLEN (operands[0], 0) == 18)"  ; accept only a parallel of exactly 18 sets
++  "lmw.bi\t%2, [%1], %19, 0x0"  ; %2 / %19 are the destinations for offsets 0 and 68
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "18")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi17"  ; 17 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))
++     (set (match_operand:SI 18 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 64))))])]
++  "(XVECLEN (operands[0], 0) == 17)"  ; accept only a parallel of exactly 17 sets
++  "lmw.bi\t%2, [%1], %18, 0x0"  ; %2 / %18 are the destinations for offsets 0 and 64
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "17")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi16"  ; 16 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))
++     (set (match_operand:SI 17 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 60))))])]
++  "(XVECLEN (operands[0], 0) == 16)"  ; accept only a parallel of exactly 16 sets
++  "lmw.bi\t%2, [%1], %17, 0x0"  ; %2 / %17 are the destinations for offsets 0 and 60
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "16")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi15"  ; 15 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))
++     (set (match_operand:SI 16 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 56))))])]
++  "(XVECLEN (operands[0], 0) == 15)"  ; accept only a parallel of exactly 15 sets
++  "lmw.bi\t%2, [%1], %16, 0x0"  ; %2 / %16 are the destinations for offsets 0 and 56
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "15")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi14"  ; 14 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))
++     (set (match_operand:SI 15 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 52))))])]
++  "(XVECLEN (operands[0], 0) == 14)"  ; accept only a parallel of exactly 14 sets
++  "lmw.bi\t%2, [%1], %15, 0x0"  ; %2 / %15 are the destinations for offsets 0 and 52
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "14")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi13"  ; 13 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))
++     (set (match_operand:SI 14 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 48))))])]
++  "(XVECLEN (operands[0], 0) == 13)"  ; accept only a parallel of exactly 13 sets
++  "lmw.bi\t%2, [%1], %14, 0x0"  ; %2 / %14 are the destinations for offsets 0 and 48
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "13")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi12"  ; 12 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))
++     (set (match_operand:SI 13 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 44))))])]
++  "(XVECLEN (operands[0], 0) == 12)"  ; accept only a parallel of exactly 12 sets
++  "lmw.bi\t%2, [%1], %13, 0x0"  ; %2 / %13 are the destinations for offsets 0 and 44
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "12")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi11"  ; 11 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))
++     (set (match_operand:SI 12 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 40))))])]
++  "(XVECLEN (operands[0], 0) == 11)"  ; accept only a parallel of exactly 11 sets
++  "lmw.bi\t%2, [%1], %12, 0x0"  ; %2 / %12 are the destinations for offsets 0 and 40
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "11")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi10"  ; 10 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))
++     (set (match_operand:SI 11 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 36))))])]
++  "(XVECLEN (operands[0], 0) == 10)"  ; accept only a parallel of exactly 10 sets
++  "lmw.bi\t%2, [%1], %11, 0x0"  ; %2 / %11 are the destinations for offsets 0 and 36
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"             "10")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi9"  ; 9 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))
++     (set (match_operand:SI 10 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 32))))])]
++  "(XVECLEN (operands[0], 0) == 9)"  ; accept only a parallel of exactly 9 sets
++  "lmw.bi\t%2, [%1], %10, 0x0"  ; %2 / %10 are the destinations for offsets 0 and 32
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "9")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi8"  ; 8 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))
++     (set (match_operand:SI 9 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 28))))])]
++  "(XVECLEN (operands[0], 0) == 8)"  ; accept only a parallel of exactly 8 sets
++  "lmw.bi\t%2, [%1], %9, 0x0"  ; %2 / %9 are the destinations for offsets 0 and 28
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "8")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi7"  ; 7 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))
++     (set (match_operand:SI 8 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 24))))])]
++  "(XVECLEN (operands[0], 0) == 7)"  ; accept only a parallel of exactly 7 sets
++  "lmw.bi\t%2, [%1], %8, 0x0"  ; %2 / %8 are the destinations for offsets 0 and 24
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "7")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi6"  ; 6 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))
++     (set (match_operand:SI 7 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 20))))])]
++  "(XVECLEN (operands[0], 0) == 6)"  ; accept only a parallel of exactly 6 sets
++  "lmw.bi\t%2, [%1], %7, 0x0"  ; %2 / %7 are the destinations for offsets 0 and 20
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "6")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi5"  ; 5 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))
++     (set (match_operand:SI 6 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 16))))])]
++  "(XVECLEN (operands[0], 0) == 5)"  ; accept only a parallel of exactly 5 sets
++  "lmw.bi\t%2, [%1], %6, 0x0"  ; %2 / %6 are the destinations for offsets 0 and 16
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "5")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi4"  ; 4 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
++     (set (match_operand:SI 5 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))])]
++  "(XVECLEN (operands[0], 0) == 4)"  ; accept only a parallel of exactly 4 sets
++  "lmw.bi\t%2, [%1], %5, 0x0"  ; %2 / %5 are the destinations for offsets 0 and 12
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "4")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi3"  ; 3 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
++     (set (match_operand:SI 4 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))])]
++  "(XVECLEN (operands[0], 0) == 3)"  ; accept only a parallel of exactly 3 sets
++  "lmw.bi\t%2, [%1], %4, 0x0"  ; %2 / %4 are the destinations for offsets 0 and 8
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "3")
++   (set_attr "length"             "4")]
++)
++
++(define_insn "*lmwsi2"  ; 2 word loads off base operand 1, emitted as one lmw.bi
++  [(match_parallel 0 "nds32_load_multiple_operation"
++    [(set (match_operand:SI 2 "register_operand" "")
++	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
++     (set (match_operand:SI 3 "register_operand" "")
++	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))])]
++  "(XVECLEN (operands[0], 0) == 2)"  ; accept only a parallel of exactly 2 sets
++  "lmw.bi\t%2, [%1], %3, 0x0"  ; %2 / %3 are the destinations for offsets 0 and 4
++  [(set_attr "type"   "load_multiple")
++   (set_attr "combo"              "2")
++   (set_attr "length"             "4")]
++)
++
++;; Store Multiple Insns.
++;;
++;; operands[0] is the first memory location.
++;; operands[1] is the first of the consecutive registers.
++;; operands[2] is the number of consecutive registers.
++
++(define_expand "store_multiple"
++  [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
++			  (match_operand:SI 1 "" ""))
++		     (use (match_operand:SI 2 "" ""))])]
++  ""
++{
++  int maximum;
++
++  /* Because the reduced register set has few registers
++     (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31' cannot
++     be used for register allocation),
++     using 8 registers for store_multiple may easily consume all of them.
++     That makes register allocation/spilling hard to get right.
++     So we only allow maximum=4 registers for store_multiple
++     under reduced-set registers.  */
++  if (TARGET_REDUCED_REGS)
++    maximum = 4;
++  else
++    maximum = 8;
++
++  /* Here are the conditions that must all pass,
++     otherwise we have to FAIL this rtx generation:
++       1. The number of consecutive registers must be a constant integer.
++       2. Maximum 4 or 8 registers for smw.bi instruction
++	  (based on this nds32-multiple.md design).
++       3. Minimum 2 registers for smw.bi instruction
++	  (based on this nds32-multiple.md design).
++       4. operands[0] must be memory for sure.
++       5. operands[1] must be register for sure.
++       6. operands[0] is not volatile memory access.
++       7. Do not cross $r15 register because it is not allocatable.  */
++  if (GET_CODE (operands[2]) != CONST_INT
++      || INTVAL (operands[2]) > maximum
++      || INTVAL (operands[2]) < 2
++      || GET_CODE (operands[0]) != MEM
++      || GET_CODE (operands[1]) != REG
++      || MEM_VOLATILE_P (operands[0])
++      || REGNO (operands[1]) + INTVAL (operands[2]) > TA_REGNUM)
++    FAIL;
++
++  /* For (mem addr), we force_reg on addr here,
++     so that nds32_expand_store_multiple can easily use it.  */
++  operands[3] = nds32_expand_store_multiple (REGNO (operands[1]),
++					     INTVAL (operands[2]),
++					     force_reg (SImode,
++							XEXP (operands[0], 0)),
++					     operands[0],
++					     false, NULL);
++})
++
++;; Ordinary Store Multiple.
++(define_insn "*stm_bim_si25"  ; 25 word stores off base operand 2, plus base += 100 into operand 1
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 100)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 76)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 80)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 84)))
++	  (match_operand:SI 24 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 88)))
++	  (match_operand:SI 25 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 92)))
++	  (match_operand:SI 26 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 96)))
++	  (match_operand:SI 27 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 26)"  ; 26 elements: 1 base-update set + 25 stores
++  "smw.bim\t%3, [%1], %27, 0x0"  ; %3 / %27 are the sources for offsets 0 and 96
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "25")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si24"  ; 24 word stores off base operand 2, plus base += 96 into operand 1
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 96)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 76)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 80)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 84)))
++	  (match_operand:SI 24 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 88)))
++	  (match_operand:SI 25 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 92)))
++	  (match_operand:SI 26 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 25)"  ; 25 elements: 1 base-update set + 24 stores
++  "smw.bim\t%3, [%1], %26, 0x0"  ; %3 / %26 are the sources for offsets 0 and 92
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "24")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si23"  ;; 23-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 92)))  ;; matching constraint 1 ties the incoming base to operand 1; 92 = 23 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 76)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 80)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 84)))
++	  (match_operand:SI 24 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 88)))
++	  (match_operand:SI 25 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 24)"  ;; 1 base-register update + 23 stores
++  "smw.bim\t%3, [%1], %25, 0x0"  ;; %3 = register stored at offset 0, %25 = register stored at offset 88
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "23")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si22"  ;; 22-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 88)))  ;; matching constraint 1 ties the incoming base to operand 1; 88 = 22 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 76)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 80)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 84)))
++	  (match_operand:SI 24 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 23)"  ;; 1 base-register update + 22 stores
++  "smw.bim\t%3, [%1], %24, 0x0"  ;; %3 = register stored at offset 0, %24 = register stored at offset 84
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "22")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si21"  ;; 21-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 84)))  ;; matching constraint 1 ties the incoming base to operand 1; 84 = 21 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 76)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 80)))
++	  (match_operand:SI 23 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 22)"  ;; 1 base-register update + 21 stores
++  "smw.bim\t%3, [%1], %23, 0x0"  ;; %3 = register stored at offset 0, %23 = register stored at offset 80
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "21")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si20"  ;; 20-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 80)))  ;; matching constraint 1 ties the incoming base to operand 1; 80 = 20 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 76)))
++	  (match_operand:SI 22 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 21)"  ;; 1 base-register update + 20 stores
++  "smw.bim\t%3, [%1], %22, 0x0"  ;; %3 = register stored at offset 0, %22 = register stored at offset 76
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "20")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si19"  ;; 19-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 76)))  ;; matching constraint 1 ties the incoming base to operand 1; 76 = 19 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 72)))
++	  (match_operand:SI 21 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 20)"  ;; 1 base-register update + 19 stores
++  "smw.bim\t%3, [%1], %21, 0x0"  ;; %3 = register stored at offset 0, %21 = register stored at offset 72
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "19")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si18"  ;; 18-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 72)))  ;; matching constraint 1 ties the incoming base to operand 1; 72 = 18 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 68)))
++	  (match_operand:SI 20 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 19)"  ;; 1 base-register update + 18 stores
++  "smw.bim\t%3, [%1], %20, 0x0"  ;; %3 = register stored at offset 0, %20 = register stored at offset 68
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "18")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si17"  ;; 17-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 68)))  ;; matching constraint 1 ties the incoming base to operand 1; 68 = 17 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 64)))
++	  (match_operand:SI 19 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 18)"  ;; 1 base-register update + 17 stores
++  "smw.bim\t%3, [%1], %19, 0x0"  ;; %3 = register stored at offset 0, %19 = register stored at offset 64
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "17")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si16"  ;; 16-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 64)))  ;; matching constraint 1 ties the incoming base to operand 1; 64 = 16 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 60)))
++	  (match_operand:SI 18 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 17)"  ;; 1 base-register update + 16 stores
++  "smw.bim\t%3, [%1], %18, 0x0"  ;; %3 = register stored at offset 0, %18 = register stored at offset 60
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "16")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si15"  ;; 15-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 60)))  ;; matching constraint 1 ties the incoming base to operand 1; 60 = 15 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 56)))
++	  (match_operand:SI 17 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 16)"  ;; 1 base-register update + 15 stores
++  "smw.bim\t%3, [%1], %17, 0x0"  ;; %3 = register stored at offset 0, %17 = register stored at offset 56
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "15")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si14"  ;; 14-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 56)))  ;; matching constraint 1 ties the incoming base to operand 1; 56 = 14 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 52)))
++	  (match_operand:SI 16 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 15)"  ;; 1 base-register update + 14 stores
++  "smw.bim\t%3, [%1], %16, 0x0"  ;; %3 = register stored at offset 0, %16 = register stored at offset 52
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "14")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si13"  ;; 13-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 52)))  ;; matching constraint 1 ties the incoming base to operand 1; 52 = 13 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 48)))
++	  (match_operand:SI 15 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 14)"  ;; 1 base-register update + 13 stores
++  "smw.bim\t%3, [%1], %15, 0x0"  ;; %3 = register stored at offset 0, %15 = register stored at offset 48
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "13")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si12"  ;; 12-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 48)))  ;; matching constraint 1 ties the incoming base to operand 1; 48 = 12 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 44)))
++	  (match_operand:SI 14 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 13)"  ;; 1 base-register update + 12 stores
++  "smw.bim\t%3, [%1], %14, 0x0"  ;; %3 = register stored at offset 0, %14 = register stored at offset 44
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "12")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si11"  ;; 11-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 44)))  ;; matching constraint 1 ties the incoming base to operand 1; 44 = 11 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 40)))
++	  (match_operand:SI 13 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 12)"  ;; 1 base-register update + 11 stores
++  "smw.bim\t%3, [%1], %13, 0x0"  ;; %3 = register stored at offset 0, %13 = register stored at offset 40
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "11")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si10"  ;; 10-word SImode store-multiple that also updates the base register
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"  ;; NOTE(review): predicate presumably checks register ordering -- confirm
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 40)))  ;; matching constraint 1 ties the incoming base to operand 1; 40 = 10 * 4 bytes
++     (set (mem:SI (match_dup 2))  ;; first word is stored at the un-updated base address
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 36)))
++	  (match_operand:SI 12 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 11)"  ;; 1 base-register update + 10 stores
++  "smw.bim\t%3, [%1], %12, 0x0"  ;; %3 = register stored at offset 0, %12 = register stored at offset 36
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "10")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si9"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 36)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 32)))
++	  (match_operand:SI 11 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 10)"
++  "smw.bim\t%3, [%1], %11, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "9")
++   (set_attr "length"              "4")]
++)
++
++
++(define_insn "*stm_bim_si8"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 32)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 28)))
++	  (match_operand:SI 10 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 9)"
++  "smw.bim\t%3, [%1], %10, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "8")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si7"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 28)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 24)))
++	  (match_operand:SI 9 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 8)"
++  "smw.bim\t%3, [%1], %9, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "7")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si6"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 24)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 20)))
++	  (match_operand:SI 8 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 7)"
++  "smw.bim\t%3, [%1], %8, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "6")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si5"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 20)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 16)))
++	  (match_operand:SI 7 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 6)"
++  "smw.bim\t%3, [%1], %7, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "5")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si4"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 16)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
++	  (match_operand:SI 6 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 5)"
++  "smw.bim\t%3, [%1], %6, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "4")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si3"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 12)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
++	  (match_operand:SI 5 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 4)"
++  "smw.bim\t%3, [%1], %5, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "3")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stm_bim_si2"
++  [(match_parallel 0 "nds32_store_multiple_and_update_address_operation"
++    [(set (match_operand:SI 1 "register_operand" "=r")
++	  (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 8)))
++     (set (mem:SI (match_dup 2))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
++	  (match_operand:SI 4 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 3)"
++  "smw.bim\t%3, [%1], %4, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "2")
++   (set_attr "length"              "4")]
++)
++
++(define_expand "unaligned_store_update_base_w"
++  [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
++		   (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4)))
++	      (set (mem:SI (match_dup 1))
++		   (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_UASTORE_W))])]
++  ""
++{
++  /* DO NOT emit unaligned_store_w_m immediately, since the web pass doesn't
++     recognize post_inc; try it again after GCC 5.0.
++     REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156  */
++  emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[1]), operands[2]));
++  emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (4, Pmode)));
++  DONE;
++}
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "1")
++   (set_attr "length"              "4")]
++)
++
++(define_expand "unaligned_store_update_base_dw"
++  [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
++		   (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8)))
++	      (set (mem:DI (match_dup 1))
++		   (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])]
++  ""
++{
++  /* DO NOT emit unaligned_store_w_m immediately, since the web pass doesn't
++     recognize post_inc; try it again after GCC 5.0.
++     REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156  */
++  emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2]));
++  emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode)));
++  DONE;
++}
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "2")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi25"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 76)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 80)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 84)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 88)))
++	  (match_operand:SI 24 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 92)))
++	  (match_operand:SI 25 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 96)))
++	  (match_operand:SI 26 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 25)"
++  "smw.bi\t%2, [%1], %26, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "25")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi24"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 76)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 80)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 84)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 88)))
++	  (match_operand:SI 24 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 92)))
++	  (match_operand:SI 25 "register_operand" ""))
++])]
++  "(XVECLEN (operands[0], 0) == 24)"
++  "smw.bi\t%2, [%1], %25, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "24")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi23"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 76)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 80)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 84)))
++	  (match_operand:SI 23 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 88)))
++	  (match_operand:SI 24 "register_operand" ""))
++])]
++  "(XVECLEN (operands[0], 0) == 23)"
++  "smw.bi\t%2, [%1], %24, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "23")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi22"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 76)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 80)))
++	  (match_operand:SI 22 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 84)))
++	  (match_operand:SI 23 "register_operand" ""))
++])]
++  "(XVECLEN (operands[0], 0) == 22)"
++  "smw.bi\t%2, [%1], %23, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "22")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi21"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 76)))
++	  (match_operand:SI 21 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 80)))
++	  (match_operand:SI 22 "register_operand" ""))
++])]
++  "(XVECLEN (operands[0], 0) == 21)"
++  "smw.bi\t%2, [%1], %22, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "21")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi20"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 76)))
++	  (match_operand:SI 21 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 20)"
++  "smw.bi\t%2, [%1], %21, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "20")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi19"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 72)))
++	  (match_operand:SI 20 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 19)"
++  "smw.bi\t%2, [%1], %20, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "19")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "*stmsi18"
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 68)))
++	  (match_operand:SI 19 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 18)"
++  "smw.bi\t%2, [%1], %19, 0x0"
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "18")
++   (set_attr "length"              "4")]
+ )
+ 
+-(define_insn "*lmwsi4"
+-  [(match_parallel 0 "nds32_load_multiple_operation"
+-    [(set (match_operand:SI 2 "register_operand" "")
+-	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+-     (set (match_operand:SI 3 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+-     (set (match_operand:SI 4 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
+-     (set (match_operand:SI 5 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))])]
+-  "(XVECLEN (operands[0], 0) == 4)"
+-  "lmw.bi\t%2, [%1], %5, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++(define_insn "*stmsi17" ;; Matches a 17-word store-multiple: operands 2..18 are stored to [operand 1]+0 .. +64.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 64)))
++	  (match_operand:SI 18 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 17)" ;; Accept only a parallel with exactly 17 elements.
++  "smw.bi\t%2, [%1], %18, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %18 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "17") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
+ )
+ 
+-(define_insn "*lmwsi3"
+-  [(match_parallel 0 "nds32_load_multiple_operation"
+-    [(set (match_operand:SI 2 "register_operand" "")
+-	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+-     (set (match_operand:SI 3 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+-     (set (match_operand:SI 4 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))])]
+-  "(XVECLEN (operands[0], 0) == 3)"
+-  "lmw.bi\t%2, [%1], %4, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++(define_insn "*stmsi16" ;; Matches a 16-word store-multiple: operands 2..17 are stored to [operand 1]+0 .. +60.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 60)))
++	  (match_operand:SI 17 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 16)" ;; Accept only a parallel with exactly 16 elements.
++  "smw.bi\t%2, [%1], %17, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %17 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "16") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
+ )
+ 
+-(define_insn "*lmwsi2"
+-  [(match_parallel 0 "nds32_load_multiple_operation"
+-    [(set (match_operand:SI 2 "register_operand" "")
+-	  (mem:SI (match_operand:SI 1 "register_operand" "r")))
+-     (set (match_operand:SI 3 "register_operand" "")
+-	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))])]
+-  "(XVECLEN (operands[0], 0) == 2)"
+-  "lmw.bi\t%2, [%1], %3, 0x0"
+-  [(set_attr "type"   "load")
+-   (set_attr "length"    "4")]
++(define_insn "*stmsi15" ;; Matches a 15-word store-multiple: operands 2..16 are stored to [operand 1]+0 .. +56.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 56)))
++	  (match_operand:SI 16 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 15)" ;; Accept only a parallel with exactly 15 elements.
++  "smw.bi\t%2, [%1], %16, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %16 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "15") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
+ )
+ 
++(define_insn "*stmsi14" ;; Matches a 14-word store-multiple: operands 2..15 are stored to [operand 1]+0 .. +52.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 52)))
++	  (match_operand:SI 15 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 14)" ;; Accept only a parallel with exactly 14 elements.
++  "smw.bi\t%2, [%1], %15, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %15 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "14") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
++)
+ 
+-;; Store Multiple Insns.
+-;;
+-;; operands[0] is the first memory location.
+-;; opernads[1] is the first of the consecutive registers.
+-;; operands[2] is the number of consecutive registers.
+-
+-(define_expand "store_multiple"
+-  [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+-			  (match_operand:SI 1 "" ""))
+-		     (use (match_operand:SI 2 "" ""))])]
+-  ""
+-{
+-  int maximum;
++(define_insn "*stmsi13" ;; Matches a 13-word store-multiple: operands 2..14 are stored to [operand 1]+0 .. +48.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 48)))
++	  (match_operand:SI 14 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 13)" ;; Accept only a parallel with exactly 13 elements.
++  "smw.bi\t%2, [%1], %14, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %14 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "13") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
++)
+ 
+-  /* Because reduced-set regsiters has few registers
+-     (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' cannot
+-     be used for register allocation),
+-     using 8 registers for store_multiple may easily consume all of them.
+-     It makes register allocation/spilling hard to work.
+-     So we only allow maximum=4 registers for store_multiple
+-     under reduced-set registers.  */
+-  if (TARGET_REDUCED_REGS)
+-    maximum = 4;
+-  else
+-    maximum = 8;
++(define_insn "*stmsi12" ;; Matches a 12-word store-multiple: operands 2..13 are stored to [operand 1]+0 .. +44.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 44)))
++	  (match_operand:SI 13 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 12)" ;; Accept only a parallel with exactly 12 elements.
++  "smw.bi\t%2, [%1], %13, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %13 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "12") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
++)
+ 
+-  /* Here are the conditions that must be all passed,
+-     otherwise we have to FAIL this rtx generation:
+-       1. The number of consecutive registers must be integer.
+-       2. Maximum 4 or 8 registers for smw.bi instruction
+-          (based on this nds32-multiple.md design).
+-       3. Minimum 2 registers for smw.bi instruction
+-          (based on this nds32-multiple.md design).
+-       4. operands[0] must be memory for sure.
+-       5. operands[1] must be register for sure.
+-       6. Do not cross $r15 register because it is not allocatable.  */
+-  if (GET_CODE (operands[2]) != CONST_INT
+-      || INTVAL (operands[2]) > maximum
+-      || INTVAL (operands[2]) < 2
+-      || GET_CODE (operands[0]) != MEM
+-      || GET_CODE (operands[1]) != REG
+-      || REGNO (operands[1]) + INTVAL (operands[2]) > TA_REGNUM)
+-    FAIL;
++(define_insn "*stmsi11" ;; Matches an 11-word store-multiple: operands 2..12 are stored to [operand 1]+0 .. +40.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 40)))
++	  (match_operand:SI 12 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 11)" ;; Accept only a parallel with exactly 11 elements.
++  "smw.bi\t%2, [%1], %12, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %12 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "11") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
++)
+ 
+-  /* For (mem addr), we force_reg on addr here,
+-     so that nds32_expand_store_multiple can easily use it.  */
+-  operands[3] = nds32_expand_store_multiple (REGNO (operands[1]),
+-					     INTVAL (operands[2]),
+-					     force_reg (SImode,
+-							XEXP (operands[0], 0)),
+-					     operands[0]);
+-})
++(define_insn "*stmsi10" ;; Matches a 10-word store-multiple: operands 2..11 are stored to [operand 1]+0 .. +36.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 36)))
++	  (match_operand:SI 11 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 10)" ;; Accept only a parallel with exactly 10 elements.
++  "smw.bi\t%2, [%1], %11, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %11 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"              "10") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
++)
+ 
+-;; Ordinary Store Multiple.
++(define_insn "*stmsi9" ;; Matches a 9-word store-multiple: operands 2..10 are stored to [operand 1]+0 .. +32.
++  [(match_parallel 0 "nds32_store_multiple_operation"
++    [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
++	  (match_operand:SI 2  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
++	  (match_operand:SI 3  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
++	  (match_operand:SI 4  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
++	  (match_operand:SI 5  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
++	  (match_operand:SI 6  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
++	  (match_operand:SI 7  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
++	  (match_operand:SI 8  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
++	  (match_operand:SI 9  "register_operand" ""))
++     (set (mem:SI (plus:SI (match_dup 1) (const_int 32)))
++	  (match_operand:SI 10 "register_operand" ""))])]
++  "(XVECLEN (operands[0], 0) == 9)" ;; Accept only a parallel with exactly 9 elements.
++  "smw.bi\t%2, [%1], %10, 0x0" ;; %2 = register stored at offset 0, %1 = base address, %10 = register stored at the highest offset.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "9") ;; Number of words moved; the n10 pipeline reservations test this attribute.
++   (set_attr "length"              "4")]
++)
+ 
+ (define_insn "*stmsi8"
+   [(match_parallel 0 "nds32_store_multiple_operation"
+@@ -276,8 +3634,9 @@
+ 	  (match_operand:SI 9 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 8)"
+   "smw.bi\t%2, [%1], %9, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "8")
++   (set_attr "length"              "4")]
+ )
+ 
+ (define_insn "*stmsi7"
+@@ -298,8 +3657,9 @@
+ 	  (match_operand:SI 8 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 7)"
+   "smw.bi\t%2, [%1], %8, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "7")
++   (set_attr "length"              "4")]
+ )
+ 
+ (define_insn "*stmsi6"
+@@ -318,8 +3678,9 @@
+ 	  (match_operand:SI 7 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 6)"
+   "smw.bi\t%2, [%1], %7, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "6")
++   (set_attr "length"              "4")]
+ )
+ 
+ (define_insn "*stmsi5"
+@@ -336,8 +3697,9 @@
+ 	  (match_operand:SI 6 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 5)"
+   "smw.bi\t%2, [%1], %6, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "5")
++   (set_attr "length"              "4")]
+ )
+ 
+ (define_insn "*stmsi4"
+@@ -352,8 +3714,9 @@
+ 	  (match_operand:SI 5 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 4)"
+   "smw.bi\t%2, [%1], %5, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "4")
++   (set_attr "length"              "4")]
+ )
+ 
+ (define_insn "*stmsi3"
+@@ -366,8 +3729,9 @@
+ 	  (match_operand:SI 4 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 3)"
+   "smw.bi\t%2, [%1], %4, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "3")
++   (set_attr "length"              "4")]
+ )
+ 
+ (define_insn "*stmsi2"
+@@ -378,8 +3742,9 @@
+ 	  (match_operand:SI 3 "register_operand" ""))])]
+   "(XVECLEN (operands[0], 0) == 2)"
+   "smw.bi\t%2, [%1], %3, 0x0"
+-  [(set_attr "type"   "store")
+-   (set_attr "length"     "4")]
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "2")
++   (set_attr "length"              "4")]
+ )
+ 
+ ;; Move a block of memory if it is word aligned and MORE than 2 words long.
+@@ -391,14 +3756,14 @@
+ ;; operands[2] is the number of bytes to move.
+ ;; operands[3] is the known shared alignment.
+ 
+-(define_expand "movmemqi"
++(define_expand "movmemsi"
+   [(match_operand:BLK 0 "general_operand" "")
+    (match_operand:BLK 1 "general_operand" "")
+-   (match_operand:SI 2 "const_int_operand" "")
++   (match_operand:SI 2 "nds32_reg_constant_operand" "")
+    (match_operand:SI 3 "const_int_operand" "")]
+   ""
+ {
+-  if (nds32_expand_movmemqi (operands[0],
++  if (nds32_expand_movmemsi (operands[0],
+ 			     operands[1],
+ 			     operands[2],
+ 			     operands[3]))
+@@ -408,3 +3773,75 @@
+ })
+ 
+ ;; ------------------------------------------------------------------------
++
++(define_insn "lmwzb" ;; Named insn emitting lmwzb.bm: loads operand 2 from [operand 1] and yields operand 1 + 4 in operand 0.
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) ;; Constraint "0" ties the input base to the register of output operand 0.
++   (set (match_operand:SI 2 "register_operand" "=r")
++	(unspec:SI [(mem:SI (match_dup 1))] UNSPEC_LMWZB))] ;; NOTE(review): the exact load semantics are hidden behind UNSPEC_LMWZB -- confirm against the ISA manual.
++  "" ;; No extra enabling condition.
++  "lmwzb.bm\t%2, [%1], %2, 0x0" ;; %2 is both the first and last register of the range, i.e. one register.
++  [(set_attr "type"    "load_multiple")
++   (set_attr "combo"               "1")
++   (set_attr "length"              "4")]
++)
++
++(define_insn "smwzb" ;; Named insn emitting smwzb.bm: stores operand 2 to [operand 1] and yields operand 1 + 4 in operand 0.
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) ;; Constraint "0" ties the input base to the register of output operand 0.
++   (set (mem:SI (match_dup 1))
++	(unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SMWZB))] ;; NOTE(review): the exact store semantics are hidden behind UNSPEC_SMWZB -- confirm against the ISA manual.
++  "" ;; No extra enabling condition.
++  "smwzb.bm\t%2, [%1], %2, 0x0" ;; %2 is both the first and last register of the range, i.e. one register.
++  [(set_attr "type"   "store_multiple")
++   (set_attr "combo"               "1")
++   (set_attr "length"              "4")]
++)
++
++(define_expand "movstr" ;; Expander named "movstr"; all work is delegated to nds32_expand_movstr.
++  [(match_operand:SI 0 "register_operand" "")
++   (match_operand:BLK 1 "memory_operand" "")
++   (match_operand:BLK 2 "memory_operand" "")]
++  "TARGET_EXT_STRING && TARGET_INLINE_STRCPY" ;; Enabled only when both target flags are set.
++{
++  if (nds32_expand_movstr (operands[0],
++			   operands[1],
++			   operands[2]))
++    DONE; /* Helper returned nonzero: the expansion is finished.  */
++
++  FAIL; /* Helper returned zero: reject this expansion.  */
++})
++
++(define_expand "strlensi" ;; Expander named "strlensi"; all work is delegated to nds32_expand_strlen.
++  [(match_operand:SI  0 "register_operand")
++   (match_operand:BLK 1 "memory_operand")
++   (match_operand:QI  2 "nds32_reg_constant_operand")
++   (match_operand     3 "const_int_operand")]
++  "TARGET_EXT_STRING" ;; Enabled only when TARGET_EXT_STRING is set.
++{
++  if (nds32_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
++    DONE; /* Helper returned nonzero: the expansion is finished.  */
++
++  FAIL; /* Helper returned zero: reject this expansion.  */
++})
++
++(define_expand "setmemsi" ;; Expander named "setmemsi"; all six operands are forwarded to nds32_expand_setmem.
++   [(use (match_operand:BLK 0 "memory_operand"))
++    (use (match_operand:SI 1 "nds32_reg_constant_operand"))
++    (use (match_operand:QI 2 "nonmemory_operand"))
++    (use (match_operand 3 "const_int_operand"))
++    (use (match_operand:SI 4 "const_int_operand"))
++    (use (match_operand:SI 5 "const_int_operand"))]
++  "" ;; No extra enabling condition.
++{
++ if (nds32_expand_setmem (operands[0], operands[1],
++			  operands[2], operands[3],
++			  operands[4], operands[5]))
++   DONE; /* Helper returned nonzero: the expansion is finished.  */
++
++ FAIL; /* Helper returned zero: reject this expansion.  */
++})
++
++
++
++;; ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/nds32-n10.md b/gcc/config/nds32/nds32-n10.md
+new file mode 100644
+index 0000000..7261608
+--- /dev/null
++++ b/gcc/config/nds32/nds32-n10.md
+@@ -0,0 +1,439 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define N10 pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_n10_machine") ;; The one automaton that every cpu unit below is attached to.
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; IF - Instruction Fetch
++;; II - Instruction Issue / Instruction Decode
++;; EX - Instruction Execution
++;; MM - Memory Execution
++;; WB - Instruction Retire / Result Write-Back
++
++(define_cpu_unit "n10_ii" "nds32_n10_machine") ;; One unit per stage II, EX, MM, WB; no unit is declared for IF.
++(define_cpu_unit "n10_ex" "nds32_n10_machine")
++(define_cpu_unit "n10_mm" "nds32_n10_machine")
++(define_cpu_unit "n10_wb" "nds32_n10_machine")
++(define_cpu_unit "n10f_iq" "nds32_n10_machine") ;; NOTE(review): the n10f_* units are not covered by the stage list above; presumably a separate pipeline -- confirm.
++(define_cpu_unit "n10f_rf" "nds32_n10_machine")
++(define_cpu_unit "n10f_e1" "nds32_n10_machine")
++(define_cpu_unit "n10f_e2" "nds32_n10_machine")
++(define_cpu_unit "n10f_e3" "nds32_n10_machine")
++(define_cpu_unit "n10f_e4" "nds32_n10_machine")
++
++(define_insn_reservation "nds_n10_unknown" 1 ;; Default latency 1. In the reservation string "," moves to the next cycle, "+" reserves units in the same cycle, "*N" repeats.
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_misc" 1 ;; One cycle in each of II, EX, MM, WB in turn.
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_mmu" 1 ;; One cycle in each of II, EX, MM, WB in turn.
++  (and (eq_attr "type" "mmu")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_alu" 1 ;; One cycle in each of II, EX, MM, WB in turn.
++  (and (eq_attr "type" "alu")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_alu_shift" 1 ;; Each stage is held for two consecutive cycles, overlapping the next stage.
++  (and (eq_attr "type" "alu_shift")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_pbsad" 1 ;; EX is occupied for three cycles.
++  (and (eq_attr "type" "pbsad")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex*3, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_pbsada" 1 ;; EX is occupied for three cycles.
++  (and (eq_attr "type" "pbsada")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex*3, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_load" 1 ;; Selected by nds32::load_single_p (insn) rather than by the "type" attribute.
++  (and (match_test "nds32::load_single_p (insn)")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_store" 1 ;; Selected by nds32::store_single_p (insn) rather than by the "type" attribute.
++  (and (match_test "nds32::store_single_p (insn)")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_1" 1 ;; combo 1: same unit occupancy as nds_n10_load.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "1")))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_2" 1 ;; combo 2, or any insn for which nds32::load_double_p (insn) holds.
++  (and (eq_attr "pipeline_model" "n10")
++       (ior (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::load_double_p (insn)")))
++  "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_3" 1 ;; combo 3: each stage is held for three consecutive cycles.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "3")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_4" 1 ;; combo 4: one cycle with all four units busy at once.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "4")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_5" 1 ;; combo 5: the all-four-units cycle is repeated twice.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "5")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_6" 1 ;; combo 6: the all-four-units cycle is repeated three times.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "6")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_7" 1 ;; combo 7: the all-four-units cycle is repeated four times.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "7")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_load_multiple_N" 1 ;; Every combo >= 8 shares one reservation with five repeats.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "load_multiple")
++	    (match_test "get_attr_combo (insn) >= 8")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_1" 1 ;; combo 1: same unit occupancy as nds_n10_store.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "1")))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_2" 1 ;; combo 2, or any insn for which nds32::store_double_p (insn) holds.
++  (and (eq_attr "pipeline_model" "n10")
++       (ior (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "2"))
++       (match_test "nds32::store_double_p (insn)")))
++  "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_3" 1 ;; combo 3: each stage is held for three consecutive cycles.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "3")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_4" 1 ;; combo 4: one cycle with all four units busy at once.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "4")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_5" 1 ;; combo 5: the all-four-units cycle is repeated twice.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "5")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_6" 1 ;; combo 6: the all-four-units cycle is repeated three times.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "6")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_7" 1 ;; combo 7: the all-four-units cycle is repeated four times.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "7")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_store_multiple_N" 1 ;; Every combo >= 8 shares one reservation with five repeats.
++  (and (eq_attr "pipeline_model" "n10")
++       (and (eq_attr "type" "store_multiple")
++	    (match_test "get_attr_combo (insn) >= 8")))
++  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
++
++(define_insn_reservation "nds_n10_mul" 1
++  (and (eq_attr "type" "mul")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_mac" 1
++  (and (eq_attr "type" "mac")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_div" 1
++  (and (eq_attr "type" "div")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex*34, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_alu" 1
++  (and (eq_attr "type" "dalu")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_alu64" 1
++  (and (eq_attr "type" "dalu64")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_alu_round" 1
++  (and (eq_attr "type" "daluround")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_cmp" 1
++  (and (eq_attr "type" "dcmp")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_clip" 1
++  (and (eq_attr "type" "dclip")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_mul" 1
++  (and (eq_attr "type" "dmul")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_mac" 1
++  (and (eq_attr "type" "dmac")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_insb" 1
++  (and (eq_attr "type" "dinsb")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_pack" 1
++  (and (eq_attr "type" "dpack")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_bpick" 1
++  (and (eq_attr "type" "dbpick")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_dsp_wext" 1
++  (and (eq_attr "type" "dwext")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ex, n10_mm, n10_wb")
++
++(define_insn_reservation "nds_n10_fpu_alu" 4
++  (and (eq_attr "type" "falu")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_muls" 4
++  (and (eq_attr "type" "fmuls")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_muld" 4
++  (and (eq_attr "type" "fmuld")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_macs" 4
++  (and (eq_attr "type" "fmacs")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_macd" 4
++  (and (eq_attr "type" "fmacd")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*4, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_divs" 4
++  (and (ior (eq_attr "type" "fdivs")
++	    (eq_attr "type" "fsqrts"))
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_divd" 4
++  (and (ior (eq_attr "type" "fdivd")
++	    (eq_attr "type" "fsqrtd"))
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_fast_alu" 2
++  (and (ior (eq_attr "type" "fcmp")
++	    (ior (eq_attr "type" "fabs")
++		 (ior (eq_attr "type" "fcpy")
++		      (eq_attr "type" "fcmov"))))
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_fmtsr" 4
++  (and (eq_attr "type" "fmtsr")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_fmtdr" 4
++  (and (eq_attr "type" "fmtdr")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_fmfsr" 2
++  (and (eq_attr "type" "fmfsr")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_fmfdr" 2
++  (and (eq_attr "type" "fmfdr")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_load" 3
++  (and (eq_attr "type" "fload")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++(define_insn_reservation "nds_n10_fpu_store" 1
++  (and (eq_attr "type" "fstore")
++       (eq_attr "pipeline_model" "n10"))
++  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD
++;;     Load data from the memory and produce the loaded data. The result is
++;;     ready at MM.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at MM.
++;;   MUL, MAC
++;;     Compute data in the multiply-adder and produce the data. The result
++;;     is ready at MM.
++;;   DIV
++;;     Compute data in the divider and produce the data. The result is ready
++;;     at MM.
++;;
++;; Consumers (RHS)
++;;   ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU
++;;     Require operands at EX.
++;;   ALU_SHIFT_Rb
++;;     An ALU-SHIFT instruction consists of a shift micro-operation followed
++;;     by an arithmetic micro-operation. The operand Rb is used by the first
++;;     micro-operation, and there are some latencies if data dependency occurs.
++;;   MAC_RaRb
++;;     A MAC instruction does multiplication at EX and does accumulation at MM,
++;;     so the operand Rt is required at MM, and operands Ra and Rb are required
++;;     at EX.
++;;   ADDR_IN
++;;     If an instruction requires an address as its input operand, the address
++;;     is required at EX.
++;;   ST
++;;     A store instruction requires its data at MM.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at MM.
++;;   BR
++;;     If a branch instruction is conditional, its input data is required at EX.
++
++;; FPU_ADDR_OUT -> FPU_ADDR_IN
++;; Main pipeline rules don't need this because their default latency is 1.
++(define_bypass 1
++  "nds_n10_fpu_load, nds_n10_fpu_store"
++  "nds_n10_fpu_load, nds_n10_fpu_store"
++  "nds32_n10_ex_to_ex_p"
++)
++
++;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
++;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
++(define_bypass 2
++  "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\
++   nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\
++   nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext"
++  "nds_n10_alu, nds_n10_alu_shift,\
++   nds_n10_pbsad, nds_n10_pbsada,\
++   nds_n10_mul, nds_n10_mac, nds_n10_div,\
++   nds_n10_branch,\
++   nds_n10_load, nds_n10_store,\
++   nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
++   nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
++   nds_n10_load_multiple_7, nds_n10_load_multiple_N,\
++   nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\
++   nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\
++   nds_n10_store_multiple_7, nds_n10_store_multiple_N,\
++   nds_n10_mmu,\
++   nds_n10_dsp_alu, nds_n10_dsp_alu_round,\
++   nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\
++   nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\
++   nds_n10_dsp_wext, nds_n10_dsp_bpick"
++  "nds32_n10_mm_to_ex_p"
++)
++
++;; LMW(N, N)
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
++;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
++(define_bypass 2
++  "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
++   nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
++   nds_n10_load_multiple_7, nds_n10_load_multiple_N"
++  "nds_n10_alu, nds_n10_alu_shift,\
++   nds_n10_pbsad, nds_n10_pbsada,\
++   nds_n10_mul, nds_n10_mac, nds_n10_div,\
++   nds_n10_branch,\
++   nds_n10_load, nds_n10_store,\
++   nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
++   nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
++   nds_n10_load_multiple_7, nds_n10_load_multiple_N,\
++   nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\
++   nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\
++   nds_n10_store_multiple_7, nds_n10_store_multiple_N,\
++   nds_n10_mmu,\
++   nds_n10_dsp_alu, nds_n10_dsp_alu_round,\
++   nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\
++   nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\
++   nds_n10_dsp_wext, nds_n10_dsp_bpick"
++  "nds32_n10_last_load_to_ex_p"
++)
+diff --git a/gcc/config/nds32/nds32-n13.md b/gcc/config/nds32/nds32-n13.md
+new file mode 100644
+index 0000000..622480d
+--- /dev/null
++++ b/gcc/config/nds32/nds32-n13.md
+@@ -0,0 +1,401 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define N13 pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_n13_machine")
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; F1 - Instruction Fetch First
++;;   Instruction Tag/Data Arrays
++;;   ITLB Address Translation
++;;   Branch Target Buffer Prediction
++;; F2 - Instruction Fetch Second
++;;   Instruction Cache Hit Detection
++;;   Cache Way Selection
++;;   Instruction Alignment
++;; I1 - Instruction Issue First / Instruction Decode
++;;   Instruction Cache Replay Triggering
++;;   32/16-Bit Instruction Decode
++;;   Return Address Stack Prediction
++;; I2 - Instruction Issue Second / Register File Access
++;;   Instruction Issue Logic
++;;   Register File Access
++;; E1 - Instruction Execute First / Address Generation / MAC First
++;;   Data Access Address generation
++;;   Multiply Operation
++;; E2 - Instruction Execute Second / Data Access First / MAC Second /
++;;      ALU Execute
++;;   Skewed ALU
++;;   Branch/Jump/Return Resolution
++;;   Data Tag/Data arrays
++;;   DTLB address translation
++;;   Accumulation Operation
++;; E3 - Instruction Execute Third / Data Access Second
++;;   Data Cache Hit Detection
++;;   Cache Way Selection
++;;   Data Alignment
++;; E4 - Instruction Execute Fourth / Write Back
++;;   Interruption Resolution
++;;   Instruction Retire
++;;   Register File Write Back
++
++(define_cpu_unit "n13_i1" "nds32_n13_machine")
++(define_cpu_unit "n13_i2" "nds32_n13_machine")
++(define_cpu_unit "n13_e1" "nds32_n13_machine")
++(define_cpu_unit "n13_e2" "nds32_n13_machine")
++(define_cpu_unit "n13_e3" "nds32_n13_machine")
++(define_cpu_unit "n13_e4" "nds32_n13_machine")
++
++(define_insn_reservation "nds_n13_unknown" 1
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_misc" 1
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_mmu" 1
++  (and (eq_attr "type" "mmu")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_alu" 1
++  (and (eq_attr "type" "alu")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_alu_shift" 1
++  (and (eq_attr "type" "alu_shift")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_pbsad" 1
++  (and (eq_attr "type" "pbsad")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_pbsada" 1
++  (and (eq_attr "type" "pbsada")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_load" 1
++  (and (match_test "nds32::load_single_p (insn)")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_store" 1
++  (and (match_test "nds32::store_single_p (insn)")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_1" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_2" 1
++  (and (ior (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::load_double_p (insn)"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_3" 1  ;; Three staggered i1..e4 passes.
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_4" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_5" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_6" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_7" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_8" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_load_multiple_12" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_2" 1
++  (and (ior (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::store_double_p (insn)"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_3" 1  ;; Three staggered i1..e4 passes.
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++(define_insn_reservation "nds_n13_store_multiple_12" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
++
++;; The multiplier at E1 takes two cycles.
++(define_insn_reservation "nds_n13_mul" 1
++  (and (eq_attr "type" "mul")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_mac" 1
++  (and (eq_attr "type" "mac")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4")
++
++;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2,
++;; so the worst case is 34.
++(define_insn_reservation "nds_n13_div" 1
++  (and (eq_attr "type" "div")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4")
++
++(define_insn_reservation "nds_n13_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "n13"))
++  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD
++;;     Load data from the memory and produce the loaded data. The result is
++;;     ready at E3.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at E3.
++;;   ADDR_OUT
++;;     Most load/store instructions can produce an address output if updating
++;;     the base register is required. The result is ready at E2, which is
++;;     produced by ALU.
++;;   ALU, ALU_SHIFT, SIMD
++;;     Compute data in ALU and produce the data. The result is ready at E2.
++;;   MUL, MAC
++;;     Compute data in the multiply-adder and produce the data. The result
++;;     is ready at E2.
++;;   DIV
++;;     Compute data in the divider and produce the data. The result is ready
++;;     at E2.
++;;   BR
++;;     Branch-with-link instructions produce a result containing the return
++;;     address. The result is ready at E2.
++;;
++;; Consumers (RHS)
++;;   ALU
++;;     General ALU instructions require operands at E2.
++;;   ALU_E1
++;;     Some special ALU instructions, such as BSE, BSP and MOVD44, require
++;;     operand at E1.
++;;   MUL, DIV, PBSAD, MMU
++;;     Operands are required at E1.
++;;   PBSADA_Rt, PBSADA_RaRb
++;;     Operands Ra and Rb are required at E1, and the operand Rt is required
++;;     at E2.
++;;   ALU_SHIFT_Rb
++;;     An ALU-SHIFT instruction consists of a shift micro-operation followed
++;;     by an arithmetic micro-operation. The operand Rb is used by the first
++;;     micro-operation, and there are some latencies if data dependency occurs.
++;;   MAC_RaRb
++;;     A MAC instruction does multiplication at E1 and does accumulation at E2,
++;;     so the operand Rt is required at E2, and operands Ra and Rb are required
++;;     at E1.
++;;   ADDR_IN
++;;     If an instruction requires an address as its input operand, the address
++;;     is required at E1.
++;;   ST
++;;     A store instruction requires its data at E2.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at E2.
++;;   BR
++;;     If a branch instruction is conditional, its input data is required at E2.
++
++;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
++(define_bypass 3
++  "nds_n13_load"
++  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
++   nds_n13_mul, nds_n13_mac, nds_n13_div,\
++   nds_n13_mmu,\
++   nds_n13_load, nds_n13_store,\
++   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds32_n13_load_to_e1_p"
++)
++
++;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1)
++(define_bypass 2
++  "nds_n13_load"
++  "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds32_n13_load_to_e2_p"
++)
++
++;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
++(define_bypass 3
++  "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
++  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
++   nds_n13_mul, nds_n13_mac, nds_n13_div,\
++   nds_n13_mmu,\
++   nds_n13_load, nds_n13_store,\
++   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds32_n13_last_load_to_e1_p")
++
++;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1)
++(define_bypass 2
++  "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
++  "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds32_n13_last_load_to_e2_p"
++)
++
++;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
++(define_bypass 2
++  "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
++  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
++   nds_n13_mul, nds_n13_mac, nds_n13_div,\
++   nds_n13_mmu,\
++   nds_n13_load, nds_n13_store,\
++   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds32_n13_last_two_load_to_e1_p")
++
++;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT
++;;   ->  ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
++(define_bypass 2
++  "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\
++   nds_n13_mul, nds_n13_mac, nds_n13_div,\
++   nds_n13_load, nds_n13_store,\
++   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
++   nds_n13_mul, nds_n13_mac, nds_n13_div,\
++   nds_n13_mmu,\
++   nds_n13_load, nds_n13_store,\
++   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
++   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
++   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
++   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
++   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
++   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
++  "nds32_n13_e2_to_e1_p")
+diff --git a/gcc/config/nds32/nds32-n7.md b/gcc/config/nds32/nds32-n7.md
+new file mode 100644
+index 0000000..ff788ce
+--- /dev/null
++++ b/gcc/config/nds32/nds32-n7.md
+@@ -0,0 +1,298 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define N7 pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_n7_machine")
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; IF - Instruction Fetch
++;;   Instruction Alignment
++;;   Instruction Pre-decode
++;; II - Instruction Issue
++;;   Instruction Decode
++;;   Register File Access
++;;   Instruction Execution
++;;   Interrupt Handling
++;; EXD - Pseudo Stage
++;;   Load Data Completion
++
++(define_cpu_unit "n7_ii" "nds32_n7_machine")
++
++(define_insn_reservation "nds_n7_unknown" 1
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_misc" 1
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_alu" 1
++  (and (eq_attr "type" "alu")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_load" 1
++  (and (match_test "nds32::load_single_p (insn)")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_store" 1
++  (and (match_test "nds32::store_single_p (insn)")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_load_multiple_1" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_load_multiple_2" 1
++  (and (ior (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::load_double_p (insn)"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*2")
++
++(define_insn_reservation "nds_n7_load_multiple_3" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*3")
++
++(define_insn_reservation "nds_n7_load_multiple_4" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*4")
++
++(define_insn_reservation "nds_n7_load_multiple_5" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*5")
++
++(define_insn_reservation "nds_n7_load_multiple_6" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*6")
++
++(define_insn_reservation "nds_n7_load_multiple_7" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*7")
++
++(define_insn_reservation "nds_n7_load_multiple_8" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*8")
++
++(define_insn_reservation "nds_n7_load_multiple_12" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*12")
++
++(define_insn_reservation "nds_n7_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_store_multiple_2" 1
++  (and (ior (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::store_double_p (insn)"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*2")
++
++(define_insn_reservation "nds_n7_store_multiple_3" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*3")
++
++(define_insn_reservation "nds_n7_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*4")
++
++(define_insn_reservation "nds_n7_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*5")
++
++(define_insn_reservation "nds_n7_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*6")
++
++(define_insn_reservation "nds_n7_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*7")
++
++(define_insn_reservation "nds_n7_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*8")
++
++(define_insn_reservation "nds_n7_store_multiple_12" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*12")
++
++(define_insn_reservation "nds_n7_mul_fast" 1
++  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n7")))
++  "n7_ii")
++
++(define_insn_reservation "nds_n7_mul_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n7")))
++  "n7_ii*17")
++
++(define_insn_reservation "nds_n7_mac_fast" 1
++  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n7")))
++  "n7_ii*2")
++
++(define_insn_reservation "nds_n7_mac_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n7")))
++  "n7_ii*18")
++
++(define_insn_reservation "nds_n7_div" 1
++  (and (eq_attr "type" "div")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii*37")
++
++(define_insn_reservation "nds_n7_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "n7"))
++  "n7_ii")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD_!bi
++;;     Load data from the memory (without updating the base register) and
++;;     produce the loaded data. The result is ready at EXD.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at EXD. If the base register should be
++;;     updated, an extra micro-operation is inserted to the sequence, and the
++;;     result is ready at II.
++;;
++;; Consumers (RHS)
++;;   ALU, MUL, DIV
++;;     Require operands at II.
++;;   MOVD44_E
++;;     A double-word move instruction needs two micro-operations because the
++;;     register port is 2R1W. The first micro-operation writes an even number
++;;     register, and the second micro-operation writes an odd number register.
++;;     Each input operand is required at II for each micro-operation. The letter
++;;     'E' stands for even.
++;;   MAC_RaRb
++;;     A MAC instruction is separated into two micro-operations. The first
++;;     micro-operation does the multiplication, which requires operands Ra
++;;     and Rb at II. The second micro-operation does the accumulation, which
++;;     requires the operand Rt at II.
++;;   ADDR_IN_MOP(N)
++;;     Because the register port is 2R1W, some load/store instructions are
++;;     separated into many micro-operations. N denotes the address input is
++;;     required by the N-th micro-operation. Such operand is required at II.
++;;   ST_bi
++;;     A post-increment store instruction requires its data at II.
++;;   ST_!bi_RI
++;;     A store instruction with an immediate offset requires its data at II.
++;;     If the offset field is a register (ST_!bi_RR), the instruction will be
++;;     separated into two micro-operations, and the second one requires the
++;;     input operand at II in order to store it to the memory.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at II. If the base
++;;     register should be updated, an extra micro-operation is inserted to the
++;;     sequence.
++;;   BR_COND
++;;     If a branch instruction is conditional, its input data is required at II.
++
++;; LD_!bi
++;;   -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1)
++(define_bypass 2
++  "nds_n7_load"
++  "nds_n7_alu,\
++   nds_n7_mul_fast, nds_n7_mul_slow,\
++   nds_n7_mac_fast, nds_n7_mac_slow,\
++   nds_n7_div,\
++   nds_n7_branch,\
++   nds_n7_load, nds_n7_store,\
++   nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\
++   nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\
++   nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\
++   nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\
++   nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\
++   nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12"
++  "nds32_n7_load_to_ii_p"
++)
++
++;; LMW(N, N)
++;;   -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1)
++(define_bypass 2
++  "nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\
++   nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\
++   nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12"
++  "nds_n7_alu,\
++   nds_n7_mul_fast, nds_n7_mul_slow,\
++   nds_n7_mac_fast, nds_n7_mac_slow,\
++   nds_n7_div,\
++   nds_n7_branch,\
++   nds_n7_load, nds_n7_store,\
++   nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\
++   nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\
++   nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\
++   nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\
++   nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\
++   nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12"
++  "nds32_n7_last_load_to_ii_p"
++)
+diff --git a/gcc/config/nds32/nds32-n8.md b/gcc/config/nds32/nds32-n8.md
+new file mode 100644
+index 0000000..c3db9cd
+--- /dev/null
++++ b/gcc/config/nds32/nds32-n8.md
+@@ -0,0 +1,389 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define N8 pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_n8_machine")
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; IF - Instruction Fetch
++;; II - Instruction Issue / Address Generation
++;; EX - Instruction Execution
++;; EXD - Pseudo Stage / Load Data Completion
++
++(define_cpu_unit "n8_ii" "nds32_n8_machine")
++(define_cpu_unit "n8_ex" "nds32_n8_machine")
++
++(define_insn_reservation "nds_n8_unknown" 1
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_misc" 1
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_alu" 1
++  (and (eq_attr "type" "alu")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_load" 1
++  (and (match_test "nds32::load_single_p (insn)")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_store" 1
++  (and (match_test "nds32::store_single_p (insn)")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_1" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_2" 1
++  (and (ior (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::load_double_p (insn)"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ii+n8_ex, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_3" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*2, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_4" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*3, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_5" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*4, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_6" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*5, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_7" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*6, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_8" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*7, n8_ex")
++
++(define_insn_reservation "nds_n8_load_multiple_12" 1
++  (and (and (eq_attr "type" "load_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*11, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_2" 1
++  (and (ior (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::store_double_p (insn)"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ii+n8_ex, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_3" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*2, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*3, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*4, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*5, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*6, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*7, n8_ex")
++
++(define_insn_reservation "nds_n8_store_multiple_12" 1
++  (and (and (eq_attr "type" "store_multiple")
++	    (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*11, n8_ex")
++
++(define_insn_reservation "nds_n8_mul_fast" 1
++  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n8")))
++  "n8_ii, n8_ex")
++
++(define_insn_reservation "nds_n8_mul_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n8")))
++  "n8_ii, n8_ex*16")
++
++(define_insn_reservation "nds_n8_mac_fast" 1
++  (and (match_test "nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n8")))
++  "n8_ii, n8_ii+n8_ex, n8_ex")
++
++(define_insn_reservation "nds_n8_mac_slow" 1
++  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n8")))
++  "n8_ii, (n8_ii+n8_ex)*16, n8_ex")
++
++(define_insn_reservation "nds_n8_div" 1
++  (and (eq_attr "type" "div")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, (n8_ii+n8_ex)*36, n8_ex")
++
++(define_insn_reservation "nds_n8_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "n8"))
++  "n8_ii, n8_ex")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD_!bi
++;;     Load data from the memory (without updating the base register) and
++;;     produce the loaded data. The result is ready at EXD.
++;;   LD_bi
++;;     Load data from the memory (with updating the base register) and
++;;     produce the loaded data. The result is ready at EXD. Because the
++;;     register port is 2R1W, two micro-operations are required in order
++;;     to write two registers. The base register is updated by the second
++;;     micro-operation and the result is ready at EX.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at EXD. If the base register should be
++;;     updated, an extra micro-operation is inserted to the sequence, and the
++;;     result is ready at EX.
++;;   ADDR_OUT
++;;     Most load/store instructions can produce an address output if updating
++;;     the base register is required. The result is ready at EX, which is
++;;     produced by ALU.
++;;   ALU, MUL, MAC
++;;     The result is ready at EX.
++;;   MOVD44_O
++;;     A double-word move instruction needs to write registers twice. Because
++;;     the register port is 2R1W, two micro-operations are required. The even
++;;     number register is updated by the first one, and the odd number register
++;;     is updated by the second one. Each of the results is ready at EX.
++;;     The letter 'O' stands for odd.
++;;   DIV_Rs
++;;     A division instruction saves the quotient result to Rt and saves the
++;;     remainder result to Rs. It requires two micro-operations because the
++;;     register port is 2R1W. The first micro-operation writes to Rt, and
++;;     the second one writes to Rs. Each of the results is ready at EX.
++;;
++;; Consumers (RHS)
++;;   ALU, MUL, DIV
++;;     Require operands at EX.
++;;   MOVD44_E
++;;     The letter 'E' stands for even, which is accessed by the first micro-
++;;     operation and a movd44 instruction. The operand is required at EX.
++;;   MAC_RaRb
++;;     A MAC instruction is separated into two micro-operations. The first
++;;     micro-operation does the multiplication, which requires operands Ra
++;;     and Rb at EX. The second micro-operation does the accumulation, which
++;;     requires the operand Rt at EX.
++;;   ADDR_IN_MOP(N)
++;;     Because the register port is 2R1W, some load/store instructions are
++;;     separated into many micro-operations. N denotes the address input is
++;;     required by the N-th micro-operation. Such operand is required at II.
++;;   ST_bi
++;;     A post-increment store instruction requires its data at EX.
++;;   ST_!bi_RI
++;;     A store instruction with an immediate offset requires its data at EX.
++;;     If the offset field is a register (ST_!bi_RR), the instruction will be
++;;     separated into two micro-operations, and the second one requires the
++;;     input operand at EX in order to store it to the memory.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at EX. If the base
++;;     register should be updated, an extra micro-operation is inserted to the
++;;     sequence.
++;;   BR_COND
++;;     If a branch instruction is conditional, its input data is required at EX.
++
++;; LD_!bi -> ADDR_IN_MOP(1)
++(define_bypass 3
++  "nds_n8_load"
++  "nds_n8_branch,\
++   nds_n8_load, nds_n8_store,\
++   nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_load_to_ii_p"
++)
++
++;; LMW(N, N) -> ADDR_IN_MOP(1)
++(define_bypass 3
++  "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12"
++  "nds_n8_branch,\
++   nds_n8_load, nds_n8_store,\
++   nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_last_load_to_ii_p"
++)
++
++;; LMW(N, N - 1) -> ADDR_IN_MOP(1)
++(define_bypass 2
++  "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12"
++  "nds_n8_branch,\
++   nds_n8_load, nds_n8_store,\
++   nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_last_load_two_to_ii_p"
++)
++
++;; LD_bi -> ADDR_IN_MOP(1)
++(define_bypass 2
++  "nds_n8_load"
++  "nds_n8_branch,\
++   nds_n8_load, nds_n8_store,\
++   nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_load_bi_to_ii_p"
++)
++
++;; LD_!bi -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR_COND, ST_bi, ST_!bi_RI, SMW(N, 1)
++(define_bypass 2
++  "nds_n8_load"
++  "nds_n8_alu,\
++   nds_n8_mul_fast, nds_n8_mul_slow,\
++   nds_n8_mac_fast, nds_n8_mac_slow,\
++   nds_n8_div,\
++   nds_n8_branch,\
++   nds_n8_store,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_load_to_ex_p"
++)
++
++;; ALU, MOVD44_O, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1)
++(define_bypass 2
++  "nds_n8_alu,\
++   nds_n8_mul_fast, nds_n8_mul_slow,\
++   nds_n8_mac_fast, nds_n8_mac_slow,\
++   nds_n8_div,\
++   nds_n8_load, nds_n8_store,\
++   nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds_n8_branch,\
++   nds_n8_load, nds_n8_store,\
++   nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_ex_to_ii_p"
++)
++
++;; LMW(N, N) -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR_COND, ST_bi, ST_!bi_RI, SMW(N, 1)
++(define_bypass 2
++  "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\
++   nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\
++   nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12"
++  "nds_n8_alu,\
++   nds_n8_mul_fast, nds_n8_mul_slow,\
++   nds_n8_mac_fast, nds_n8_mac_slow,\
++   nds_n8_div,\
++   nds_n8_branch,\
++   nds_n8_store,\
++   nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\
++   nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\
++   nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12"
++  "nds32_n8_last_load_to_ex_p"
++)
+diff --git a/gcc/config/nds32/nds32-n9-2r1w.md b/gcc/config/nds32/nds32-n9-2r1w.md
+new file mode 100644
+index 0000000..d0db953
+--- /dev/null
++++ b/gcc/config/nds32/nds32-n9-2r1w.md
+@@ -0,0 +1,362 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define N9 2R1W pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_n9_2r1w_machine")
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; IF - Instruction Fetch
++;; II - Instruction Issue / Instruction Decode
++;; EX - Instruction Execution
++;; MM - Memory Execution
++;; WB - Instruction Retire / Result Write-Back
++
++(define_cpu_unit "n9_2r1w_ii" "nds32_n9_2r1w_machine")
++(define_cpu_unit "n9_2r1w_ex" "nds32_n9_2r1w_machine")
++(define_cpu_unit "n9_2r1w_mm" "nds32_n9_2r1w_machine")
++(define_cpu_unit "n9_2r1w_wb" "nds32_n9_2r1w_machine")
++
++(define_insn_reservation "nds_n9_2r1w_unknown" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "unknown")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_misc" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "misc")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_mmu" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "mmu")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_alu" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "alu")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_alu_shift" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "alu_shift")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_pbsad" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "pbsad")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_pbsada" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "pbsada")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load" 1
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (match_test "nds32::load_single_p (insn)")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store" 1 ; default latency 1; selected by store_single_p, not by "type"; 1 cycle per stage
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (match_test "nds32::store_single_p (insn)")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_1" 1 ; default latency 1; combo 1: II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "1"))))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_2" 1 ; default latency 1; combo 2 or load_double_p insns: each stage held 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (ior (and (eq_attr "type" "load_multiple")
++		      (eq_attr "combo" "2"))
++		 (match_test "nds32::load_double_p (insn)"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_3" 1 ; default latency 1; combo 3: each stage held 3 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "3"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_4" 1 ; default latency 1; combo 4: each stage held 4 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "4"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_5" 1 ; default latency 1; combo 5: each stage held 5 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "5"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_6" 1 ; default latency 1; combo 6: each stage held 6 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "6"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_7" 1 ; default latency 1; combo 7: each stage held 7 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "7"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_8" 1 ; default latency 1; combo 8: each stage held 8 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "8"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_load_multiple_12" 1 ; default latency 1; combo 12: each stage held 12 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "12"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_1" 1 ; default latency 1; combo 1: II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "1"))))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_2" 1 ; default latency 1; combo 2 or store_double_p insns: each stage held 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (ior (and (eq_attr "type" "store_multiple")
++		      (eq_attr "combo" "2"))
++		 (match_test "nds32::store_double_p (insn)"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_3" 1 ; default latency 1; combo 3: each stage held 3 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "3"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_4" 1 ; default latency 1; combo 4: each stage held 4 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "4"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_5" 1 ; default latency 1; combo 5: each stage held 5 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "5"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_6" 1 ; default latency 1; combo 6: each stage held 6 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "6"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_7" 1 ; default latency 1; combo 7: each stage held 7 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "7"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_8" 1 ; default latency 1; combo 8: each stage held 8 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "8"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_store_multiple_12" 1 ; default latency 1; combo 12: each stage held 12 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "12"))))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_mul_fast" 1 ; default latency 1; any mul config other than MUL_TYPE_SLOW; 1 cycle per stage
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_mul_slow" 1 ; default latency 1; MUL_TYPE_SLOW: EX held 17 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex*17, n9_2r1w_mm, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_mac_fast" 1 ; default latency 1; any mul config other than MUL_TYPE_SLOW; each stage held 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_mac_slow" 1 ; default latency 1; MUL_TYPE_SLOW: II held 18, EX 19, MM 2, WB 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*17, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_div" 1 ; default latency 1; II and EX each held 35 cycles, MM and WB 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "div")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*34, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb")
++
++(define_insn_reservation "nds_n9_2r1w_branch" 1 ; default latency 1; II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
++       (and (eq_attr "type" "branch")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD_!bi
++;;     Load data from the memory (without updating the base register) and
++;;     produce the loaded data. The result is ready at MM. Because the register
++;;     port is 2R1W, two micro-operations are required if the base register
++;;     should be updated. In this case, the base register is updated by the
++;;     second micro-operation, and the updated result is ready at EX.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at MM.  If the base register should be
++;;     updated, an extra micro-operation is appended to the end of the
++;;     sequence, and the result is ready at EX.
++;;   MUL, MAC
++;;     Compute data in the multiply-adder and produce the data. The result
++;;     is ready at MM.
++;;   DIV
++;;     Compute data in the divider and produce the data. The result is ready
++;;     at MM.
++;;
++;; Consumers (RHS)
++;;   ALU, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU
++;;     Require operands at EX.
++;;   ALU_SHIFT_Rb
++;;     An ALU-SHIFT instruction consists of a shift micro-operation followed
++;;     by an arithmetic micro-operation. The operand Rb is used by the first
++;;     micro-operation, and there are some latencies if data dependency occurs.
++;;   MOVD44_E
++;;     A double-word move instruction needs two micro-operations because the
++;;     register port is 2R1W. The first micro-operation writes an even number
++;;     register, and the second micro-operation writes an odd number register.
++;;     Each input operand is required at EX for each micro-operation. MOVD44_E
++;;     stands for the first micro-operation.
++;;   MAC_RaRb, M2R
++;;     MAC instructions do multiplication at EX and do accumulation at MM, but
++;;     MAC instructions which operate on general purpose registers always
++;;     require operands at EX because MM stage cannot be forwarded in 2R1W mode.
++;;   ADDR_IN
++;;     If an instruction requires an address as its input operand, the address
++;;     is required at EX.
++;;   ST_bi
++;;     A post-increment store instruction requires its data at EX because MM
++;;     cannot be forwarded in 2R1W mode.
++;;   ST_!bi_RI
++;;     A store instruction with an immediate offset requires its data at EX
++;;     because MM cannot be forwarded in 2R1W mode. If the offset field is a
++;;     register (ST_!bi_RR), the instruction will be separated into two micro-
++;;     operations, and the second one requires the input operand at EX in order
++;;     to store it to the memory.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at MM.
++;;   BR
++;;     If a branch instruction is conditional, its input data is required at EX.
++
++;; LD_!bi, MUL, MAC
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU
++(define_bypass 2 ; latency 2 from the producers (1st string) to the consumers (2nd string) when the guard returns nonzero
++  "nds_n9_2r1w_load,\
++   nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\
++   nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow"
++  "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\
++   nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\
++   nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\
++   nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\
++   nds_n9_2r1w_branch,\
++   nds_n9_2r1w_div,\
++   nds_n9_2r1w_load,nds_n9_2r1w_store,\
++   nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\
++   nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\
++   nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\
++   nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\
++   nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\
++   nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\
++   nds_n9_2r1w_mmu"
++  "nds32_n9_2r1w_mm_to_ex_p" ; name of the guard function for this bypass
++)
++
++;; LMW(N, N)
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU
++(define_bypass 2 ; latency 2 from any 2R1W load_multiple reservation to the consumers (2nd string) when the guard returns nonzero
++  "nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\
++   nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\
++   nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12"
++  "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\
++   nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\
++   nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\
++   nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\
++   nds_n9_2r1w_branch,\
++   nds_n9_2r1w_div,\
++   nds_n9_2r1w_load,nds_n9_2r1w_store,\
++   nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\
++   nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\
++   nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\
++   nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\
++   nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\
++   nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\
++   nds_n9_2r1w_mmu"
++  "nds32_n9_last_load_to_ex_p" ; name of the guard function for this bypass
++)
+diff --git a/gcc/config/nds32/nds32-n9-3r2w.md b/gcc/config/nds32/nds32-n9-3r2w.md
+new file mode 100644
+index 0000000..7849c72
+--- /dev/null
++++ b/gcc/config/nds32/nds32-n9-3r2w.md
+@@ -0,0 +1,357 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++;; ------------------------------------------------------------------------
++;; Define N9 3R2W pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_n9_3r2w_machine") ; automaton that owns the four stage units declared below
++
++;; ------------------------------------------------------------------------
++;; Pipeline Stages
++;; ------------------------------------------------------------------------
++;; IF - Instruction Fetch
++;; II - Instruction Issue / Instruction Decode
++;; EX - Instruction Execution
++;; MM - Memory Execution
++;; WB - Instruction Retire / Result Write-Back
++
++(define_cpu_unit "n9_3r2w_ii" "nds32_n9_3r2w_machine") ; II stage unit
++(define_cpu_unit "n9_3r2w_ex" "nds32_n9_3r2w_machine") ; EX stage unit
++(define_cpu_unit "n9_3r2w_mm" "nds32_n9_3r2w_machine") ; MM stage unit
++(define_cpu_unit "n9_3r2w_wb" "nds32_n9_3r2w_machine") ; WB stage unit (no unit is declared for IF)
++
++(define_insn_reservation "nds_n9_3r2w_unknown" 1 ; default latency 1; II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "unknown")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_misc" 1 ; default latency 1; II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "misc")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mmu" 1 ; default latency 1; II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "mmu")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_alu" 1 ; default latency 1; II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "alu")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_alu_shift" 1 ; default latency 1; II, EX, MM, WB each held 2 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "alu_shift")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_pbsad" 1 ; default latency 1; EX held 3 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "pbsad")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_pbsada" 1 ; default latency 1; EX held 3 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "pbsada")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load" 1 ; default latency 1; selected by load_single_p, not by "type"; 1 cycle per stage
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (match_test "nds32::load_single_p (insn)")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store" 1 ; default latency 1; selected by store_single_p, not by "type"; 1 cycle per stage
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (match_test "nds32::store_single_p (insn)")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_1" 1 ; default latency 1; combo 1: II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "1"))))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_2" 1 ; default latency 1; combo 2 or load_double_p insns: each stage held 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (ior (and (eq_attr "type" "load_multiple")
++		      (eq_attr "combo" "2"))
++		 (match_test "nds32::load_double_p (insn)"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_3" 1 ; default latency 1; combo 3: each stage held 3 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "3"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_4" 1 ; default latency 1; combo 4: each stage held 4 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "4"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_5" 1 ; default latency 1; combo 5: each stage held 5 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "5"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_6" 1 ; default latency 1; combo 6: each stage held 6 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "6"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_7" 1 ; default latency 1; combo 7: each stage held 7 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "7"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_8" 1 ; default latency 1; combo 8: each stage held 8 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "8"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_load_multiple_12" 1 ; default latency 1; combo 12: each stage held 12 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "12"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_1" 1 ; default latency 1; combo 1: II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "1"))))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_2" 1 ; default latency 1; combo 2 or store_double_p insns: each stage held 2 cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (ior (and (eq_attr "type" "store_multiple")
++		      (eq_attr "combo" "2"))
++		 (match_test "nds32::store_double_p (insn)"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_3" 1 ; default latency 1; combo 3: each stage held 3 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "3"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_4" 1 ; default latency 1; combo 4: each stage held 4 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "4"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_5" 1 ; default latency 1; combo 5: each stage held 5 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "5"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_6" 1 ; default latency 1; combo 6: each stage held 6 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "6"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_7" 1 ; default latency 1; combo 7: each stage held 7 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "7"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_8" 1 ; default latency 1; combo 8: each stage held 8 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "8"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_store_multiple_12" 1 ; default latency 1; combo 12: each stage held 12 consecutive cycles
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "pipeline_model" "n9")
++	    (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "12"))))
++  "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mul_fast1" 1 ; default latency 1; MUL_TYPE_FAST_1: II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mul_fast2" 1 ; default latency 1; MUL_TYPE_FAST_2: EX held 2 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mul_slow" 1 ; default latency 1; MUL_TYPE_SLOW: EX held 17 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mul")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mac_fast1" 1 ; default latency 1; MUL_TYPE_FAST_1: II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mac_fast2" 1 ; default latency 1; MUL_TYPE_FAST_2: EX held 2 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_mac_slow" 1 ; default latency 1; MUL_TYPE_SLOW: II held 1, EX 19, MM 2, WB 1 cycle(s)
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW")
++       (and (eq_attr "type" "mac")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_div" 1 ; default latency 1; EX held 34 cycles, other stages 1 cycle
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "div")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex*34, n9_3r2w_mm, n9_3r2w_wb")
++
++(define_insn_reservation "nds_n9_3r2w_branch" 1 ; default latency 1; II, EX, MM, WB held 1 cycle each
++  (and (match_test "nds32_register_ports_config == REG_PORT_3R2W")
++       (and (eq_attr "type" "branch")
++	    (eq_attr "pipeline_model" "n9")))
++  "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb")
++
++;; ------------------------------------------------------------------------
++;; Comment Notations and Bypass Rules
++;; ------------------------------------------------------------------------
++;; Producers (LHS)
++;;   LD
++;;     Load data from the memory and produce the loaded data. The result is
++;;     ready at MM.
++;;   LMW(N, M)
++;;     There are N micro-operations within an instruction that loads multiple
++;;     words. The result produced by the M-th micro-operation is sent to
++;;     consumers. The result is ready at MM.
++;;   MUL, MAC
++;;     Compute data in the multiply-adder and produce the data. The result
++;;     is ready at MM.
++;;   DIV
++;;     Compute data in the divider and produce the data. The result is ready
++;;     at MM.
++;;
++;; Consumers (RHS)
++;;   ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU
++;;     Require operands at EX.
++;;   ALU_SHIFT_Rb
++;;     An ALU-SHIFT instruction consists of a shift micro-operation followed
++;;     by an arithmetic micro-operation. The operand Rb is used by the first
++;;     micro-operation, and there are some latencies if data dependency occurs.
++;;   MAC_RaRb
++;;     A MAC instruction does multiplication at EX and does accumulation at MM,
++;;     so the operand Rt is required at MM, and operands Ra and Rb are required
++;;     at EX.
++;;   ADDR_IN
++;;     If an instruction requires an address as its input operand, the address
++;;     is required at EX.
++;;   ST
++;;     A store instruction requires its data at MM.
++;;   SMW(N, M)
++;;     There are N micro-operations within an instruction that stores multiple
++;;     words. Each M-th micro-operation requires its data at MM.
++;;   BR
++;;     If a branch instruction is conditional, its input data is required at EX.
++
++;; LD, MUL, MAC, DIV
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU
++(define_bypass 2 ; latency 2 from the producers (1st string) to the consumers (2nd string) when the guard returns nonzero
++  "nds_n9_3r2w_load,\
++   nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\
++   nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\
++   nds_n9_3r2w_div"
++  "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\
++   nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\
++   nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\
++   nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\
++   nds_n9_3r2w_branch,\
++   nds_n9_3r2w_div,\
++   nds_n9_3r2w_load,nds_n9_3r2w_store,\
++   nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\
++   nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\
++   nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\
++   nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\
++   nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\
++   nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\
++   nds_n9_3r2w_mmu"
++  "nds32_n9_3r2w_mm_to_ex_p" ; name of the guard function for this bypass
++)
++
++;; LMW(N, N)
++;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU
++(define_bypass 2
++  "nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\
++   nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\
++   nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12"
++  "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\
++   nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\
++   nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\
++   nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\
++   nds_n9_3r2w_branch,\
++   nds_n9_3r2w_div,\
++   nds_n9_3r2w_load,nds_n9_3r2w_store,\
++   nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\
++   nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\
++   nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\
++   nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\
++   nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\
++   nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\
++   nds_n9_3r2w_mmu"
++  "nds32_n9_last_load_to_ex_p"
++)
+diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h
+index 25c4081..e4017bb 100644
+--- a/gcc/config/nds32/nds32-opts.h
++++ b/gcc/config/nds32/nds32-opts.h
+@@ -22,14 +22,42 @@
+ #define NDS32_OPTS_H
+ 
+ #define NDS32_DEFAULT_CACHE_BLOCK_SIZE 16
+-#define NDS32_DEFAULT_ISR_VECTOR_SIZE (TARGET_ISA_V3 ? 4 : 16)
++#define NDS32_DEFAULT_ISR_VECTOR_SIZE TARGET_DEFAULT_ISR_VECTOR_SIZE
+ 
+ /* The various ANDES ISA.  */
+ enum nds32_arch_type
+ {
+   ARCH_V2,
++  ARCH_V2J,
+   ARCH_V3,
+-  ARCH_V3M
++  ARCH_V3J,
++  ARCH_V3M,
++  ARCH_V3M_PLUS,
++  ARCH_V3F,
++  ARCH_V3S
++};
++
++/* The various ANDES CPU.  */
++enum nds32_cpu_type
++{
++  CPU_N6,
++  CPU_N7,
++  CPU_N8,
++  CPU_E8,
++  CPU_N9,
++  CPU_N10,
++  CPU_GRAYWOLF,
++  CPU_N12,
++  CPU_N13,
++  CPU_PANTHER,
++  CPU_SIMPLE
++};
++
++/* The memory model (slow or fast).  */
++enum nds32_memory_model_type
++{
++  MEMORY_MODEL_SLOW,
++  MEMORY_MODEL_FAST
+ };
+ 
+ /* The code model defines the address generation strategy.  */
+@@ -40,4 +68,56 @@ enum nds32_cmodel_type
+   CMODEL_LARGE
+ };
+ 
++/* The ICT model (small or large).  */
++enum nds32_ict_model_type
++{
++  ICT_MODEL_SMALL,
++  ICT_MODEL_LARGE
++};
++
++
++/* Multiply instruction configuration.  */
++enum nds32_mul_type
++{
++  MUL_TYPE_FAST_1,
++  MUL_TYPE_FAST_2,
++  MUL_TYPE_SLOW
++};
++
++/* Register ports configuration.  */
++enum nds32_register_ports
++{
++  REG_PORT_3R2W,
++  REG_PORT_2R1W
++};
++
++/* Which ABI to use.  */
++enum abi_type
++{
++  NDS32_ABI_V2,
++  NDS32_ABI_V2_FP_PLUS
++};
++
++/* The various FPU register-number configurations.  */
++enum float_reg_number
++{
++  NDS32_CONFIG_FPU_0,
++  NDS32_CONFIG_FPU_1,
++  NDS32_CONFIG_FPU_2,
++  NDS32_CONFIG_FPU_3,
++  NDS32_CONFIG_FPU_4,
++  NDS32_CONFIG_FPU_5,
++  NDS32_CONFIG_FPU_6,
++  NDS32_CONFIG_FPU_7
++};
++
++/* The cost model used by the lmwsmw optimization.  */
++enum lmwsmw_cost_type
++{
++  LMWSMW_OPT_SIZE,
++  LMWSMW_OPT_SPEED,
++  LMWSMW_OPT_ALL,
++  LMWSMW_OPT_AUTO
++};
++
+ #endif
+diff --git a/gcc/config/nds32/nds32-panther.md b/gcc/config/nds32/nds32-panther.md
+new file mode 100644
+index 0000000..d45de1c
+--- /dev/null
++++ b/gcc/config/nds32/nds32-panther.md
+@@ -0,0 +1,446 @@
++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
++;; Copyright (C) 2012-2016 Free Software Foundation, Inc.
++;; Contributed by Andes Technology Corporation.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published
++;; by the Free Software Foundation; either version 3, or (at your
++;; option) any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but WITHOUT
++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++;; License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++;; ------------------------------------------------------------------------
++;; Define Panther pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_pn_machine")
++
++(define_cpu_unit "pn_i3_0" "nds32_pn_machine")
++(define_cpu_unit "pn_i3_1" "nds32_pn_machine")
++(define_cpu_unit "pn_e1_p0" "nds32_pn_machine")
++(define_cpu_unit "pn_e2_p0" "nds32_pn_machine")
++(define_cpu_unit "pn_e3_p0" "nds32_pn_machine")
++(define_cpu_unit "pn_e4_p0" "nds32_pn_machine")
++(define_cpu_unit "pn_wb_p0" "nds32_pn_machine")
++(define_cpu_unit "pn_e1_p1" "nds32_pn_machine")
++(define_cpu_unit "pn_e2_p1" "nds32_pn_machine")
++(define_cpu_unit "pn_e3_p1" "nds32_pn_machine")
++(define_cpu_unit "pn_e4_p1" "nds32_pn_machine")
++(define_cpu_unit "pn_wb_p1" "nds32_pn_machine")
++(define_cpu_unit "pn_e1_p2" "nds32_pn_machine")
++(define_cpu_unit "pn_e2_p2" "nds32_pn_machine")
++(define_cpu_unit "pn_e3_p2" "nds32_pn_machine")
++(define_cpu_unit "pn_e4_p2" "nds32_pn_machine")
++(define_cpu_unit "pn_wb_p2" "nds32_pn_machine")
++
++(define_reservation "pn_i3" "pn_i3_0 | pn_i3_1")
++(define_reservation "pn_e1" "pn_e1_p0 | pn_e1_p1")
++(define_reservation "pn_e2" "pn_e2_p0 | pn_e2_p1")
++(define_reservation "pn_e3" "pn_e3_p0 | pn_e3_p1")
++(define_reservation "pn_e4" "pn_e4_p0 | pn_e4_p1")
++(define_reservation "pn_wb" "pn_wb_p0 | pn_wb_p1")
++
++(define_insn_reservation "nds_pn_unknown" 1
++  (and (eq_attr "type" "unknown")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_misc" 1
++  (and (eq_attr "type" "misc")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_mmu" 1
++  (and (eq_attr "type" "mmu")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_movd44" 1
++  (and (and (and (eq_attr "type" "alu")
++		 (eq_attr "subtype" "simple"))
++	    (match_test "nds32::movd44_insn_p (insn)"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1, pn_wb_p1")
++
++(define_insn_reservation "nds_pn_alu" 1
++  (and (and (and (eq_attr "type" "alu")
++		 (eq_attr "subtype" "simple"))
++	    (match_test "!nds32::movd44_insn_p (insn)"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_shift" 1
++  (and (and (eq_attr "type" "alu")
++	    (eq_attr "subtype" "shift"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_alu_shift" 1
++  (and (eq_attr "type" "alu_shift")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_pbsad" 1
++  (and (eq_attr "type" "pbsad")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3*2, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_pbsada" 1
++  (and (eq_attr "type" "pbsada")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1, pn_e2, pn_e3*3, pn_e4, pn_wb")
++
++(define_insn_reservation "nds_pn_load_full_word" 1
++  (and (match_test "nds32::load_full_word_p (insn)")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_partial_word" 1
++  (and (match_test "nds32::load_partial_word_p (insn)")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store" 1
++  (and (match_test "nds32::store_single_p (insn)")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_1" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_2" 1
++  (and (ior (and (eq_attr "type" "load_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::load_double_p (insn)"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_3" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*3, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_4" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*4, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_5" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*5, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_6" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*6, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_7" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*7, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_8" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*8, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_load_multiple_12" 1
++  (and (and (eq_attr "type" "load_multiple")
++            (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*12, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_2" 1
++  (and (ior (and (eq_attr "type" "store_multiple")
++		 (eq_attr "combo" "2"))
++	    (match_test "nds32::store_double_p (insn)"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_3" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*3, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*4, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*5, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "6"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*6, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*7, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*8, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_12" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "12"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*12, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_mul" 1
++  (and (eq_attr "type" "mul")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1, pn_wb_p1")
++
++(define_insn_reservation "nds_pn_mac" 1
++  (and (eq_attr "type" "mac")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1, pn_wb_p1")
++
++;; The number of cycles consumed in the E4 stage is 32 - CLZ(abs(Ra)) + 2,
++;; so the worst case is 34.
++(define_insn_reservation "nds_pn_div" 1
++  (and (eq_attr "type" "div")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1*34, pn_wb_p1")
++
++(define_insn_reservation "nds_pn_branch" 1
++  (and (eq_attr "type" "branch")
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p0, pn_e2_p0, pn_e3_p0, pn_e4_p0, pn_wb_p0")
++
++;; SHIFT -> ADDR_IN
++(define_bypass 2
++  "nds_pn_shift"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_e2_to_e1_p"
++)
++
++;; ALU, MOVD44 -> ADDR_IN
++(define_bypass 3
++  "nds_pn_alu, nds_pn_movd44"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_e3_to_e1_p"
++)
++
++;; ALU, MOVD44 -> SHIFT, MUL, MAC_RaRb
++(define_bypass 2
++  "nds_pn_alu, nds_pn_movd44"
++  "nds_pn_shift, nds_pn_mul, nds_pn_mac"
++  "nds32_pn_e3_to_e2_p"
++)
++
++;; MUL, MAC, DIV, LW, ADDR_OUT -> ADDR_IN
++(define_bypass 4
++  "nds_pn_mul, nds_pn_mac, nds_pn_div,\
++   nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_e4_to_e1_p"
++)
++
++;; MUL, MAC, DIV, LW, ADDR_OUT -> SHIFT, MUL, MAC_RaRb
++(define_bypass 3
++  "nds_pn_mul, nds_pn_mac, nds_pn_div,\
++   nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds_pn_shift, nds_pn_mul, nds_pn_mac"
++  "nds32_pn_e4_to_e2_p"
++)
++
++;; MUL, MAC, DIV, LW, ADDR_OUT -> ALU, MOVD44, BR_COND, ST, SMW(N, 1)
++(define_bypass 2
++  "nds_pn_mul, nds_pn_mac, nds_pn_div,\
++   nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds_pn_alu, nds_pn_movd44, nds_pn_branch,\
++   nds_pn_store,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_e4_to_e3_p"
++)
++
++;; LH, LB -> ADDR_IN
++(define_bypass 5
++  "nds_pn_load_partial_word"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_wb_to_e1_p"
++)
++
++;; LH, LB -> SHIFT, MUL, MAC_RaRb
++(define_bypass 4
++  "nds_pn_load_partial_word"
++  "nds_pn_shift, nds_pn_mul, nds_pn_mac"
++  "nds32_pn_wb_to_e2_p"
++)
++
++;; LH, LB -> ALU, MOVD44, BR_COND, ST, SMW(N, 1)
++(define_bypass 3
++  "nds_pn_load_partial_word"
++  "nds_pn_alu, nds_pn_movd44, nds_pn_branch,\
++   nds_pn_store,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_wb_to_e3_p"
++)
++
++;; LH, LB -> DIV
++(define_bypass 2
++  "nds_pn_load_partial_word"
++  "nds_pn_div"
++  "nds32_pn_wb_to_e4_p"
++)
++
++;; LMW(N, N) -> ADDR_IN
++(define_bypass 4
++  "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_last_load_to_e1_p"
++)
++
++;; LMW(N, N) -> SHIFT, MUL, MAC_RaRb
++(define_bypass 3
++  "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12"
++  "nds_pn_shift, nds_pn_mul, nds_pn_mac"
++  "nds32_pn_last_load_to_e2_p"
++)
++
++;; LMW(N, N - 1) -> ADDR_IN
++(define_bypass 3
++  "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_last_two_load_to_e1_p"
++)
++
++;; LMW(N, N - 2) -> ADDR_IN
++(define_bypass 2
++  "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12"
++  "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\
++   nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_last_three_load_to_e1_p"
++)
++
++;; LMW(N, N - 1) -> SHIFT, MUL, MAC_RaRb
++(define_bypass 2
++  "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12"
++  "nds_pn_shift, nds_pn_mul, nds_pn_mac"
++  "nds32_pn_last_two_load_to_e2_p"
++)
++
++;; LMW(N, N) -> ALU, MOVD44, BR_COND, ST, SMW(N, 1)
++(define_bypass 2
++  "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\
++   nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\
++   nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12"
++  "nds_pn_alu, nds_pn_movd44, nds_pn_branch,\
++   nds_pn_store,\
++   nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\
++   nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\
++   nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12"
++  "nds32_pn_last_load_to_e3_p"
++)
+diff --git a/gcc/config/nds32/nds32-peephole2.md b/gcc/config/nds32/nds32-peephole2.md
+index 07e3a2b..bb47385 100644
+--- a/gcc/config/nds32/nds32-peephole2.md
++++ b/gcc/config/nds32/nds32-peephole2.md
+@@ -19,6 +19,197 @@
+ ;; <http://www.gnu.org/licenses/>.
+ 
+ 
+-;; Use define_peephole2 to handle possible target-specific optimization.
++;; Use define_split, define_peephole, and define_peephole2 to
++;; handle possible target-specific optimization in this file.
+ 
+ ;; ------------------------------------------------------------------------
++;; Try to utilize 16-bit instructions by swapping operands if possible.
++;; ------------------------------------------------------------------------
++
++;; Try to make add as add45.
++(define_peephole2
++  [(set (match_operand:QIHISI 0 "register_operand"              "")
++	(plus:QIHISI (match_operand:QIHISI 1 "register_operand" "")
++		     (match_operand:QIHISI 2 "register_operand" "")))]
++  "reload_completed
++   && TARGET_16_BIT
++   && REGNO (operands[0]) == REGNO (operands[2])
++   && REGNO (operands[0]) != REGNO (operands[1])
++   && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))"
++  [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))])
++
++;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33.
++(define_peephole2
++  [(set (match_operand:SI 0 "register_operand"    "")
++	(match_operator:SI 1 "nds32_have_33_inst_operator"
++	  [(match_operand:SI 2 "register_operand" "")
++	   (match_operand:SI 3 "register_operand" "")]))]
++  "reload_completed
++   && TARGET_16_BIT
++   && REGNO (operands[0]) == REGNO (operands[3])
++   && REGNO (operands[0]) != REGNO (operands[2])
++   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0]))
++   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))"
++  [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))])
++
++(define_peephole
++  [(set (match_operand:SI 0 "register_operand" "")
++	(match_operand:SI 1 "register_operand" ""))
++   (set (match_operand:SI 2 "register_operand" "")
++	(match_operand:SI 3 "register_operand" ""))]
++  "TARGET_16_BIT
++   && !TARGET_ISA_V2
++   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
++   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
++   && ((REGNO (operands[0]) & 0x1) == 0)
++   && ((REGNO (operands[1]) & 0x1) == 0)
++   && (REGNO (operands[0]) + 1) == REGNO (operands[2])
++   && (REGNO (operands[1]) + 1) == REGNO (operands[3])"
++  "movd44\t%0, %1"
++  [(set_attr "type"   "alu")
++   (set_attr "length" "2")])
++
++;; Merge two fcpyss to fcpysd.
++(define_peephole2
++  [(set (match_operand:SF 0 "float_even_register_operand" "")
++	(match_operand:SF 1 "float_even_register_operand" ""))
++   (set (match_operand:SF 2 "float_odd_register_operand"  "")
++	(match_operand:SF 3 "float_odd_register_operand"  ""))]
++  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++   && REGNO (operands[0]) == REGNO (operands[2]) - 1
++   && REGNO (operands[1]) == REGNO (operands[3]) - 1"
++  [(set (match_dup 4) (match_dup 5))]
++  {
++    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0]));
++    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1]));
++  })
++
++(define_peephole2
++  [(set (match_operand:SF 0 "float_odd_register_operand"  "")
++	(match_operand:SF 1 "float_odd_register_operand"  ""))
++   (set (match_operand:SF 2 "float_even_register_operand" "")
++	(match_operand:SF 3 "float_even_register_operand" ""))]
++  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++   && REGNO (operands[2]) == REGNO (operands[0]) - 1
++   && REGNO (operands[3]) == REGNO (operands[1]) - 1"
++  [(set (match_dup 4) (match_dup 5))]
++  {
++    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2]));
++    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3]));
++  })
++
++;; Merge two flsi to fldi.
++(define_peephole2
++  [(set (match_operand:SF 0 "float_even_register_operand" "")
++	(match_operand:SF 1 "memory_operand" ""))
++   (set (match_operand:SF 2 "float_odd_register_operand" "")
++	(match_operand:SF 3 "memory_operand" ""))]
++  "REGNO (operands[0]) == REGNO (operands[2]) - 1
++   && nds32_memory_merge_peep_p (operands[3], operands[1])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++    operands[1] = widen_memory_access (operands[3], DFmode, 0);
++    operands[0] = gen_rtx_REG (DFmode, REGNO (operands[0]));
++})
++
++(define_peephole2
++  [(set (match_operand:SF 0 "float_odd_register_operand" "")
++	(match_operand:SF 1 "memory_operand" ""))
++   (set (match_operand:SF 2 "float_even_register_operand" "")
++	(match_operand:SF 3 "memory_operand" ""))]
++  "REGNO (operands[2]) == REGNO (operands[0]) - 1
++   && nds32_memory_merge_peep_p (operands[1], operands[3])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++    operands[1] = widen_memory_access (operands[1], DFmode, 0);
++    operands[0] = gen_rtx_REG (DFmode, REGNO (operands[2]));
++})
++
++;; Merge two fssi to fsdi.
++(define_peephole2
++  [(set (match_operand:SF 0 "memory_operand" "")
++	(match_operand:SF 1 "float_even_register_operand" ""))
++   (set (match_operand:SF 2 "memory_operand" "")
++	(match_operand:SF 3 "float_odd_register_operand" ""))]
++  "REGNO (operands[1]) == REGNO (operands[3]) - 1
++   && nds32_memory_merge_peep_p (operands[2], operands[0])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[0] = widen_memory_access (operands[2], DFmode, 0);
++  operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
++})
++
++(define_peephole2
++  [(set (match_operand:SF 0 "memory_operand" "")
++	(match_operand:SF 1 "float_odd_register_operand" ""))
++   (set (match_operand:SF 2 "memory_operand" "")
++	(match_operand:SF 3 "float_even_register_operand" ""))]
++  "REGNO (operands[3]) == REGNO (operands[1]) - 1
++   && nds32_memory_merge_peep_p (operands[0], operands[2])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[0] = widen_memory_access (operands[0], DFmode, 0);
++  operands[1] = gen_rtx_REG (DFmode, REGNO (operands[3]));
++})
++
++;; ------------------------------------------------------------------------
++;; GCC prefers [u]divmodsi3 over [u]divsi3 even if the remainder is unused,
++;; so we use a split to drop the mod operation for lower register pressure.
++
++(define_split
++  [(set (match_operand:SI 0 "register_operand")
++	(div:SI (match_operand:SI 1 "register_operand")
++		(match_operand:SI 2 "register_operand")))
++   (set (match_operand:SI 3 "register_operand")
++	(mod:SI (match_dup 1) (match_dup 2)))]
++  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
++   && can_create_pseudo_p ()"
++  [(set (match_dup 0)
++	(div:SI (match_dup 1)
++		(match_dup 2)))])
++
++(define_split
++  [(set (match_operand:SI 0 "register_operand")
++	(udiv:SI (match_operand:SI 1 "register_operand")
++		 (match_operand:SI 2 "register_operand")))
++   (set (match_operand:SI 3 "register_operand")
++	(umod:SI (match_dup 1) (match_dup 2)))]
++  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
++   && can_create_pseudo_p ()"
++  [(set (match_dup 0)
++	(udiv:SI (match_dup 1)
++		 (match_dup 2)))])
++
++(define_peephole2
++  [(set (match_operand:DI 0 "register_operand")
++	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand"))
++		 (sign_extend:DI (match_operand:SI 2 "register_operand"))))]
++  "NDS32_EXT_DSP_P ()
++   && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? REGNO (operands[0]) + 1 : REGNO (operands[0]))"
++  [(const_int 1)]
++{
++  rtx highpart = nds32_di_high_part_subreg (operands[0]);
++  emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2]));
++  DONE;
++})
++
++(define_split
++  [(set (match_operand:DI 0 "nds32_general_register_operand" "")
++	(match_operand:DI 1 "nds32_general_register_operand" ""))]
++  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL
++   || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
++  HOST_WIDE_INT offset;
++  if (dead_note == NULL_RTX)
++    offset = 0;
++  else
++    offset = 4;
++  operands[0] = simplify_gen_subreg (
++		  SImode, operands[0],
++		  DImode, offset);
++  operands[1] = simplify_gen_subreg (
++		  SImode, operands[1],
++		  DImode, offset);
++})
+diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c
+index a396fff..903a2ed 100644
+--- a/gcc/config/nds32/nds32-pipelines-auxiliary.c
++++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c
+@@ -21,14 +21,2638 @@
+ 
+ /* ------------------------------------------------------------------------ */
+ 
++#include <set>
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "tree-pass.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* This file is prepared for future implementation of precise
+-   pipeline description for nds32 target.  */
++namespace nds32 {
++namespace scheduling {
++
++/* Classify the memory access direction.  It's unknown if the offset register
++   is not a constant value.  */
++enum memory_access_direction
++{
++  MEM_ACCESS_DIR_POS,
++  MEM_ACCESS_DIR_NEG,
++  MEM_ACCESS_DIR_UNKNOWN
++};
++
++/* This class provides some wrappers of the DFA scheduler.  Due to the design
++   drawback of the DFA scheduler, creating two instances at the same time is
++   not allowed.  Using the loosest relationship such as 'dependency' instead
++   of 'aggregation' or 'composition' can minimize this issue.  */
++class pipeline_simulator
++{
++public:
++  pipeline_simulator ();
++  ~pipeline_simulator ();
++
++  void advance_cycle (int cycles = 1);
++  int query_latency (rtx_insn *producer, rtx_insn *consumer) const;
++  int issue_insn (rtx_insn *insn);
++  int force_issue_insn (rtx_insn *insn);
++
++private:
++  static int gcc_dfa_initialized_;
++  state_t state_;
++};
++
++/* Insert pseudo NOPs so that we can see stall cycles caused by structural or
++   data hazards in the assembly code.  The design of this class is similar to
++   the 'template method' pattern, but we don't need to maintain multiple
++   customized algorithms at the same time.  Hence this class has no virtual
++   functions providing further customizations.  */
++class stall_inserter
++{
++private:
++  enum dep_type { RES_DEP, DATA_DEP };
++
++public:
++  void insert_stalls ();
++
++private:
++  static rtx emit_pseudo_nop_before (rtx_insn *insn, int cycles, enum dep_type type);
++
++  void insert_structural_hazard_stalls ();
++  void insert_data_hazard_stalls ();
++  void emit_pseudo_nops_for_data_hazards (rtx_insn *insn,
++					  pipeline_simulator &simulator);
++};
++
++class pass_nds32_print_stalls : public rtl_opt_pass
++{
++public:
++  pass_nds32_print_stalls (gcc::context *ctxt);
++
++  bool gate (function *);
++  unsigned int execute (function *);
++};
++
++int pipeline_simulator::gcc_dfa_initialized_ = 0;
++
++const pass_data pass_data_nds32_print_stalls =
++{
++  RTL_PASS,				/* type */
++  "print_stalls",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  0					/* todo_flags_finish */
++};
++
++rtl_opt_pass *
++make_pass_nds32_print_stalls (gcc::context *ctxt)
++{
++  return new pass_nds32_print_stalls (ctxt);
++}
++
++/* NULL-safe reg_overlap_mentioned_p (): false when X or IN is NULL.  */
++bool
++reg_overlap_p (rtx x, rtx in)
++{
++  if (x == NULL_RTX || in == NULL_RTX)
++    return false;
++
++  return static_cast <bool> (reg_overlap_mentioned_p (x, in));
++}
++
++/* Calculate the cycle distance between FROM and TO in pipeline view.  Each
++   executable insn counts as one cycle; a pseudo NOP counts as the number of
++   cycles stored in its pattern.  TODO: multi-cycle insns should be handled
++   specially, but we haven't done it here.  */
++int
++cycle_distance (rtx_insn *from, rtx_insn *to)
++{
++  int count = 1;
++
++  for (from = NEXT_INSN (from); from && from != to; from = NEXT_INSN (from))
++    {
++      if (!insn_executable_p (from))
++	continue;
++
++      if (insn_pseudo_nop_p (from))
++	count += INTVAL (XVECEXP (PATTERN (from), 0, 0));
++      else
++	++count;
++    }
++
++  return count;
++}
++
++/* Determine the memory access direction of a load/store insn.  */
++memory_access_direction
++determine_access_direction (rtx_insn *insn)
++{
++  int post_update_rtx_index;
++  rtx plus_rtx;
++  rtx mem_rtx;
++  rtx offset_rtx;
++
++  switch (get_attr_type (insn))
++  {
++  case TYPE_LOAD_MULTIPLE:
++    gcc_assert (parallel_elements (insn) >= 2);
++
++    post_update_rtx_index = find_post_update_rtx (insn);
++    if (post_update_rtx_index != -1)
++      plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index));
++    else
++      {
++	/* (parallel
++	     [(set (reg) (mem (reg)))              : index 0
++	      (set (reg) (mem (plus (reg) (...)))) : index 1
++	      ...])  */
++	mem_rtx = SET_SRC (parallel_element (insn, 1));
++	if (GET_CODE (mem_rtx) == UNSPEC)
++	  mem_rtx = XVECEXP (mem_rtx, 0, 0);
++	gcc_assert (MEM_P (mem_rtx));
++	plus_rtx = XEXP (mem_rtx, 0);
++      }
++    break;
++
++  case TYPE_STORE_MULTIPLE:
++    gcc_assert (parallel_elements (insn) >= 2);
++
++    post_update_rtx_index = find_post_update_rtx (insn);
++    if (post_update_rtx_index != -1)
++      plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index));
++    else
++      {
++	/* (parallel
++	     [(set (mem (reg))              (reg)) : index 0
++	      (set (mem (plus (reg) (...))) (reg)) : index 1
++	      ...])  */
++	mem_rtx = SET_DEST (parallel_element (insn, 1));
++	if (GET_CODE (mem_rtx) == UNSPEC)
++	  mem_rtx = XVECEXP (mem_rtx, 0, 0);
++	gcc_assert (MEM_P (mem_rtx));
++	plus_rtx = XEXP (mem_rtx, 0);
++      }
++    break;
++
++  case TYPE_LOAD:
++  case TYPE_STORE:
++    mem_rtx = extract_mem_rtx (insn);
++
++    switch (GET_CODE (XEXP (mem_rtx, 0)))
++      {
++      case POST_INC:
++	/* (mem (post_inc (...)))  */
++	return MEM_ACCESS_DIR_POS;
++
++      case POST_DEC:
++	/* (mem (post_dec (...)))  */
++	return MEM_ACCESS_DIR_NEG;
++
++      case PLUS:
++	/* (mem (plus (reg) (...)))  */
++	plus_rtx = XEXP (mem_rtx, 0);
++	break;
++
++      case POST_MODIFY:
++	/* (mem (post_modify (reg) (plus (reg) (...))))  */
++	plus_rtx = XEXP (XEXP (mem_rtx, 0), 1);
++	break;
++
++      default:
++	gcc_unreachable ();
++      }
++    break;
++
++  default:
++    gcc_unreachable ();
++  }
++
++  gcc_assert (GET_CODE (plus_rtx) == PLUS);
++
++  offset_rtx = XEXP (plus_rtx, 1);
++  if (GET_CODE (offset_rtx) == CONST_INT)
++    {
++      if (INTVAL (offset_rtx) < 0)
++	return MEM_ACCESS_DIR_NEG;
++      else
++	return MEM_ACCESS_DIR_POS;
++    }
++
++  return MEM_ACCESS_DIR_UNKNOWN;
++}
++
++/* Return the Nth load/store operation of INSN in the real micro-operation
++   accessing order.  */
++rtx
++extract_nth_access_rtx (rtx_insn *insn, int n)
++{
++  int n_elems = parallel_elements (insn);
++  int post_update_rtx_index = find_post_update_rtx (insn);
++  memory_access_direction direction = determine_access_direction (insn);
++
++  gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN);
++
++  /* Reverse the order if the direction is negative.  */
++  if (direction == MEM_ACCESS_DIR_NEG)
++    n = -1 * n - 1;
++
++  if (post_update_rtx_index != -1)
++    {
++      if (n >= 0 && post_update_rtx_index <= n)
++	++n;
++      else if (n < 0 && post_update_rtx_index >= n + n_elems)
++	--n;
++    }
++
++  return parallel_element (insn, n);
++}
++
++/* Returns the register operated by the nth load/store operation in the real
++   micro-operation accessing order.  This function assumes INSN must be a
++   multiple-word load/store insn.  */
++rtx
++extract_nth_lmsw_access_reg (rtx_insn *insn, int n)
++{
++  rtx nth_rtx = extract_nth_access_rtx (insn, n);
++
++  if (nth_rtx == NULL_RTX)
++    return NULL_RTX;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD_MULTIPLE:
++      return SET_DEST (nth_rtx);
++
++    case TYPE_STORE_MULTIPLE:
++      return SET_SRC (nth_rtx);
++
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* Returns the register operated by the nth load/store operation in the real
++   micro-operation accessing order.  This function assumes INSN must be a
++   double-word load/store insn.  */
++rtx
++extract_nth_ls2_access_reg (rtx_insn *insn, int n)
++{
++  rtx reg;
++  enum machine_mode mode;
++
++  if (post_update_insn_p (insn))
++    {
++      memory_access_direction direction = determine_access_direction (insn);
++      gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN);
++
++      /* Reverse the order if the direction is negative.  */
++      if (direction == MEM_ACCESS_DIR_NEG)
++	n = -1 * n - 1;
++    }
++
++  /* Handle the out-of-range case.  */
++  if (n < -2 || n > 1)
++    return NULL_RTX;
++
++  /* Convert the index to a positive one.  */
++  if (n < 0)
++    n = 2 + n;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++      reg = SET_DEST (PATTERN (insn));
++      break;
++
++    case TYPE_STORE:
++      reg = SET_SRC (PATTERN (insn));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  gcc_assert (REG_P (reg) || GET_CODE (reg) == SUBREG);
++
++  switch (GET_MODE (reg))
++    {
++    case DImode:
++      mode = SImode;
++      break;
++
++    case DFmode:
++      mode = SFmode;
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (n == 0)
++    return gen_lowpart (mode, reg);
++  else
++    return gen_highpart (mode, reg);
++}
++
++/* Returns the register operated by the nth load/store operation in the real
++   micro-operation accessing order.  */
++rtx
++extract_nth_access_reg (rtx_insn *insn, int index)
++{
++  switch (GET_CODE (PATTERN (insn)))
++    {
++    case PARALLEL:
++      return extract_nth_lmsw_access_reg (insn, index);
++
++    case SET:
++      return extract_nth_ls2_access_reg (insn, index);
++
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* Determine whether latency occurs because the consumer PBSADA_INSN uses the
++   value of DEF_REG in its Ra or Rb fields.  */
++bool
++pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg)
++{
++  rtx unspec_rtx = SET_SRC (PATTERN (pbsada_insn));
++  gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);
++
++  rtx pbsada_ra = XVECEXP (unspec_rtx, 0, 0);
++  rtx pbsada_rb = XVECEXP (unspec_rtx, 0, 1);
++
++  if (rtx_equal_p (def_reg, pbsada_ra)
++      || rtx_equal_p (def_reg, pbsada_rb))
++    return true;
++
++  return false;
++}
++
++/* Determine whether latency occurs because the consumer PBSADA_INSN uses the
++   value of DEF_REG in its Rt field.  */
++bool
++pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg)
++{
++  rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn));
++
++  if (rtx_equal_p (def_reg, pbsada_rt))
++    return true;
++
++  return false;
++}
++
++/* Check if INSN is a movd44 insn consuming DEF_REG.  */
++bool
++movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
++{
++  if (!movd44_insn_p (insn))
++    return false;
++
++  rtx use_rtx = SET_SRC (PATTERN (insn));
++
++  if (REG_P (def_reg))
++    {
++      return rtx_equal_p (def_reg, use_rtx);
++    }
++  else if (GET_CODE (def_reg) == SUBREG
++	   && GET_MODE (def_reg) == SImode
++	   && rtx_equal_p (SUBREG_REG (def_reg), use_rtx))
++    {
++      if (TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 4)
++	return true;
++
++      if (!TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 0)
++	return true;
++
++      return false;
++    }
++
++  return false;
++}
++
++/* Check if INSN is a wext insn consuming DEF_REG.  */
++bool
++wext_odd_dep_p (rtx insn, rtx def_reg)
++{
++  rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0);
++  rtx use_reg = XEXP (shift_rtx, 0);
++  rtx pos_rtx = XEXP (shift_rtx, 1);
++
++  if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx))
++    return true;
++
++  if (GET_MODE (def_reg) == DImode)
++    return reg_overlap_p (def_reg, use_reg);
++
++  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
++  gcc_assert (REG_P (use_reg));
++
++  if (REG_P (def_reg))
++    {
++      if (!TARGET_BIG_ENDIAN)
++	return REGNO (def_reg) == REGNO (use_reg) + 1;
++      else
++	return  REGNO (def_reg) == REGNO (use_reg);
++    }
++
++  if (GET_CODE (def_reg) == SUBREG)
++    {
++      if (!reg_overlap_p (def_reg, use_reg))
++	return false;
++
++      if (!TARGET_BIG_ENDIAN)
++	return SUBREG_BYTE (def_reg) == 4;
++      else
++	return SUBREG_BYTE (def_reg) == 0;
++    }
++
++  return false;
++}
++
++/* Check if INSN is a bpick insn consuming DEF_REG.  */
++bool
++bpick_ra_rb_dep_p (rtx insn, rtx def_reg)
++{
++  rtx ior_rtx = SET_SRC (PATTERN (insn));
++  rtx and1_rtx = XEXP (ior_rtx, 0);
++  rtx and2_rtx = XEXP (ior_rtx, 1);
++  rtx reg1_0 = XEXP (and1_rtx, 0);
++  rtx reg1_1 = XEXP (and1_rtx, 1);
++  rtx reg2_0 = XEXP (and2_rtx, 0);
++  rtx reg2_1 = XEXP (and2_rtx, 1);
++
++  if (GET_CODE (reg1_0) == NOT)
++    {
++      if (rtx_equal_p (reg1_0, reg2_0))
++	return reg_overlap_p (def_reg, reg1_1)
++	       || reg_overlap_p (def_reg, reg2_1);
++
++      if (rtx_equal_p (reg1_0, reg2_1))
++	return reg_overlap_p (def_reg, reg1_1)
++	       || reg_overlap_p (def_reg, reg2_0);
++    }
++
++  if (GET_CODE (reg1_1) == NOT)
++    {
++      if (rtx_equal_p (reg1_1, reg2_0))
++	return reg_overlap_p (def_reg, reg1_0)
++	       || reg_overlap_p (def_reg, reg2_1);
++
++      if (rtx_equal_p (reg1_1, reg2_1))
++	return reg_overlap_p (def_reg, reg1_0)
++	       || reg_overlap_p (def_reg, reg2_0);
++    }
++
++  if (GET_CODE (reg2_0) == NOT)
++    {
++      if (rtx_equal_p (reg2_0, reg1_0))
++	return reg_overlap_p (def_reg, reg2_1)
++	       || reg_overlap_p (def_reg, reg1_1);
++
++      if (rtx_equal_p (reg2_0, reg1_1))
++	return reg_overlap_p (def_reg, reg2_1)
++	       || reg_overlap_p (def_reg, reg1_0);
++    }
++
++  if (GET_CODE (reg2_1) == NOT)
++    {
++      if (rtx_equal_p (reg2_1, reg1_0))
++	return reg_overlap_p (def_reg, reg2_0)
++	       || reg_overlap_p (def_reg, reg1_1);
++
++      if (rtx_equal_p (reg2_1, reg1_1))
++	return reg_overlap_p (def_reg, reg2_0)
++	       || reg_overlap_p (def_reg, reg1_0);
++    }
++
++  gcc_unreachable ();
++}
++
++pipeline_simulator::pipeline_simulator ()
++{
++  /* dfa_start () operates on static global variables and allocates memory
++     without checking whether it has already been called.  We count live
++     instances so that it only runs when the first one is created.  */
++  if (!gcc_dfa_initialized_++)
++    dfa_start ();
++
++  state_ = xmalloc (state_size());
++  state_reset (state_);
++}
++
++pipeline_simulator::~pipeline_simulator ()
++{
++  /* dfa_finish () operates on a static global variable and deallocates memory
++     without checking whether it has already been called.  We count live
++     instances so that it only runs when the last one is destroyed.  */
++  free (state_);
++
++  gcc_assert(gcc_dfa_initialized_ > 0);
++  if (!--gcc_dfa_initialized_)
++    dfa_finish ();
++}
++
++void
++pipeline_simulator::advance_cycle (int cycles)
++{
++  gcc_assert (cycles > 0);
++
++  /* The second argument was 'NULL', but we found the expression is directly
++     written in insn-automata.c:
++       if (insn == 0)
++	 insn_code = DFA__ADVANCE_CYCLE;
++     Hence we change it to '0' in order to make it consistent.  */
++  while (cycles--)
++    state_transition (state_, 0);
++}
++
++/* A wrapper of insn_latency () provided by the insn-attr.h in the object tree.
++   See that file for more information.  */
++int
++pipeline_simulator::query_latency (rtx_insn *producer, rtx_insn *consumer) const
++{
++  return insn_latency (producer, consumer);
++}
++
++/* Return 0 or negative if we can issue INSN at the current cycle.  Otherwise,
++   return a positive value indicating how many cycles we have to wait.  The
++   interface is consistent with state_transition () provided by insn-attr.h
++   in the object directory.  See that file for more information.  */
++int
++pipeline_simulator::issue_insn (rtx_insn *insn)
++{
++  int stalls;
++
++  /* Skip cycles specified by pseudo NOPs.  */
++  if (insn_pseudo_nop_p (insn))
++    {
++      int nop_stalls = INTVAL (XVECEXP (PATTERN (insn), 0, 0));
++
++      gcc_assert (nop_stalls > 0);
++      advance_cycle (nop_stalls);
++      stalls = -1;
++    }
++  else
++    {
++      stalls = state_transition (state_, insn);
++
++      /* All targets are single-issue, so we advance one cycle once after
++	 an insn has been issued successfully.  */
++      if (stalls <= 0)
++	advance_cycle ();
++    }
++
++  return stalls;
++}
++
++/* This function is similar to issue_insn (), but if INSN cannot be issued at
++   the current cycle it advances by the reported stall cycles and issues INSN
++   again.  The return value is 0 or negative if INSN was issued immediately;
++   otherwise it is the number of cycles that have been skipped.  */
++int
++pipeline_simulator::force_issue_insn (rtx_insn *insn)
++{
++  int stalls;
++
++  stalls = issue_insn (insn);
++
++  /* Skip cycles until we can issue the insn.  */
++  if (stalls > 0)
++    {
++      advance_cycle (stalls);
++      issue_insn (insn);
++    }
++
++  return stalls;
++}
++
++/* The main flow of the class STALL_INSERTER.  We insert NOPs for structural
++   hazards first because self-stalled instructions also consume the delay
++   cycles caused by data hazards.  */
++void
++stall_inserter::insert_stalls ()
++{
++  compute_bb_for_insn_safe ();
++
++  insert_structural_hazard_stalls ();
++  insert_data_hazard_stalls ();
++
++  /* We have to call the following two functions again because we have
++     inserted insns after they were first invoked.  Otherwise, an assertion
++     in final () will be triggered and cause an internal compiler error.  */
++  init_insn_lengths ();
++  shorten_branches (get_insns ());
++
++  free_bb_for_insn ();
++}
++
++/* A helper function inserting NOPs.  CYCLES indicates how many cycles the NOP
++   insn consumes.  TYPE indicates what type of the NOP insn we want to insert;
++   now there are two types available: RES_DEP and DATA_DEP.  */
++rtx
++stall_inserter::emit_pseudo_nop_before (
++    rtx_insn *insn, int cycles, enum dep_type type)
++{
++  rtx nop_pattern;
++  rtx_insn *nop_insn;
++  int recog;
++
++  switch (type)
++  {
++  case RES_DEP:
++    nop_pattern = gen_nop_res_dep (GEN_INT (cycles));
++    break;
++  case DATA_DEP:
++    nop_pattern = gen_nop_data_dep (GEN_INT (cycles));
++    break;
++  default:
++    gcc_unreachable ();
++  }
++
++  nop_insn = emit_insn_before (nop_pattern, insn);
++  recog = recog_memoized (nop_insn);
++  gcc_assert(recog != -1);
++
++  return nop_insn;
++}
++
++void
++stall_inserter::insert_structural_hazard_stalls ()
++{
++  pipeline_simulator simulator;
++  rtx_insn *insn;
++
++  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
++    {
++      if (!insn_executable_p (insn)) continue;
++
++      int stalls = simulator.force_issue_insn (insn);
++
++      if (stalls > 0)
++	emit_pseudo_nop_before (insn, stalls, RES_DEP);
++    }
++}
++
++void
++stall_inserter::insert_data_hazard_stalls ()
++{
++  pipeline_simulator simulator;
++  rtx_insn *insn;
++
++  /* Calling df_insn_rescan_all here is required in order to avoid a crash
++     when some special options are specified by users, such as
++     -O0 -fschedule-insns2.  */
++  df_chain_add_problem (DF_DU_CHAIN);
++  df_insn_rescan_all ();
++  df_analyze ();
++
++  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
++    {
++      if (!insn_executable_p (insn)) continue;
++
++      simulator.force_issue_insn (insn);
++      emit_pseudo_nops_for_data_hazards (insn, simulator);
++    }
++
++  /* We must call df_finish_pass manually because it should be invoked before
++     BB information is destroyed.  Hence we cannot set the TODO_df_finish flag
++     to the pass manager.  */
++  df_insn_rescan_all ();
++  df_finish_pass (false);
++}
++
++/* Traverse all insns using the results produced by INSN and ask SIMULATOR
++   how many delay cycles between them.  If there are some delay cycles, insert
++   corresponding NOP insns there.  */
++void
++stall_inserter::emit_pseudo_nops_for_data_hazards (
++    rtx_insn *insn, pipeline_simulator &simulator)
++{
++  df_ref def;
++  df_link *link;
++  std::set<rtx> processed_insns;
++
++  FOR_EACH_INSN_DEF (def, insn)
++    {
++      for (link = DF_REF_CHAIN (def); link; link = link->next)
++	{
++	  if (!DF_REF_INSN_INFO (link->ref))
++	    continue;
++
++	  rtx_insn *use_insn = DF_REF_INSN (link->ref);
++
++	  if (!insn_executable_p (use_insn)
++	      || processed_insns.count (use_insn))
++	    continue;
++
++	  int stalls = simulator.query_latency (insn, use_insn);
++	  int distance = cycle_distance (insn, use_insn);
++
++	  if (stalls > distance)
++	    {
++	      stalls -= distance;
++	      emit_pseudo_nop_before (use_insn, stalls, DATA_DEP);
++	      processed_insns.insert (use_insn);
++	    }
++	}
++    }
++}
++
++pass_nds32_print_stalls::pass_nds32_print_stalls (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_print_stalls, ctxt)
++{
++}
++
++bool pass_nds32_print_stalls::gate (function *)
++{
++  return TARGET_PRINT_STALLS;
++}
++
++unsigned int
++pass_nds32_print_stalls::execute (function *)
++{
++  stall_inserter inserter;
++
++  inserter.insert_stalls ();
++  return 0;
++}
++
++} // namespace scheduling
++} // namespace nds32
++
++/* ------------------------------------------------------------------------ */
++
++using namespace nds32;
++using namespace nds32::scheduling;
++
++namespace { // anonymous namespace
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at II.  */
++bool
++n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    /* MOVD44_E */
++    case TYPE_ALU:
++      if (movd44_even_dep_p (consumer, def_reg))
++	return true;
++
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MUL:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  It requires two micro-
++      operations in order to write two registers. We have to check the
++      dependency from the producer to the first micro-operation.  */
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_LOAD:
++      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
++      if (post_update_insn_p (consumer))
++	use_rtx = extract_base_reg (consumer);
++      else
++	use_rtx = extract_mem_rtx (consumer);
++      break;
++
++    case TYPE_STORE:
++      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
++      if (post_update_insn_p (consumer))
++	use_rtx = extract_base_reg (consumer);
++      else
++	use_rtx = extract_mem_rtx (consumer);
++
++      if (reg_overlap_p (def_reg, use_rtx))
++	return true;
++
++      /* ST_bi, ST_!bi_RI */
++      if (!post_update_insn_p (consumer)
++	  && !immed_offset_p (extract_mem_rtx (consumer)))
++	return false;
++
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_STORE_MULTIPLE:
++      /* ADDR_IN */
++      use_rtx = extract_base_reg (consumer);
++      if (reg_overlap_p (def_reg, use_rtx))
++	return true;
++
++      /* SMW (N, 1) */
++      use_rtx = extract_nth_access_rtx (consumer, 0);
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = PATTERN (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at AG (II).  */
++bool
++n8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_BRANCH:
++      use_rtx = extract_branch_target_rtx (consumer);
++      break;
++
++    case TYPE_LOAD:
++      if (load_single_p (consumer))
++	use_rtx = extract_mem_rtx (consumer);
++      else
++	use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_STORE:
++      if (store_single_p (consumer)
++	  && (!post_update_insn_p (consumer)
++	      || immed_offset_p (extract_mem_rtx (consumer))))
++	use_rtx = extract_mem_rtx (consumer);
++      else
++	use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at EX.  */
++bool
++n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++      if (movd44_even_dep_p (consumer, def_reg))
++	return true;
++
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MUL:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  It requires two micro-
++      operations in order to write two registers. We have to check the
++      dependency from the producer to the first micro-operation.  */
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = extract_branch_condition_rtx (consumer);
++      break;
++
++    case TYPE_STORE:
++      /* exclude ST_!bi_RR */
++      if (!post_update_insn_p (consumer)
++	  && !immed_offset_p (extract_mem_rtx (consumer)))
++	return false;
++
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_nth_access_rtx (consumer, 0);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at AG (II).  */
++bool
++e8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg)
++{
++  return n8_consumed_by_addr_in_p (consumer, def_reg);
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at EX.  */
++bool
++e8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++    case TYPE_STORE:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MUL:
++    case TYPE_MAC:
++    case TYPE_DIV:
++    case TYPE_BRANCH:
++    case TYPE_STORE_MULTIPLE:
++      return n8_consumed_by_ex_p (consumer, def_reg);
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at EX.  */
++bool
++n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++      if (movd44_even_dep_p (consumer, def_reg))
++	return true;
++
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_PBSAD:
++    case TYPE_MUL:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_ALU_SHIFT:
++      use_rtx = extract_shift_reg (consumer);
++      break;
++
++    case TYPE_PBSADA:
++      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
++
++    case TYPE_MAC:
++      use_rtx = PATTERN (consumer);
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MMU:
++      if (GET_CODE (PATTERN (consumer)) == SET)
++	use_rtx = SET_SRC (PATTERN (consumer));
++      else
++	return true;
++      break;
++
++    case TYPE_LOAD:
++      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
++      if (post_update_insn_p (consumer))
++	use_rtx = extract_base_reg (consumer);
++      else
++	use_rtx = extract_mem_rtx (consumer);
++      break;
++
++    case TYPE_STORE:
++      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
++      if (post_update_insn_p (consumer))
++	use_rtx = extract_base_reg (consumer);
++      else
++	use_rtx = extract_mem_rtx (consumer);
++
++      if (reg_overlap_p (def_reg, use_rtx))
++	return true;
++
++      /* exclude ST_!bi_RR */
++      if (!post_update_insn_p (consumer)
++	  && !immed_offset_p (extract_mem_rtx (consumer)))
++	return false;
++
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_STORE_MULTIPLE:
++      /* ADDR_IN */
++      use_rtx = extract_base_reg (consumer);
++      if (reg_overlap_p (def_reg, use_rtx))
++	return true;
++
++      /* SMW (N, 1) */
++      use_rtx = extract_nth_access_rtx (consumer, 0);
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = PATTERN (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at EX.  */
++bool
++n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++    case TYPE_PBSAD:
++    case TYPE_MUL:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_ALU_SHIFT:
++      use_rtx = extract_shift_reg (consumer);
++      break;
++
++    case TYPE_PBSADA:
++      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
++
++    case TYPE_MAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  In 2R1W configuration,
++      it requires two micro-operations in order to write two registers.
++      We have to check the dependency from the producer to the first
++      micro-operation.  */
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MMU:
++      if (GET_CODE (PATTERN (consumer)) == SET)
++	use_rtx = SET_SRC (PATTERN (consumer));
++      else
++	return true;
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++      use_rtx = extract_mem_rtx (consumer);
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = PATTERN (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at EX.  */
++bool
++n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++    case TYPE_PBSAD:
++    case TYPE_MUL:
++    case TYPE_DALU:
++    case TYPE_DALU64:
++    case TYPE_DMUL:
++    case TYPE_DPACK:
++    case TYPE_DINSB:
++    case TYPE_DCMP:
++    case TYPE_DCLIP:
++    case TYPE_DALUROUND:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_ALU_SHIFT:
++      use_rtx = extract_shift_reg (consumer);
++      break;
++
++    case TYPE_PBSADA:
++      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
++
++    case TYPE_MAC:
++    case TYPE_DMAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  */
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_DWEXT:
++      return wext_odd_dep_p (consumer, def_reg);
++
++    case TYPE_DBPICK:
++      return bpick_ra_rb_dep_p (consumer, def_reg);
++
++    case TYPE_MMU:
++      if (GET_CODE (PATTERN (consumer)) == SET)
++	use_rtx = SET_SRC (PATTERN (consumer));
++      else
++	return true;
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++      use_rtx = extract_mem_rtx (consumer);
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = PATTERN (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at EX.  */
++bool
++gw_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++    case TYPE_PBSAD:
++    case TYPE_MUL:
++    case TYPE_DALU:
++    case TYPE_DALU64:
++    case TYPE_DMUL:
++    case TYPE_DPACK:
++    case TYPE_DINSB:
++    case TYPE_DCMP:
++    case TYPE_DCLIP:
++    case TYPE_DALUROUND:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_ALU_SHIFT:
++      use_rtx = extract_shift_reg (consumer);
++      break;
++
++    case TYPE_PBSADA:
++      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
++
++    case TYPE_MAC:
++    case TYPE_DMAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  We have to check the
++      dependency from the producer to the first micro-operation.  */
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_DWEXT:
++      return wext_odd_dep_p (consumer, def_reg);
++
++    case TYPE_DBPICK:
++      return bpick_ra_rb_dep_p (consumer, def_reg);
++
++    case TYPE_MMU:
++      if (GET_CODE (PATTERN (consumer)) == SET)
++	use_rtx = SET_SRC (PATTERN (consumer));
++      else
++	return true;
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++      use_rtx = extract_mem_rtx (consumer);
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = PATTERN (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check dependencies from any stages to ALU_E1 (E1).  This is a helper
++   function of n13_consumed_by_e1_dep_p ().  */
++bool
++n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg)
++{
++  rtx unspec_rtx, operand_ra, operand_rb;
++  rtx src_rtx, dst_rtx;
++
++  switch (INSN_CODE (alu_e1_insn))
++    {
++    /* BSP and BSE are supported by built-in functions, the corresponding
++       patterns are formed by UNSPEC RTXs.  We have to handle them
++       individually.  */
++    case CODE_FOR_unspec_bsp:
++    case CODE_FOR_unspec_bse:
++      unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0));
++      gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);
++
++      operand_ra = XVECEXP (unspec_rtx, 0, 0);
++      operand_rb = XVECEXP (unspec_rtx, 0, 1);
++
++      if (rtx_equal_p (def_reg, operand_ra)
++	  || rtx_equal_p (def_reg, operand_rb))
++	return true;
++
++      return false;
++
++    /* Unlike general ALU instructions, MOVD44 requires operands at E1.  */
++    case CODE_FOR_move_di:
++    case CODE_FOR_move_df:
++      src_rtx = SET_SRC (PATTERN (alu_e1_insn));
++      dst_rtx = SET_DEST (PATTERN (alu_e1_insn));
++
++      if (REG_P (dst_rtx) && REG_P (src_rtx)
++	  && rtx_equal_p (src_rtx, def_reg))
++	return true;
++
++      return false;
++
++    default:
++      return false;
++    }
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at E1.  Because the address generation unit is
++   at E1, the address input should be ready at E1.  Note that the branch
++   target is also a kind of address, so we have to check it.  */
++bool
++n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    /* ALU_E1 */
++    case TYPE_ALU:
++      return n13_alu_e1_insn_dep_reg_p (consumer, def_reg);
++
++    case TYPE_PBSADA:
++      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
++
++    case TYPE_PBSAD:
++    case TYPE_MUL:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MMU:
++      if (GET_CODE (PATTERN (consumer)) == SET)
++	use_rtx = SET_SRC (PATTERN (consumer));
++      else
++	return true;
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = extract_branch_target_rtx (consumer);
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++      use_rtx = extract_mem_rtx (consumer);
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    default:
++      return false;
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at E2.  */
++bool
++n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++    case TYPE_STORE:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_ALU_SHIFT:
++      use_rtx = extract_shift_reg (consumer);
++      break;
++
++    case TYPE_PBSADA:
++      return pbsada_insn_rt_dep_reg_p (consumer, def_reg);
++
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_nth_access_rtx (consumer, 0);
++      break;
++
++    case TYPE_BRANCH:
++      use_rtx = extract_branch_condition_rtx (consumer);
++      break;
++
++    default:
++      gcc_unreachable();
++    }
++
++  if (reg_overlap_p (def_reg, use_rtx))
++    return true;
++
++  return false;
++}
++
++/* Check the dependency between the producer defining DEF_REG and CONSUMER
++   requiring input operand at AG (E1).  */
++bool
++pn_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_LOAD:
++      if (load_single_p (consumer))
++	use_rtx = extract_mem_rtx (consumer);
++      else
++	use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_STORE:
++      if (store_single_p (consumer)
++	  && (!post_update_insn_p (consumer)
++	      || immed_offset_p (extract_mem_rtx (consumer))))
++	use_rtx = extract_mem_rtx (consumer);
++      else
++	use_rtx = extract_base_reg (consumer);
++      break;
++
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_base_reg (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++bool
++pn_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++      if (get_attr_subtype (consumer) != SUBTYPE_SHIFT)
++	return false;
++    case TYPE_PBSAD:
++    case TYPE_PBSADA:
++    case TYPE_MUL:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_MAC:
++      use_rtx = extract_mac_non_acc_rtx (consumer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++bool
++pn_consumed_by_e3_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_ALU:
++      if (get_attr_subtype (consumer) == SUBTYPE_SHIFT)
++	return false;
++    case TYPE_PBSAD:
++    case TYPE_PBSADA:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_BRANCH:
++      return (reg_overlap_p (def_reg, extract_branch_target_rtx (consumer))
++	      || reg_overlap_p (def_reg,
++				extract_branch_condition_rtx (consumer)));
++      break;
++
++    case TYPE_STORE:
++      use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    case TYPE_STORE_MULTIPLE:
++      use_rtx = extract_nth_access_rtx (consumer, 0);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++bool
++pn_consumed_by_e4_dep_p (rtx_insn *consumer, rtx def_reg)
++{
++  rtx use_rtx;
++
++  switch (get_attr_type (consumer))
++    {
++    case TYPE_MAC:
++      use_rtx = SET_DEST (PATTERN (consumer));
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (consumer))
++	use_rtx = SET_SRC (parallel_element (consumer, 0));
++      else
++	use_rtx = SET_SRC (PATTERN (consumer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return reg_overlap_p (def_reg, use_rtx);
++}
++
++} // anonymous namespace
++
++/* ------------------------------------------------------------------------ */
++
++/* Guard functions for N7 core.  */
++
++bool
++nds32_n7_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  if (post_update_insn_p (producer))
++    return false;
++
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  return n7_consumed_by_ii_dep_p (consumer, def_reg);
++}
++
++bool
++nds32_n7_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
++     the base register and the result is ready in II stage, so we don't need
++     to handle that case in this guard function and the corresponding bypass
++     rule.  */
++  if (post_update_insn_p (producer))
++    return false;
++
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  if (last_def_reg == NULL_RTX)
++    return false;
++
++  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);
++
++  return n7_consumed_by_ii_dep_p (consumer, last_def_reg);
++}
++
++/* Guard functions for N8 core.  */
++
++bool
++nds32_n8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  if (post_update_insn_p (producer))
++    return false;
++
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  return n8_consumed_by_addr_in_p (consumer, def_reg);
++}
++
++bool
++nds32_n8_load_bi_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  if (!post_update_insn_p (producer))
++    return false;
++
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  return n8_consumed_by_addr_in_p (consumer, def_reg);
++}
++
++bool
++nds32_n8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  if (post_update_insn_p (producer))
++    return false;
++
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  return n8_consumed_by_ex_p (consumer, def_reg);
++}
++
++bool
++nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_ALU:
++      if (movd44_insn_p (producer))
++	def_reg = extract_movd44_odd_reg (producer);
++      else
++	def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	def_reg = SET_DEST (parallel_element (producer, 1));
++      else
++	def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      if (!post_update_insn_p (producer))
++	return false;
++
++      def_reg = extract_base_reg (producer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return n8_consumed_by_addr_in_p (consumer, def_reg);
++}
++
++bool
++nds32_n8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
++     the base register and the result is ready in EX stage, so we don't need
++     to handle that case in this guard function and the corresponding bypass
++     rule.  */
++  if (post_update_insn_p (producer))
++    return false;
++
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  if (last_def_reg == NULL_RTX)
++    return false;
++
++  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);
++
++  return n8_consumed_by_addr_in_p (consumer, last_def_reg);
++}
++
++bool
++nds32_n8_last_load_two_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  int index = -2;
++
++  /* If PRODUCER is a post-update insn, there is one additional micro-
++     operation inserted at the end, so the last memory access operation should
++     be handled by this guard function and the corresponding bypass rule.  */
++  if (post_update_insn_p (producer))
++    index = -1;
++
++  rtx last_two_def_reg = extract_nth_access_reg (producer, index);
++
++  if (last_two_def_reg == NULL_RTX)
++    return false;
++
++  gcc_assert (REG_P (last_two_def_reg)
++	      || GET_CODE (last_two_def_reg) == SUBREG);
++
++  return n8_consumed_by_addr_in_p (consumer, last_two_def_reg);
++}
++
++bool
++nds32_n8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
++     the base register and the result is ready in EX stage, so we don't need
++     to handle that case in this guard function and the corresponding bypass
++     rule.  */
++  if (post_update_insn_p (producer))
++    return false;
++
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  if (last_def_reg == NULL_RTX)
++    return false;
++
++  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);
++
++  return n8_consumed_by_ex_p (consumer, last_def_reg);
++}
++
++/* Guard functions for E8 cores.  */
++
++bool
++nds32_e8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  return e8_consumed_by_addr_in_p (consumer, def_reg);
++}
++
++bool
++nds32_e8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  return e8_consumed_by_ex_p (consumer, def_reg);
++}
++
++bool
++nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_ALU:
++      /* No data hazards if AGEN's input is produced by MOVI or SETHI.  */
++      if (GET_CODE (PATTERN (producer)) == SET)
++	{
++	  rtx dest = SET_DEST (PATTERN (producer));
++	  rtx src = SET_SRC (PATTERN (producer));
++
++	  if ((REG_P (dest) || GET_CODE (dest) == SUBREG)
++	      && (GET_CODE (src) == CONST_INT || GET_CODE (src) == HIGH))
++	    return false;
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (e8_consumed_by_addr_in_p (consumer, def_reg1)
++		  || e8_consumed_by_addr_in_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      if (!post_update_insn_p (producer))
++	return false;
++
++      def_reg = extract_base_reg (producer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return e8_consumed_by_addr_in_p (consumer, def_reg);
++}
++
++bool
++nds32_e8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  if (last_def_reg == NULL_RTX)
++    return false;
++
++  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);
++
++  return e8_consumed_by_addr_in_p (consumer, last_def_reg);
++}
++
++bool
++nds32_e8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  if (last_def_reg == NULL_RTX)
++    return false;
++
++  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);
++
++  return e8_consumed_by_ex_p (consumer, last_def_reg);
++}
++
++/* Guard functions for N9 cores.  */
++
++/* Check dependencies from MM to EX.  */
++bool
++nds32_n9_2r1w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    /* LD_!bi */
++    case TYPE_LOAD:
++      if (post_update_insn_p (producer))
++	return false;
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++    return n9_2r1w_consumed_by_ex_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from MM to EX.  */
++bool
++nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_LOAD:
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  We have to handle them
++      individually.  */
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg1)
++		  || n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++    return n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from LMW(N, N) to EX.  */
++bool
++nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  if (nds32_register_ports_config == REG_PORT_2R1W)
++    {
++      /* The base-update micro operation occupies the last cycle.  */
++      if (post_update_insn_p (producer))
++	return false;
++
++      /* When the base register is in the list of a load multiple insn and the
++	 access order of the base register is not the last one, we need an
++	 additional micro operation to commit the load result to the base
++	 register -- we can treat the base register as the last defined
++	 register.  */
++      size_t i;
++      size_t n_elems = parallel_elements (producer);
++      rtx base_reg = extract_base_reg (producer);
++
++      for (i = 0; i < n_elems; ++i)
++	{
++	  rtx load_rtx = extract_nth_access_rtx (producer, i);
++	  rtx list_element = SET_DEST (load_rtx);
++
++	  if (rtx_equal_p (base_reg, list_element) && i != n_elems - 1)
++	    {
++	      last_def_reg = base_reg;
++	      break;
++	    }
++	}
++
++      return n9_2r1w_consumed_by_ex_dep_p (consumer, last_def_reg);
++    }
++  else
++    return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg);
++}
++
++/* Guard functions for N10 cores.  */
++
++/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN).  */
++bool
++nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  gcc_assert (get_attr_type (producer) == TYPE_FLOAD
++	      || get_attr_type (producer) == TYPE_FSTORE);
++  gcc_assert (get_attr_type (consumer) == TYPE_FLOAD
++	      || get_attr_type (consumer) == TYPE_FSTORE);
++
++  if (!post_update_insn_p (producer))
++    return false;
++
++  return reg_overlap_p (extract_base_reg (producer),
++			extract_mem_rtx (consumer));
++}
++
++/* Check dependencies from MM to EX.  */
++bool
++nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_LOAD:
++    case TYPE_MUL:
++    case TYPE_MAC:
++    case TYPE_DALU64:
++    case TYPE_DMUL:
++    case TYPE_DMAC:
++    case TYPE_DALUROUND:
++    case TYPE_DBPICK:
++    case TYPE_DWEXT:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  We have to handle them
++      individually.  */
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (n10_consumed_by_ex_dep_p (consumer, def_reg1)
++		  || n10_consumed_by_ex_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++    return n10_consumed_by_ex_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from LMW(N, N) to EX.  */
++bool
++nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  return n10_consumed_by_ex_dep_p (consumer, last_def_reg);
++}
++
++/* Guard functions for Graywolf cores.  */
++
++/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN).  */
++bool
++nds32_gw_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  return nds32_n10_ex_to_ex_p (producer, consumer);
++}
++
++/* Check dependencies from MM to EX.  */
++bool
++nds32_gw_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_LOAD:
++    case TYPE_MUL:
++    case TYPE_MAC:
++    case TYPE_DALU64:
++    case TYPE_DMUL:
++    case TYPE_DMAC:
++    case TYPE_DALUROUND:
++    case TYPE_DBPICK:
++    case TYPE_DWEXT:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++      results, the quotient and the remainder.  We have to handle them
++      individually.  */
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (gw_consumed_by_ex_dep_p (consumer, def_reg1)
++		  || gw_consumed_by_ex_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++    return gw_consumed_by_ex_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from LMW(N, N) to EX.  */
++bool
++nds32_gw_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  return gw_consumed_by_ex_dep_p (consumer, last_def_reg);
++}
++
++/* Guard functions for N12/N13 cores.  */
++
++/* Check dependencies from E2 to E1.  */
++bool
++nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    /* Only post-update load/store instructions are considered.  These
++       instructions produce address output at E2.  */
++    case TYPE_LOAD:
++    case TYPE_STORE:
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      if (!post_update_insn_p (producer))
++	return false;
++
++      def_reg = extract_base_reg (producer);
++      break;
++
++    case TYPE_ALU:
++    case TYPE_ALU_SHIFT:
++    case TYPE_PBSAD:
++    case TYPE_PBSADA:
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_BRANCH:
++      return true;
++
++    case TYPE_DIV:
++      /* Some special instructions, divmodsi4 and udivmodsi4, produce two
++	 results, the quotient and the remainder.  We have to handle them
++	 individually.  */
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (n13_consumed_by_e1_dep_p (consumer, def_reg1)
++		  || n13_consumed_by_e1_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return n13_consumed_by_e1_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from Load-Store Unit (E3) to E1.  */
++bool
++nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  gcc_assert (get_attr_type (producer) == TYPE_LOAD);
++  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
++
++  return n13_consumed_by_e1_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from Load-Store Unit (E3) to E2.  */
++bool
++nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg = SET_DEST (PATTERN (producer));
++
++  gcc_assert (get_attr_type (producer) == TYPE_LOAD);
++  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
++
++  return n13_consumed_by_e2_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from LMW(N, N) to E1.  */
++bool
++nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  return n13_consumed_by_e1_dep_p (consumer, last_def_reg);
++}
++
++/* Check dependencies from LMW(N, N) to E2.  */
++bool
++nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_def_reg = extract_nth_access_reg (producer, -1);
++
++  return n13_consumed_by_e2_dep_p (consumer, last_def_reg);
++}
++
++/* Check dependencies from LMW(N, N-1) to E1.  */
++bool
++nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx last_two_def_reg = extract_nth_access_reg (producer, -2);
++
++  if (last_two_def_reg == NULL_RTX)
++    return false;
++
++  return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg);
++}
++
++/* Guard functions for Panther cores.  */
++
++/* Check dependencies from E2 to E1.  */
++bool
++nds32_pn_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_ALU:
++      gcc_assert (get_attr_subtype (producer) == SUBTYPE_SHIFT);
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return pn_consumed_by_e1_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from E3 to E1.  */
++bool
++nds32_pn_e3_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_ALU:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return pn_consumed_by_e1_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from E3 to E2.  */
++bool
++nds32_pn_e3_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_ALU:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return pn_consumed_by_e2_dep_p (consumer, def_reg);
++}
++
++/* Check dependencies from E4 to E1.  */
++bool
++nds32_pn_e4_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (pn_consumed_by_e1_dep_p (consumer, def_reg1)
++		  || pn_consumed_by_e1_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_LOAD:
++      if (post_update_insn_p (producer)
++	  && pn_consumed_by_e1_dep_p (consumer, extract_base_reg (producer)))
++	return true;
++
++      if (!load_full_word_p (producer))
++	return false;
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_STORE:
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      if (!post_update_insn_p (producer))
++	return false;
++
++      def_reg = extract_base_reg (producer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return pn_consumed_by_e1_dep_p (consumer, def_reg);
++}
++
++/* E4 -> E2 guard: true if CONSUMER's E2 dependency is a reg PRODUCER sets.  */
++bool
++nds32_pn_e4_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (pn_consumed_by_e2_dep_p (consumer, def_reg1)
++		  || pn_consumed_by_e2_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_LOAD:
++      if (post_update_insn_p (producer)
++	  && pn_consumed_by_e2_dep_p (consumer, extract_base_reg (producer)))
++	return true;
++
++      if (!load_full_word_p (producer))
++	return false;
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_STORE:
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      if (!post_update_insn_p (producer))
++	return false;
++
++      def_reg = extract_base_reg (producer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return pn_consumed_by_e2_dep_p (consumer, def_reg);
++}
++
++/* E4 -> E3 guard: true if CONSUMER's E3 dependency is a reg PRODUCER sets.  */
++bool
++nds32_pn_e4_to_e3_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx def_reg;
++
++  switch (get_attr_type (producer))
++    {
++    case TYPE_MUL:
++    case TYPE_MAC:
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_DIV:
++      if (divmod_p (producer))
++	{
++	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
++	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
++
++	  return (pn_consumed_by_e3_dep_p (consumer, def_reg1)
++		  || pn_consumed_by_e3_dep_p (consumer, def_reg2));
++	}
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_LOAD:
++      if (post_update_insn_p (producer)
++	  && pn_consumed_by_e3_dep_p (consumer, extract_base_reg (producer)))
++	return true;
++
++      if (load_partial_word_p (producer))
++	return false;
++
++      def_reg = SET_DEST (PATTERN (producer));
++      break;
++
++    case TYPE_STORE:
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      if (!post_update_insn_p (producer))
++	return false;
++
++      def_reg = extract_base_reg (producer);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return pn_consumed_by_e3_dep_p (consumer, def_reg);
++}
++
++/* WB -> E1 guard; holds only if PRODUCER passes load_partial_word_p.  */
++bool
++nds32_pn_wb_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx loaded_reg;
++
++  /* Only load producers are expected to reach this guard.  */
++  if (get_attr_type (producer) != TYPE_LOAD)
++    gcc_unreachable ();
++
++  /* Loads for which load_partial_word_p is false never report a
++     dependency from this guard.  */
++  if (!load_partial_word_p (producer))
++    return false;
++
++  /* Test the register written by the load against CONSUMER using
++     the E1 consumption check.  */
++  loaded_reg = SET_DEST (PATTERN (producer));
++
++  return pn_consumed_by_e1_dep_p (consumer, loaded_reg);
++}
++
++/* WB -> E2 guard; holds only if PRODUCER passes load_partial_word_p.  */
++bool
++nds32_pn_wb_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx loaded_reg;
++
++  /* Only load producers are expected to reach this guard.  */
++  if (get_attr_type (producer) != TYPE_LOAD)
++    gcc_unreachable ();
++
++  /* Loads for which load_partial_word_p is false never report a
++     dependency from this guard.  */
++  if (!load_partial_word_p (producer))
++    return false;
++
++  /* Test the register written by the load against CONSUMER using
++     the E2 consumption check.  */
++  loaded_reg = SET_DEST (PATTERN (producer));
++
++  return pn_consumed_by_e2_dep_p (consumer, loaded_reg);
++}
++
++/* WB -> E3 guard; holds only if PRODUCER passes load_partial_word_p.  */
++bool
++nds32_pn_wb_to_e3_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx loaded_reg;
++
++  /* Only load producers are expected to reach this guard.  */
++  if (get_attr_type (producer) != TYPE_LOAD)
++    gcc_unreachable ();
++
++  /* Loads for which load_partial_word_p is false never report a
++     dependency from this guard.  */
++  if (!load_partial_word_p (producer))
++    return false;
++
++  /* Test the register written by the load against CONSUMER using
++     the E3 consumption check.  */
++  loaded_reg = SET_DEST (PATTERN (producer));
++
++  return pn_consumed_by_e3_dep_p (consumer, loaded_reg);
++}
++
++/* WB -> E4 guard; holds only if PRODUCER passes load_partial_word_p.  */
++bool
++nds32_pn_wb_to_e4_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx loaded_reg;
++
++  /* Only load producers are expected to reach this guard.  */
++  if (get_attr_type (producer) != TYPE_LOAD)
++    gcc_unreachable ();
++
++  /* Loads for which load_partial_word_p is false never report a
++     dependency from this guard.  */
++  if (!load_partial_word_p (producer))
++    return false;
++
++  /* Test the register written by the load against CONSUMER using
++     the E4 consumption check.  */
++  loaded_reg = SET_DEST (PATTERN (producer));
++
++  return pn_consumed_by_e4_dep_p (consumer, loaded_reg);
++}
++
++/* LMW(N, N) -> E1 guard: test CONSUMER against PRODUCER's access reg -1.  */
++bool
++nds32_pn_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  /* Index -1 presumably names the last register PRODUCER accesses.  */
++  rtx tail_reg = extract_nth_access_reg (producer, -1);
++  return pn_consumed_by_e1_dep_p (consumer, tail_reg);
++}
++
++/* LMW(N, N) -> E2 guard: test CONSUMER against PRODUCER's access reg -1.  */
++bool
++nds32_pn_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  /* Index -1 presumably names the last register PRODUCER accesses.  */
++  rtx tail_reg = extract_nth_access_reg (producer, -1);
++  return pn_consumed_by_e2_dep_p (consumer, tail_reg);
++}
++
++/* LMW(N, N) -> E3 guard: test CONSUMER against PRODUCER's access reg -1.  */
++bool
++nds32_pn_last_load_to_e3_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  /* Index -1 presumably names the last register PRODUCER accesses.  */
++  rtx tail_reg = extract_nth_access_reg (producer, -1);
++  return pn_consumed_by_e3_dep_p (consumer, tail_reg);
++}
++
++/* LMW(N, N - 1) -> E1 guard: test CONSUMER against access register -2.  */
++bool
++nds32_pn_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx reg = extract_nth_access_reg (producer, -2);
++
++  /* extract_nth_access_reg may yield NULL_RTX (presumably when PRODUCER
++     has fewer registers); report no dependency in that case.  */
++  return (reg != NULL_RTX
++	  && pn_consumed_by_e1_dep_p (consumer, reg));
++}
++
++/* LMW(N, N - 1) -> E2 guard: test CONSUMER against access register -2.  */
++bool
++nds32_pn_last_two_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx reg = extract_nth_access_reg (producer, -2);
++
++  /* extract_nth_access_reg may yield NULL_RTX (presumably when PRODUCER
++     has fewer registers); report no dependency in that case.  */
++  return (reg != NULL_RTX
++	  && pn_consumed_by_e2_dep_p (consumer, reg));
++}
++
++/* LMW(N, N - 2) -> E1 guard: test CONSUMER against access register -3.  */
++bool
++nds32_pn_last_three_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
++{
++  rtx reg = extract_nth_access_reg (producer, -3);
++
++  /* extract_nth_access_reg may yield NULL_RTX (presumably when PRODUCER
++     has fewer registers); report no dependency in that case.  */
++  return (reg != NULL_RTX
++	  && pn_consumed_by_e1_dep_p (consumer, reg));
++}
+ 
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c
+index 361d001..b45d3e6 100644
+--- a/gcc/config/nds32/nds32-predicates.c
++++ b/gcc/config/nds32/nds32-predicates.c
+@@ -24,14 +24,41 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
+-#include "rtl.h"
+ #include "tree.h"
+-#include "tm_p.h"
+-#include "optabs.h"		/* For GEN_FCN.  */
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
+ #include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
+ #include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
+ #include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+@@ -98,21 +125,33 @@ nds32_consecutive_registers_load_store_p (rtx op,
+    We have to extract reg and mem of every element and
+    check if the information is valid for multiple load/store operation.  */
+ bool
+-nds32_valid_multiple_load_store (rtx op, bool load_p)
++nds32_valid_multiple_load_store_p (rtx op, bool load_p, bool bim_p)
+ {
+   int count;
+   int first_elt_regno;
++  int update_base_elt_idx;
++  int offset;
+   rtx elt;
++  rtx update_base;
+ 
+-  /* Get the counts of elements in the parallel rtx.  */
+-  count = XVECLEN (op, 0);
+-  /* Pick up the first element.  */
+-  elt = XVECEXP (op, 0, 0);
++  /* Get the counts of elements in the parallel rtx.
++     If bim_p, element 0 is the base register update and is not counted.
++     Then pick up the first load/store element.  */
++  if (bim_p)
++    {
++      count = XVECLEN (op, 0) - 1;
++      elt = XVECEXP (op, 0, 1);
++    }
++  else
++    {
++      count = XVECLEN (op, 0);
++      elt = XVECEXP (op, 0, 0);
++    }
+ 
+   /* Perform some quick check for the first element in the parallel rtx.  */
+   if (GET_CODE (elt) != SET
+       || count <= 1
+-      || count > 8)
++      || count > 25)
+     return false;
+ 
+   /* Pick up regno of first element for further detail checking.
+@@ -138,11 +177,29 @@ nds32_valid_multiple_load_store (rtx op, bool load_p)
+      Refer to nds32-multiple.md for more information
+      about following checking.
+      The starting element of parallel rtx is index 0.  */
+-  if (!nds32_consecutive_registers_load_store_p (op, load_p, 0,
++  if (!nds32_consecutive_registers_load_store_p (op, load_p, bim_p ? 1 : 0,
+ 						 first_elt_regno,
+ 						 count))
+     return false;
+ 
++  if (bim_p)
++    {
++      update_base_elt_idx = 0;
++      update_base = XVECEXP (op, 0, update_base_elt_idx);
++      if (!REG_P (SET_DEST (update_base)))
++	return false;
++      if (GET_CODE (SET_SRC (update_base)) != PLUS)
++	return false;
++      else
++	{
++	  offset = count * UNITS_PER_WORD;
++	  elt = XEXP (SET_SRC (update_base), 1);
++	  if (GET_CODE (elt) != CONST_INT
++	      || (INTVAL (elt) != offset))
++	    return false;
++	}
++    }
++
+   /* Pass all test, this is a valid rtx.  */
+   return true;
+ }
+@@ -174,47 +231,47 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
+     {
+       elt = XVECEXP (op, 0, index);
+       if (GET_CODE (elt) != SET)
+-        return false;
++	return false;
+     }
+ 
+   /* For push operation, the parallel rtx looks like:
+      (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32)))
+-                     (reg:SI Rb))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -28)))
+-                     (reg:SI Rb+1))
+-                ...
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -16)))
+-                     (reg:SI Re))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -12)))
+-                     (reg:SI FP_REGNUM))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -8)))
+-                     (reg:SI GP_REGNUM))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -4)))
+-                     (reg:SI LP_REGNUM))
+-                (set (reg:SI SP_REGNUM)
+-                     (plus (reg:SI SP_REGNUM) (const_int -32)))])
++		     (reg:SI Rb))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -28)))
++		     (reg:SI Rb+1))
++		...
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -16)))
++		     (reg:SI Re))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -12)))
++		     (reg:SI FP_REGNUM))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -8)))
++		     (reg:SI GP_REGNUM))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -4)))
++		     (reg:SI LP_REGNUM))
++		(set (reg:SI SP_REGNUM)
++		     (plus (reg:SI SP_REGNUM) (const_int -32)))])
+ 
+      For pop operation, the parallel rtx looks like:
+      (parallel [(set (reg:SI Rb)
+-                     (mem (reg:SI SP_REGNUM)))
+-                (set (reg:SI Rb+1)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 4))))
+-                ...
+-                (set (reg:SI Re)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 16))))
+-                (set (reg:SI FP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 20))))
+-                (set (reg:SI GP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 24))))
+-                (set (reg:SI LP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 28))))
+-                (set (reg:SI SP_REGNUM)
+-                     (plus (reg:SI SP_REGNUM) (const_int 32)))]) */
++		     (mem (reg:SI SP_REGNUM)))
++		(set (reg:SI Rb+1)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 4))))
++		...
++		(set (reg:SI Re)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 16))))
++		(set (reg:SI FP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 20))))
++		(set (reg:SI GP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 24))))
++		(set (reg:SI LP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 28))))
++		(set (reg:SI SP_REGNUM)
++		     (plus (reg:SI SP_REGNUM) (const_int 32)))]) */
+ 
+   /* 1. Consecutive registers push/pop operations.
+-        We need to calculate how many registers should be consecutive.
+-        The $sp adjustment rtx, $fp push rtx, $gp push rtx,
+-        and $lp push rtx are excluded.  */
++	We need to calculate how many registers should be consecutive.
++	The $sp adjustment rtx, $fp push rtx, $gp push rtx,
++	and $lp push rtx are excluded.  */
+ 
+   /* Detect whether we have $fp, $gp, or $lp in the parallel rtx.  */
+   save_fp = reg_mentioned_p (gen_rtx_REG (SImode, FP_REGNUM), op);
+@@ -238,19 +295,19 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
+       first_regno = REGNO (elt_reg);
+ 
+       /* The 'push' operation is a kind of store operation.
+-         The 'pop' operation is a kind of load operation.
+-         Pass corresponding false/true as second argument (bool load_p).
+-         The par_index is supposed to start with index 0.  */
++	 The 'pop' operation is a kind of load operation.
++	 Pass corresponding false/true as second argument (bool load_p).
++	 The par_index is supposed to start with index 0.  */
+       if (!nds32_consecutive_registers_load_store_p (op,
+ 						     !push_p ? true : false,
+ 						     0,
+ 						     first_regno,
+ 						     rest_count))
+-        return false;
++	return false;
+     }
+ 
+   /* 2. Valid $fp/$gp/$lp push/pop operations.
+-        Remember to set start index for checking them.  */
++	Remember to set start index for checking them.  */
+ 
+   /* The rest_count is the start index for checking $fp/$gp/$lp.  */
+   index = rest_count;
+@@ -269,9 +326,9 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
+       index++;
+ 
+       if (GET_CODE (elt_mem) != MEM
+-          || GET_CODE (elt_reg) != REG
+-          || REGNO (elt_reg) != FP_REGNUM)
+-        return false;
++	  || GET_CODE (elt_reg) != REG
++	  || REGNO (elt_reg) != FP_REGNUM)
++	return false;
+     }
+   if (save_gp)
+     {
+@@ -281,9 +338,9 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
+       index++;
+ 
+       if (GET_CODE (elt_mem) != MEM
+-          || GET_CODE (elt_reg) != REG
+-          || REGNO (elt_reg) != GP_REGNUM)
+-        return false;
++	  || GET_CODE (elt_reg) != REG
++	  || REGNO (elt_reg) != GP_REGNUM)
++	return false;
+     }
+   if (save_lp)
+     {
+@@ -293,16 +350,16 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
+       index++;
+ 
+       if (GET_CODE (elt_mem) != MEM
+-          || GET_CODE (elt_reg) != REG
+-          || REGNO (elt_reg) != LP_REGNUM)
+-        return false;
++	  || GET_CODE (elt_reg) != REG
++	  || REGNO (elt_reg) != LP_REGNUM)
++	return false;
+     }
+ 
+   /* 3. The last element must be stack adjustment rtx.
+-        Its form of rtx should be:
+-          (set (reg:SI SP_REGNUM)
+-               (plus (reg:SI SP_REGNUM) (const_int X)))
+-        The X could be positive or negative value.  */
++	Its form of rtx should be:
++	  (set (reg:SI SP_REGNUM)
++	       (plus (reg:SI SP_REGNUM) (const_int X)))
++	The X could be positive or negative value.  */
+ 
+   /* Pick up the last element.  */
+   elt = XVECEXP (op, 0, total_count - 1);
+@@ -322,54 +379,57 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
+ }
+ 
+ /* Function to check if 'bclr' instruction can be used with IVAL.  */
+-int
+-nds32_can_use_bclr_p (int ival)
++bool
++nds32_can_use_bclr_p (HOST_WIDE_INT ival)
+ {
+   int one_bit_count;
++  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+ 
+   /* Calculate the number of 1-bit of (~ival), if there is only one 1-bit,
+      it means the original ival has only one 0-bit,
+      So it is ok to perform 'bclr' operation.  */
+ 
+-  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival));
++  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival) & mask);
+ 
+   /* 'bclr' is a performance extension instruction.  */
+-  return (TARGET_PERF_EXT && (one_bit_count == 1));
++  return (TARGET_EXT_PERF && (one_bit_count == 1));
+ }
+ 
+ /* Function to check if 'bset' instruction can be used with IVAL.  */
+-int
+-nds32_can_use_bset_p (int ival)
++bool
++nds32_can_use_bset_p (HOST_WIDE_INT ival)
+ {
+   int one_bit_count;
++  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+ 
+   /* Caculate the number of 1-bit of ival, if there is only one 1-bit,
+      it is ok to perform 'bset' operation.  */
+ 
+-  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival));
++  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask);
+ 
+   /* 'bset' is a performance extension instruction.  */
+-  return (TARGET_PERF_EXT && (one_bit_count == 1));
++  return (TARGET_EXT_PERF && (one_bit_count == 1));
+ }
+ 
+ /* Function to check if 'btgl' instruction can be used with IVAL.  */
+-int
+-nds32_can_use_btgl_p (int ival)
++bool
++nds32_can_use_btgl_p (HOST_WIDE_INT ival)
+ {
+   int one_bit_count;
++  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+ 
+   /* Caculate the number of 1-bit of ival, if there is only one 1-bit,
+      it is ok to perform 'btgl' operation.  */
+ 
+-  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival));
++  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask);
+ 
+   /* 'btgl' is a performance extension instruction.  */
+-  return (TARGET_PERF_EXT && (one_bit_count == 1));
++  return (TARGET_EXT_PERF && (one_bit_count == 1));
+ }
+ 
+ /* Function to check if 'bitci' instruction can be used with IVAL.  */
+-int
+-nds32_can_use_bitci_p (int ival)
++bool
++nds32_can_use_bitci_p (HOST_WIDE_INT ival)
+ {
+   /* If we are using V3 ISA, we have 'bitci' instruction.
+      Try to see if we can present 'andi' semantic with
+@@ -381,4 +441,286 @@ nds32_can_use_bitci_p (int ival)
+ 	  && satisfies_constraint_Iu15 (gen_int_mode (~ival, SImode)));
+ }
+ 
++/* Return true if INSN is a load/store whose address is a SYMBOL_REF or
++   LO_SUM (a ZERO_EXTEND/SIGN_EXTEND around the memory is looked through).  */
++bool
++nds32_symbol_load_store_p (rtx_insn *insn)
++{
++  rtx mem_src = NULL_RTX;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++      mem_src = SET_SRC (PATTERN (insn));
++      break;
++    case TYPE_STORE:
++      mem_src = SET_DEST (PATTERN (insn));
++      break;
++    default:
++      break;
++    }
++
++  /* Match a memory address that is a SYMBOL_REF or a LO_SUM.  */
++  if (mem_src != NULL_RTX)
++    {
++      if ((GET_CODE (mem_src) == ZERO_EXTEND)
++	  || (GET_CODE (mem_src) == SIGN_EXTEND))
++	mem_src = XEXP (mem_src, 0);
++
++      if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF)
++	   || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM))
++	return true;
++    }
++
++  return false;
++}
++
++/* Return true if OP is a valid memory operand for FP loads and stores.  */
++bool
++nds32_float_mem_operand_p (rtx op)
++{
++  enum machine_mode mode = GET_MODE (op);
++  rtx addr = XEXP (op, 0);
++
++  /* [symbol], [const] and lo_sum addresses are not supported.  */
++  if (GET_CODE (addr) == SYMBOL_REF
++      || GET_CODE (addr) == CONST
++      || GET_CODE (addr) == LO_SUM)
++    return false;
++
++  if (GET_CODE (addr) == PLUS)
++    {
++      if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
++	return false;
++
++      /* Reject aligned offsets that fail Is14; range is (imm12s << 2).  */
++      if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
++	{
++	  if ((mode == SImode || mode == SFmode)
++	      && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1)))
++	      && !satisfies_constraint_Is14 ( XEXP(addr, 1)))
++	    return false;
++
++	  if ((mode == DImode || mode == DFmode)
++	      && NDS32_DOUBLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1)))
++	      && !satisfies_constraint_Is14 (XEXP (addr, 1)))
++	    return false;
++	}
++    }
++
++  return true;
++}
++
++int
++nds32_cond_move_p (rtx cmp_rtx)
++{
++  /* Return nonzero only for an ORDERED/UNORDERED comparison whose two
++     operands both have SFmode or DFmode.  */
++  enum rtx_code code = GET_CODE (cmp_rtx);
++  enum machine_mode lhs_mode = GET_MODE (XEXP (cmp_rtx, 0));
++  enum machine_mode rhs_mode = GET_MODE (XEXP (cmp_rtx, 1));
++
++  return ((code == ORDERED || code == UNORDERED)
++	  && (lhs_mode == DFmode || lhs_mode == SFmode)
++	  && (rhs_mode == DFmode || rhs_mode == SFmode));
++}
++
++/* Return true if MEM1 and MEM2 are suitable for an fldi or fsdi instruction.
++
++   Both must be non-volatile, MEM1 must be 64-bit aligned with an offset
++   divisible by 8, and both addresses must use the same base register
++   with MEM2's constant offset exactly 4 above MEM1's (a bare register
++   address for MEM1 counts as offset 0).  */
++bool
++nds32_memory_merge_peep_p (rtx mem1, rtx mem2)
++{
++  rtx addr1, addr2;
++  unsigned int reg1;
++  HOST_WIDE_INT offset1;
++
++  /* The mems cannot be volatile.  */
++  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
++    return false;
++
++  /* MEM1 should be aligned on a 64-bit boundary.  */
++  if (MEM_ALIGN (mem1) < 64)
++    return false;
++
++  addr1 = XEXP (mem1, 0);
++  addr2 = XEXP (mem2, 0);
++
++  /* Extract a register number and offset (if used) from the first addr.  */
++  if (GET_CODE (addr1) == PLUS)
++    {
++      if (GET_CODE (XEXP (addr1, 0)) != REG)
++	return false;
++      else
++	{
++	  reg1 = REGNO (XEXP (addr1, 0));
++	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
++	    return false;
++
++	  offset1 = INTVAL (XEXP (addr1, 1));
++	}
++    }
++  else if (GET_CODE (addr1) != REG)
++    return false;
++  else
++    {
++     reg1 = REGNO (addr1);
++      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
++      offset1 = 0;
++    }
++  /* Make sure the second address is (plus (reg) (const_int)).  */
++  if (GET_CODE (addr2) != PLUS)
++    return false;
++
++  if (GET_CODE (XEXP (addr2, 0)) != REG
++      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
++    return false;
++
++  if (reg1 != REGNO (XEXP (addr2, 0)))
++    return false;
++
++  /* The first offset must be evenly divisible by 8 to ensure the
++     address is 64 bit aligned.  */
++  if (offset1 % 8 != 0)
++    return false;
++
++  /* The offset for the second addr must be 4 more than the first addr.  */
++  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
++    return false;
++
++  return true;
++}
++
++bool
++nds32_const_double_range_ok_p (rtx op, enum machine_mode mode,
++			       HOST_WIDE_INT lower, HOST_WIDE_INT upper)
++{
++  /* Accept only a CONST_DOUBLE of MODE whose target single-precision
++     image lies in the half-open range [LOWER, UPPER).  */
++  long image;
++
++  if (GET_CODE (op) != CONST_DOUBLE)
++    return false;
++  if (GET_MODE (op) != mode)
++    return false;
++
++  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), image);
++  return lower <= image && image < upper;
++}
++
++bool
++nds32_const_unspec_p (rtx x)
++{
++  /* Look through (const ...) and, inside it, through the first operand
++     of a PLUS.  An UNSPEC found there decides the answer: false for the
++     GOT/PLT/TLS unspecs listed below, true for any other.  */
++  if (GET_CODE (x) == CONST)
++    {
++      x = XEXP (x, 0);
++      if (GET_CODE (x) == PLUS)
++	x = XEXP (x, 0);
++
++      if (GET_CODE (x) == UNSPEC)
++	switch (XINT (x, 1))
++	  {
++	  case UNSPEC_GOTINIT:
++	  case UNSPEC_GOT:
++	  case UNSPEC_GOTOFF:
++	  case UNSPEC_PLT:
++	  case UNSPEC_TLSGD:
++	  case UNSPEC_TLSLD:
++	  case UNSPEC_TLSIE:
++	  case UNSPEC_TLSLE:
++	    return false;
++	  default:
++	    return true;
++	  }
++    }
++
++  /* Otherwise reject only a SYMBOL_REF with a TLS model; X may be the
++     expression unwrapped above.  */
++  if (GET_CODE (x) != SYMBOL_REF)
++    return true;
++  return !SYMBOL_REF_TLS_MODEL (x);
++}
++
++/* Pack the elements of the V2HImode or V4QImode constant vector OP into
++   one integer and return it.  Each element is masked to its width and
++   placed above the previously packed ones: element 0 lands in the lowest
++   bits, except for TARGET_BIG_ENDIAN where the last element does.  Any
++   other mode is a caller bug (gcc_unreachable).  */
++HOST_WIDE_INT
++const_vector_to_hwint (rtx op)
++{
++  HOST_WIDE_INT hwint = 0;
++  HOST_WIDE_INT mask;
++  int i;
++  int shift_adv;
++  int shift = 0;
++  int nelem;
++
++  switch (GET_MODE (op))
++    {
++      case V2HImode:
++	mask = 0xffff;
++	shift_adv = 16;
++	nelem = 2;
++	break;
++      case V4QImode:
++	mask = 0xff;
++	shift_adv = 8;
++	nelem = 4;
++	break;
++      default:
++	gcc_unreachable ();
++    }
++
++  for (i = 0; i < nelem; ++i)
++    {
++      /* Pick the element that belongs at this bit position.  */
++      rtx elt = XVECEXP (op, 0, TARGET_BIG_ENDIAN ? nelem - i - 1 : i);
++
++      /* Elements of an integer CONST_VECTOR are CONST_INTs, whose value
++	 must be read with INTVAL.  XINT reads the rtx 'int' field
++	 instead: that trips RTL checking and aliases the low bits of
++	 the HOST_WIDE_INT only on little-endian hosts.  */
++      HOST_WIDE_INT val = INTVAL (elt);
++      hwint |= (val & mask) << shift;
++      shift = shift + shift_adv;
++    }
++
++  return hwint;
++}
++
++bool
++nds32_valid_CVp5_p (rtx op)
++{
++  HOST_WIDE_INT packed = const_vector_to_hwint (op);
++  return 16 <= packed && packed < 16 + (1 << 5);  /* In [16, 48).  */
++}
++
++bool
++nds32_valid_CVs5_p (rtx op)
++{
++  HOST_WIDE_INT packed = const_vector_to_hwint (op);
++  return -(1 << 4) <= packed && packed < (1 << 4);  /* In [-16, 16).  */
++}
++
++bool
++nds32_valid_CVs2_p (rtx op)
++{
++  HOST_WIDE_INT packed = const_vector_to_hwint (op);
++  return -(1 << 19) <= packed && packed < (1 << 19);  /* 20-bit signed.  */
++}
++
++bool
++nds32_valid_CVhi_p (rtx op)
++{
++  HOST_WIDE_INT packed = const_vector_to_hwint (op);
++  return packed != 0 && (packed & 0xfff) == 0;  /* Nonzero, low 12 clear.  */
++}
++
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h
+index d66749d..19e69e3 100644
+--- a/gcc/config/nds32/nds32-protos.h
++++ b/gcc/config/nds32/nds32-protos.h
+@@ -28,10 +28,14 @@ extern void nds32_init_expanders (void);
+ 
+ /* Register Usage.  */
+ 
++/* -- Order of Allocation of Registers.  */
++extern void nds32_adjust_reg_alloc_order (void);
++
+ /* -- How Values Fit in Registers.  */
+ 
+-extern int nds32_hard_regno_nregs (int, machine_mode);
+-extern int nds32_hard_regno_mode_ok (int, machine_mode);
++extern int nds32_hard_regno_nregs (int, enum machine_mode);
++extern int nds32_hard_regno_mode_ok (int, enum machine_mode);
++extern int nds32_modes_tieable_p (enum machine_mode, enum machine_mode);
+ 
+ 
+ /* Register Classes.  */
+@@ -43,6 +47,7 @@ extern enum reg_class nds32_regno_reg_class (int);
+ 
+ /* -- Basic Stack Layout.  */
+ 
++extern rtx nds32_dynamic_chain_address (rtx);
+ extern rtx nds32_return_addr_rtx (int, rtx);
+ 
+ /* -- Eliminating Frame Pointer and Arg Pointer.  */
+@@ -61,22 +66,88 @@ extern void nds32_expand_prologue (void);
+ extern void nds32_expand_epilogue (bool);
+ extern void nds32_expand_prologue_v3push (void);
+ extern void nds32_expand_epilogue_v3pop (bool);
++extern void nds32_emit_push_fpr_callee_saved (int);
++extern void nds32_emit_pop_fpr_callee_saved (int);
++extern void nds32_emit_v3pop_fpr_callee_saved (int);
++
++/* Controlling Debugging Information Format.  */
++
++extern unsigned int nds32_dbx_register_number (unsigned int);
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* Auxiliary functions for auxiliary macros in nds32.h.  */
++/* Auxiliary functions for manipulation DI mode.  */
+ 
+-extern bool nds32_ls_333_p (rtx, rtx, rtx, machine_mode);
++extern rtx nds32_di_high_part_subreg(rtx);
++extern rtx nds32_di_low_part_subreg(rtx);
+ 
+ /* Auxiliary functions for expanding rtl used in nds32-multiple.md.  */
+ 
+-extern rtx nds32_expand_load_multiple (int, int, rtx, rtx);
+-extern rtx nds32_expand_store_multiple (int, int, rtx, rtx);
+-extern int nds32_expand_movmemqi (rtx, rtx, rtx, rtx);
++extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *);
++extern rtx nds32_expand_store_multiple (int, int, rtx, rtx, bool, rtx *);
++extern bool nds32_expand_movmemsi (rtx, rtx, rtx, rtx);
++extern bool nds32_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
++extern bool nds32_expand_movstr (rtx, rtx, rtx);
++extern bool nds32_expand_strlen (rtx, rtx, rtx, rtx);
+ 
+ /* Auxiliary functions for multiple load/store predicate checking.  */
+ 
+-extern bool nds32_valid_multiple_load_store (rtx, bool);
++extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool);
++
++/* Auxiliary functions for guard function checking in pipelines.md.  */
++
++extern bool nds32_n7_load_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n7_last_load_to_ii_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_n8_load_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n8_load_bi_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n8_load_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n8_ex_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n8_last_load_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n8_last_load_two_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n8_last_load_to_ex_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_e8_load_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_e8_load_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_e8_ex_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_e8_last_load_to_ii_p (rtx_insn *, rtx_insn *);
++extern bool nds32_e8_last_load_to_ex_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_gw_ex_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_gw_mm_to_ex_p (rtx_insn *, rtx_insn *);
++extern bool nds32_gw_last_load_to_ex_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_n13_e2_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n13_load_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n13_load_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n13_last_load_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n13_last_load_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_n13_last_two_load_to_e1_p (rtx_insn *, rtx_insn *);
++
++extern bool nds32_pn_e2_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_e3_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_e3_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_e4_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_e4_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_e4_to_e3_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_wb_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_wb_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_wb_to_e3_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_wb_to_e4_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_last_load_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_last_load_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_last_load_to_e3_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_last_two_load_to_e1_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_last_two_load_to_e2_p (rtx_insn *, rtx_insn *);
++extern bool nds32_pn_last_three_load_to_e1_p (rtx_insn *, rtx_insn *);
+ 
+ /* Auxiliary functions for stack operation predicate checking.  */
+ 
+@@ -84,55 +155,176 @@ extern bool nds32_valid_stack_push_pop_p (rtx, bool);
+ 
+ /* Auxiliary functions for bit operation detection.  */
+ 
+-extern int nds32_can_use_bclr_p (int);
+-extern int nds32_can_use_bset_p (int);
+-extern int nds32_can_use_btgl_p (int);
++extern bool nds32_can_use_bclr_p (HOST_WIDE_INT);
++extern bool nds32_can_use_bset_p (HOST_WIDE_INT);
++extern bool nds32_can_use_btgl_p (HOST_WIDE_INT);
+ 
+-extern int nds32_can_use_bitci_p (int);
++extern bool nds32_can_use_bitci_p (HOST_WIDE_INT);
+ 
+-/* Auxiliary function for 'Computing the Length of an Insn'.  */
++extern bool nds32_const_double_range_ok_p (rtx, enum machine_mode,
++					   HOST_WIDE_INT, HOST_WIDE_INT);
+ 
+-extern int nds32_adjust_insn_length (rtx_insn *, int);
++extern bool nds32_const_unspec_p (rtx x);
+ 
+ /* Auxiliary functions for FP_AS_GP detection.  */
+ 
+-extern int nds32_fp_as_gp_check_available (void);
++extern bool nds32_symbol_load_store_p (rtx_insn *);
++extern bool nds32_naked_function_p (tree);
+ 
+ /* Auxiliary functions for jump table generation.  */
+ 
+ extern const char *nds32_output_casesi_pc_relative (rtx *);
+ extern const char *nds32_output_casesi (rtx *);
+ 
++/* Auxiliary functions for conditional branch generation.  */
++
++extern enum nds32_expand_result_type nds32_expand_cbranch (rtx *);
++extern enum nds32_expand_result_type nds32_expand_cstore (rtx *);
++extern void nds32_expand_float_cbranch (rtx *);
++extern void nds32_expand_float_cstore (rtx *);
++
++/* Auxiliary functions for conditional move generation.  */
++
++extern enum nds32_expand_result_type nds32_expand_movcc (rtx *);
++extern void nds32_expand_float_movcc (rtx *);
++
++/* Auxiliary functions for expanding unaligned load instructions.  */
++
++extern void nds32_expand_unaligned_load (rtx *, enum machine_mode);
++
++/* Auxiliary functions for expand extv/insv instruction.  */
++
++extern enum nds32_expand_result_type nds32_expand_extv (rtx *);
++extern enum nds32_expand_result_type nds32_expand_insv (rtx *);
++
++/* Auxiliary functions for expanding unaligned store instructions.  */
++
++extern void nds32_expand_unaligned_store (rtx *, enum machine_mode);
++
++/* Auxiliary functions for expand PIC instruction.  */
++
++extern void nds32_expand_pic_move (rtx *);
++
++/* Auxiliary functions to legitimize PIC address.  */
++
++extern rtx nds32_legitimize_pic_address (rtx);
++
++/* Auxiliary functions for expand TLS instruction.  */
++
++extern void nds32_expand_tls_move (rtx *);
++
++/* Auxiliary functions to legitimize TLS address.  */
++
++extern rtx nds32_legitimize_tls_address (rtx);
++
++/* Auxiliary functions to identify thread-local symbol.  */
++
++extern bool nds32_tls_referenced_p (rtx);
++
++/* Auxiliary functions for expand ICT instruction.  */
++
++extern void nds32_expand_ict_move (rtx *);
++
++/* Auxiliary functions to legitimize address for indirect-call symbol.  */
++
++extern rtx nds32_legitimize_ict_address (rtx);
++
++/* Auxiliary functions to identify indirect-call symbol.  */
++
++extern bool nds32_indirect_call_referenced_p (rtx);
++
++/* Auxiliary functions to identify long-call symbol.  */
++extern bool nds32_long_call_p (rtx);
++
++/* Auxiliary functions to identify SYMBOL_REF and LABEL_REF pattern.  */
++
++extern bool symbolic_reference_mentioned_p (rtx);
++
++/* Auxiliary functions to identify conditional move comparison operand.  */
++
++extern int nds32_cond_move_p (rtx);
++
++/* Auxiliary functions to identify address for peephole2 merge instruction.  */
++
++extern bool nds32_memory_merge_peep_p (rtx, rtx);
++
+ /* Auxiliary functions to identify 16 bit addresing mode.  */
+ 
+ extern enum nds32_16bit_address_type nds32_mem_format (rtx);
+ 
++/* Auxiliary functions to identify floating-point addressing mode.  */
++
++extern bool nds32_float_mem_operand_p (rtx);
++
+ /* Auxiliary functions to output assembly code.  */
+ 
+ extern const char *nds32_output_16bit_store (rtx *, int);
+ extern const char *nds32_output_16bit_load (rtx *, int);
+ extern const char *nds32_output_32bit_store (rtx *, int);
+ extern const char *nds32_output_32bit_load (rtx *, int);
+-extern const char *nds32_output_32bit_load_s (rtx *, int);
++extern const char *nds32_output_32bit_load_se (rtx *, int);
++extern const char *nds32_output_float_load(rtx *);
++extern const char *nds32_output_float_store(rtx *);
++extern const char *nds32_output_smw_single_word (rtx *);
++extern const char *nds32_output_smw_double_word (rtx *);
++extern const char *nds32_output_lmw_single_word (rtx *);
++extern const char *nds32_output_double (rtx *, bool);
++extern const char *nds32_output_cbranchsi4_equality_zero (rtx_insn *, rtx *);
++extern const char *nds32_output_cbranchsi4_equality_reg (rtx_insn *, rtx *);
++extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *,
++								      rtx *);
++extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *);
++
++extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool);
++
++extern const char *nds32_output_call (rtx, rtx *, rtx,
++				      const char *, const char *, bool);
++extern const char *nds32_output_tls_desc (rtx *);
++extern const char *nds32_output_tls_ie (rtx *);
+ 
+ /* Auxiliary functions to output stack push/pop instruction.  */
+ 
+ extern const char *nds32_output_stack_push (rtx);
+ extern const char *nds32_output_stack_pop (rtx);
++extern const char *nds32_output_return (void);
++
++
++/* Auxiliary functions to split/output sms pattern.  */
++extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx);
++extern const char *nds32_output_sms (rtx, rtx, rtx, rtx);
++extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
++
++/* Auxiliary functions to split double word RTX pattern.  */
++
++extern void nds32_spilt_doubleword (rtx *, bool);
++extern void nds32_split_ashiftdi3 (rtx, rtx, rtx);
++extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx);
++extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx);
++extern void nds32_split_rotatertdi3 (rtx, rtx, rtx);
++
++/* Auxiliary functions to split large constant RTX pattern.  */
++
++extern void nds32_expand_constant (enum machine_mode,
++				   HOST_WIDE_INT, rtx, rtx);
+ 
+ /* Auxiliary functions to check using return with null epilogue.  */
+ 
+ extern int nds32_can_use_return_insn (void);
++extern enum machine_mode nds32_case_vector_shorten_mode (int, int, rtx);
+ 
+ /* Auxiliary functions to decide output alignment or not.  */
+ 
+ extern int nds32_target_alignment (rtx);
++extern unsigned int nds32_data_alignment (tree, unsigned int);
++extern unsigned int nds32_constant_alignment (tree, unsigned int);
++extern unsigned int nds32_local_alignment (tree, unsigned int);
+ 
+ /* Auxiliary functions to expand builtin functions.  */
+ 
+ extern void nds32_init_builtins_impl (void);
+ extern rtx nds32_expand_builtin_impl (tree, rtx, rtx,
+-				      machine_mode, int);
++				      enum machine_mode, int);
++extern tree nds32_builtin_decl_impl (unsigned, bool);
+ 
+ /* Auxiliary functions for ISR implementation.  */
+ 
+@@ -141,10 +333,86 @@ extern void nds32_construct_isr_vectors_information (tree, const char *);
+ extern void nds32_asm_file_start_for_isr (void);
+ extern void nds32_asm_file_end_for_isr (void);
+ extern bool nds32_isr_function_p (tree);
++extern bool nds32_isr_function_critical_p (tree);
+ 
+ /* Auxiliary functions for cost calculation.  */
+ 
++extern void nds32_init_rtx_costs (void);
+ extern bool nds32_rtx_costs_impl (rtx, machine_mode, int, int, int *, bool);
+-extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool);
++extern int nds32_address_cost_impl (rtx, enum machine_mode, addr_space_t, bool);
++extern struct register_pass_info insert_pass_fp_as_gp;
++
++extern int nds32_adjust_insn_length (rtx_insn *, int);
++
++/* Auxiliary functions for pre-defined macros.  */
++extern void nds32_cpu_cpp_builtins(struct cpp_reader *);
++
++/* Auxiliary functions for const_vector's constraints.  */
++
++extern HOST_WIDE_INT const_vector_to_hwint (rtx);
++extern bool nds32_valid_CVp5_p (rtx);
++extern bool nds32_valid_CVs5_p (rtx);
++extern bool nds32_valid_CVs2_p (rtx);
++extern bool nds32_valid_CVhi_p (rtx);
++
++/* Auxiliary functions for lmw/smw.  */
++
++extern bool nds32_valid_smw_lwm_base_p (rtx);
++
++/* Auxiliary functions for register rename pass.  */
++extern reg_class_t nds32_preferred_rename_class_impl (reg_class_t);
++
++extern bool nds32_split_double_word_load_store_p (rtx *,bool);
++
++namespace nds32 {
++
++extern rtx extract_pattern_from_insn (rtx);
++
++size_t parallel_elements (rtx);
++rtx parallel_element (rtx, int);
++
++bool insn_pseudo_nop_p (rtx_insn *);
++bool insn_executable_p (rtx_insn *);
++rtx_insn *prev_executable_insn (rtx_insn *);
++rtx_insn *next_executable_insn (rtx_insn *);
++rtx_insn *prev_executable_insn_local (rtx_insn *);
++rtx_insn *next_executable_insn_local (rtx_insn *);
++bool insn_deleted_p (rtx_insn *);
++
++bool load_single_p (rtx_insn *);
++bool store_single_p (rtx_insn *);
++bool load_double_p (rtx_insn *);
++bool store_double_p (rtx_insn *);
++bool store_offset_reg_p (rtx_insn *);
++bool load_full_word_p (rtx_insn *);
++bool load_partial_word_p (rtx_insn *);
++bool post_update_insn_p (rtx_insn *);
++bool immed_offset_p (rtx);
++int find_post_update_rtx (rtx_insn *);
++rtx extract_mem_rtx (rtx_insn *);
++rtx extract_base_reg (rtx_insn *);
++rtx extract_offset_rtx (rtx_insn *);
++
++rtx extract_shift_reg (rtx_insn *);
++
++bool movd44_insn_p (rtx_insn *);
++rtx extract_movd44_even_reg (rtx_insn *);
++rtx extract_movd44_odd_reg (rtx_insn *);
++
++rtx extract_mac_acc_rtx (rtx_insn *);
++rtx extract_mac_non_acc_rtx (rtx_insn *);
++
++bool divmod_p (rtx_insn *);
++
++rtx extract_branch_target_rtx (rtx_insn *);
++rtx extract_branch_condition_rtx (rtx_insn *);
++
++void compute_bb_for_insn_safe ();
++
++void exchange_insns (rtx_insn *, rtx_insn *);
++
++} // namespace nds32
++
++extern bool nds32_include_fp_arith;
+ 
+ /* ------------------------------------------------------------------------ */
+diff --git a/gcc/config/nds32/nds32-reg-utils.c b/gcc/config/nds32/nds32-reg-utils.c
+new file mode 100644
+index 0000000..1fd8a83
+--- /dev/null
++++ b/gcc/config/nds32/nds32-reg-utils.c
+@@ -0,0 +1,190 @@
++
++/* Register utility functions of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* ------------------------------------------------------------------------ */
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "target-globals.h"
++#include "ira.h"
++#include "ira-int.h"
++#include "nds32-reg-utils.h"
++
++#define NDS32_GPR_NUM 32
++
++static bool debug_live_reg = false;
++
++/* Set *LIVE to the LR-in set of BB stepped forwards to FIRST, plus every
++   register defined by the insns FIRST..LAST (inclusive).  */
++void
++nds32_live_regs (basic_block bb, rtx_insn *first, rtx_insn *last, bitmap *live)
++{
++  df_ref ref;
++  rtx_insn *cur;
++
++  bitmap_copy (*live, DF_LR_IN (bb));
++  df_simulate_initialize_forwards (bb, *live);
++  for (cur = BB_HEAD (bb); cur != first; cur = NEXT_INSN (cur))
++    df_simulate_one_insn_forwards (bb, cur, *live);
++
++  if (dump_file && debug_live_reg)
++    {
++      fprintf (dump_file, "scan live regs:\nfrom:\n");
++      print_rtl_single (dump_file, first);
++      fprintf (dump_file, "to:\n");
++      print_rtl_single (dump_file, last);
++      fprintf (dump_file, "bb lr in:\n");
++      dump_bitmap (dump_file, DF_LR_IN (bb));
++      fprintf (dump_file, "init:\n");
++      dump_bitmap (dump_file, *live);
++    }
++
++  /* Insns before LAST; anything that is not INSN_P is skipped.  */
++  cur = first;
++  while (cur != last)
++    {
++      if (INSN_P (cur))
++	{
++	  FOR_EACH_INSN_DEF (ref, cur)
++	    bitmap_set_bit (*live, DF_REF_REGNO (ref));
++	  if (dump_file && debug_live_reg)
++	    {
++	      fprintf (dump_file, "scaning:\n");
++	      print_rtl_single (dump_file, cur);
++	      dump_bitmap (dump_file, *live);
++	    }
++	}
++      cur = NEXT_INSN (cur);
++    }
++
++  /* LAST itself must be a real insn; record its definitions too.  */
++  gcc_assert (INSN_P (cur));
++  FOR_EACH_INSN_DEF (ref, cur)
++    bitmap_set_bit (*live, DF_REF_REGNO (ref));
++
++  if (dump_file && debug_live_reg)
++    {
++      fprintf (dump_file, "scaning:\n");
++      print_rtl_single (dump_file, last);
++      dump_bitmap (dump_file, *live);
++    }
++}
++
++/* Print PREFIX and then the names of the hard registers in SET to FILE,
++   comma separated and wrapped in braces, ending with a newline.  */
++void
++print_hard_reg_set (FILE *file, const char *prefix, HARD_REG_SET set)
++{
++  bool any_printed = false;
++  int regno;
++
++  fprintf (file, "%s{ ", prefix);
++  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
++    {
++      if (!TEST_HARD_REG_BIT (set, regno))
++	continue;
++
++      if (any_printed)
++	fprintf (file, ", %s", reg_names[regno]);
++      else
++	fprintf (file, "%s", reg_names[regno]);
++      any_printed = true;
++    }
++  fprintf (file, "}\n");
++}
++
++/* Store in *AVAILABLE_REGSET the GENERAL_REGS registers that nds32_live_regs
++   does not report as live over FIRST..LAST in BB, minus the fixed GPRs.  */
++void
++nds32_get_available_reg_set (basic_block bb,
++			     rtx_insn *first,
++			     rtx_insn *last,
++			     HARD_REG_SET *available_regset)
++{
++  HARD_REG_SET busy;
++  unsigned regno;
++  bitmap live = BITMAP_ALLOC (&reg_obstack);
++
++  nds32_live_regs (bb, first, last, &live);
++  REG_SET_TO_HARD_REG_SET (busy, live);
++  BITMAP_FREE (live);
++
++  /* Start from the complement of the live set...  */
++  COMPL_HARD_REG_SET (*available_regset, busy);
++  /* ...keep only the general registers...  */
++  AND_HARD_REG_SET (*available_regset, reg_class_contents[GENERAL_REGS]);
++
++  /* ...and drop the fixed registers among the GPRs.  */
++  regno = NDS32_FIRST_GPR_REGNUM;
++  while (regno <= NDS32_LAST_GPR_REGNUM)
++    {
++      if (fixed_regs[regno])
++	CLEAR_HARD_REG_BIT (*available_regset, regno);
++      ++regno;
++    }
++}
+diff --git a/gcc/config/nds32/nds32-reg-utils.h b/gcc/config/nds32/nds32-reg-utils.h
+new file mode 100644
+index 0000000..16c23a3
+--- /dev/null
++++ b/gcc/config/nds32/nds32-reg-utils.h
+@@ -0,0 +1,61 @@
++/* Prototypes for register utilities of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef NDS32_REG_UTILS_OPT_H
++#define NDS32_REG_UTILS_OPT_H
++
++/* Auxiliary functions for register usage analysis.  */
++extern void nds32_live_regs (basic_block, rtx_insn *, rtx_insn *, bitmap *);
++extern void print_hard_reg_set (FILE *, const char *, HARD_REG_SET);
++extern void nds32_get_available_reg_set (basic_block, rtx_insn *,
++					 rtx_insn *, HARD_REG_SET *);
++
++/* Return true if hard register REGNO is a member of class CLAZZ.  */
++static inline bool
++in_reg_class_p (unsigned regno, enum reg_class clazz)
++{
++  return TEST_HARD_REG_BIT (reg_class_contents[clazz], regno);
++}
++
++/* Likewise for the register rtx REG; REG must satisfy REG_P.  */
++static inline bool
++in_reg_class_p (rtx reg, enum reg_class clazz)
++{
++  gcc_assert (REG_P (reg));
++  return in_reg_class_p (REGNO (reg), clazz);
++}
++
++/* Return the first register found in CLAZZ that is set in *AVAILABLE_REGSET
++   and is either call-used or already ever live; INVALID_REGNUM if none.  */
++static inline unsigned
++find_available_reg (HARD_REG_SET *available_regset, enum reg_class clazz)
++{
++  hard_reg_set_iterator iter;
++  unsigned regno;
++
++  EXECUTE_IF_SET_IN_HARD_REG_SET (reg_class_contents[clazz], 0, regno, iter)
++    if (TEST_HARD_REG_BIT (*available_regset, regno)
++	&& (call_used_regs[regno] || df_regs_ever_live_p (regno)))
++      return regno;
++
++  return INVALID_REGNUM;
++}
++
++#endif
+diff --git a/gcc/config/nds32/nds32-regrename.c b/gcc/config/nds32/nds32-regrename.c
+new file mode 100644
+index 0000000..0875722
+--- /dev/null
++++ b/gcc/config/nds32/nds32-regrename.c
+@@ -0,0 +1,389 @@
++/* Register rename pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++#include "regrename.h"
++
++static reg_class_t current_preferred_rename_class = NO_REGS;
++
++/* Prefer the class currently being targeted by this pass for GENERAL_REGS;
++   express no preference (NO_REGS) for every other class.  */
++reg_class_t
++nds32_preferred_rename_class_impl (reg_class_t rclass)
++{
++  return (rclass == GENERAL_REGS
++	  ? current_preferred_rename_class : NO_REGS);
++}
++
++/* Print the numbers of the hard registers in SET to FILE, in braces.  */
++static void
++print_hard_reg_set (FILE *file, HARD_REG_SET set)
++{
++  int regno = 0;
++
++  fprintf (file, "{ ");
++  for (; regno < FIRST_PSEUDO_REGISTER; regno++)
++    if (TEST_HARD_REG_BIT (set, regno))
++      fprintf (file, "%d ", regno);
++  fprintf (file, "}\n");
++}
++
++/* Externally visible wrapper around print_hard_reg_set.  */
++void
++dump_hard_reg_set (FILE *file, HARD_REG_SET set)
++{
++  print_hard_reg_set (file, set);
++}
++
++/* Return true if hard register REGNO is a member of class CLAZZ.  */
++static bool
++in_reg_class_p (unsigned regno, enum reg_class clazz)
++{
++  return TEST_HARD_REG_BIT (reg_class_contents[clazz], regno);
++}
++
++/* Find a register in PREFERRED_CLASS to rename OP_CHAIN's register to.  */
++static unsigned
++try_find_best_rename_reg (du_head_p op_chain, reg_class_t preferred_class)
++{
++  const unsigned cur_reg = op_chain->regno;
++  HARD_REG_SET excluded;
++  unsigned picked;
++
++  COMPL_HARD_REG_SET (excluded, reg_class_contents[preferred_class]);
++  CLEAR_HARD_REG_BIT (excluded, cur_reg);
++  current_preferred_rename_class = preferred_class;
++  picked = find_rename_reg (op_chain, GENERAL_REGS, &excluded, cur_reg, false);
++  current_preferred_rename_class = NO_REGS;
++  return picked;
++}
++
++/* Try to rename the register used by operand OP_POS of INSN into
++   PREFERRED_RENAME_CLASS.  Return true only if the whole chain was
++   actually rewritten.  */
++static bool
++try_rename_operand_to (rtx insn, unsigned op_pos,
++		       reg_class_t preferred_rename_class)
++{
++  insn_rr_info *info = &insn_rr[INSN_UID (insn)];
++  du_head_p op_chain;
++  unsigned newreg;
++  unsigned oldreg;
++
++  /* No operand/chain information was recorded for this insn.  */
++  if (info->op_info == NULL || info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++  if (op_chain->cannot_rename)
++    return false;
++
++  /* Already use preferred class, so do nothing.  */
++  if (TEST_HARD_REG_BIT (reg_class_contents[preferred_rename_class],
++			 op_chain->regno))
++    return false;
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "Try to rename operand %u to %s:\n",
++	       op_pos, reg_class_names[preferred_rename_class]);
++      print_rtl_single (dump_file, insn);
++    }
++
++  oldreg = op_chain->regno;
++  newreg = try_find_best_rename_reg (op_chain, preferred_rename_class);
++  if (newreg == oldreg)
++    return false;
++
++  /* regrename_do_replace validates the rewritten insns and reports
++     failure; do not claim success if the chain was left untouched.  */
++  if (!regrename_do_replace (op_chain, newreg))
++    return false;
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "Rename operand %u to %s is Done:\n",
++	       op_pos, reg_class_names[preferred_rename_class]);
++      print_rtl_single (dump_file, insn);
++    }
++  return true;
++}
++
++/* Return true if renaming the destination (operand 0) of the slt-style
++   INSN to $r15 could allow a 16-bit slt[s][i]45 encoding.  */
++static bool
++rename_slt_profitlable (rtx insn)
++{
++  rtx src = SET_SRC (PATTERN (insn));
++  rtx op0 = XEXP (src, 0);
++  /* The second source operand is element 1 of the comparison; reading
++     element 0 again would make the immediate check below always pass.  */
++  rtx op1 = XEXP (src, 1);
++
++  insn_rr_info *info;
++  du_head_p op_chain;
++  int op_pos = 0;
++
++  info = &insn_rr[INSN_UID (insn)];
++
++  if (info->op_info == NULL)
++    return false;
++
++  if (info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  /* Nothing to gain when the destination is already $r15.  */
++  if (in_reg_class_p (op_chain->regno, R15_TA_REG))
++    return false;
++
++  /* slt[s]45 need second operand in MIDDLE_REGS class.  */
++  if (!REG_P (op0) || !in_reg_class_p (REGNO (op0), MIDDLE_REGS))
++    return false;
++
++  /* slt[s]i45 only allow 5 bit unsigned integer.  */
++  return (REG_P (op1)
++	  || (CONST_INT_P (op1) && satisfies_constraint_Iu05 (op1)));
++}
++
++/* Return true if operand 1 of the branch INSN has a rename chain whose
++   register is not already in LOW_REGS, so renaming it into LOW_REGS can
++   be attempted.  */
++static bool
++rename_cbranch_eq0_low_reg_profitlable (rtx insn)
++{
++  const int operand = 1;
++  insn_rr_info *rr = &insn_rr[INSN_UID (insn)];
++  du_head_p chain;
++
++  /* Bail out when no rename chain is recorded for the operand.  */
++  if (rr->op_info == NULL)
++    return false;
++
++  if (rr->op_info[operand].n_chains == 0)
++    return false;
++
++  chain = regrename_chain_from_id (rr->op_info[operand].heads[0]->id);
++
++  /* Only a register outside LOW_REGS is a candidate.  */
++  return !in_reg_class_p (chain->regno, LOW_REGS);
++}
++
++
++/* Return true if renaming operand 1 of the compare-with-zero branch INSN
++   to $r15 is worth trying: the operand has a rename chain, the chain
++   register is not already in R15_TA_REG, and the compared operand is not
++   a LOW_REGS register.  */
++static bool
++rename_cbranch_eq0_r15_profitlable (rtx insn)
++{
++  const int operand = 1;
++  rtx cond = XEXP (SET_SRC (PATTERN (insn)), 0);
++  rtx compared = XEXP (cond, 0);
++  insn_rr_info *rr;
++  du_head_p chain;
++
++  rr = &insn_rr[INSN_UID (insn)];
++  if (rr->op_info == NULL)
++    return false;
++
++  if (rr->op_info[operand].n_chains == 0)
++    return false;
++
++  chain = regrename_chain_from_id (rr->op_info[operand].heads[0]->id);
++
++  /* Nothing to do when the register already is $r15.  */
++  if (in_reg_class_p (chain->regno, R15_TA_REG))
++    return false;
++
++  /* LOW_REGS or R15_TA_REG both are 2-byte instruction.  */
++  if (REG_P (compared) && in_reg_class_p (REGNO (compared), LOW_REGS))
++    return false;
++
++  return true;
++}
++
++/* Return true if it is worth trying to rename operand 1 of the
++   register-compare branch INSN to $r5: a rename chain exists, its
++   register is not already in R5_REG, and the second operand of the
++   condition is a LOW_REGS register.  */
++static bool
++rename_cbranch_eq_reg_profitlable (rtx insn)
++{
++  const int operand = 1;
++  rtx cond = XEXP (SET_SRC (PATTERN (insn)), 0);
++  rtx other = XEXP (cond, 1);
++  insn_rr_info *rr = &insn_rr[INSN_UID (insn)];
++  du_head_p chain;
++
++  if (rr->op_info == NULL)
++    return false;
++
++  if (rr->op_info[operand].n_chains == 0)
++    return false;
++
++  chain = regrename_chain_from_id (rr->op_info[operand].heads[0]->id);
++
++  /* Nothing to do when the register already is $r5.  */
++  if (in_reg_class_p (chain->regno, R5_REG))
++    return false;
++
++  /* The other compared operand has to be a low register.  */
++  if (REG_P (other) && in_reg_class_p (REGNO (other), LOW_REGS))
++    return true;
++
++  return false;
++}
++
++/* Walk every insn of the current function and, for the recognized
++   compare and branch patterns, try the renames their checks approve.  */
++static void
++do_regrename ()
++{
++  basic_block bb;
++  rtx_insn *insn;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    FOR_BB_INSNS (bb, insn)
++      {
++	if (!INSN_P (insn))
++	  continue;
++
++	switch (recog_memoized (insn))
++	  {
++	  case CODE_FOR_slts_compare_impl:
++	  case CODE_FOR_slt_compare_impl:
++	  case CODE_FOR_slt_eq0:
++	    /* Try to rename operand 0 to $r15 if profitable.  */
++	    if (rename_slt_profitlable (insn))
++	      try_rename_operand_to (insn, 0, R15_TA_REG);
++	    break;
++	  case CODE_FOR_cbranchsi4_equality_zero:
++	    /* Try $r15 for operand 1 first, then fall back to LOW_REGS.  */
++	    if (!rename_cbranch_eq0_r15_profitlable (insn))
++	      break;
++	    if (try_rename_operand_to (insn, 1, R15_TA_REG))
++	      break;
++	    if (rename_cbranch_eq0_low_reg_profitlable (insn))
++	      try_rename_operand_to (insn, 1, LOW_REGS);
++	    break;
++	  case CODE_FOR_cbranchsi4_equality_reg:
++	  case CODE_FOR_cbranchsi4_equality_reg_or_const_int:
++	    /* Try to rename operand 1 to $r5.  */
++	    if (rename_cbranch_eq_reg_profitlable (insn))
++	      try_rename_operand_to (insn, 1, R5_REG);
++	    break;
++	  default:
++	    break;
++	  }
++      }
++}
++/* Body of the pass: df setup, regrename analysis, renaming.  Returns 1.  */
++static unsigned int
++nds32_regrename (void)
++{
++  df_set_flags (DF_LR_RUN_DCE);
++  df_note_add_problem ();
++  df_analyze ();
++  df_set_flags (DF_DEFER_INSN_RESCAN);
++
++  regrename_init (true);
++
++  regrename_analyze (NULL);
++
++  do_regrename ();
++
++  regrename_finish ();
++  return 1;
++}
++/* Static description of the "nds32-regrename" RTL pass.  */
++const pass_data pass_data_nds32_regrename =
++{
++  RTL_PASS,				/* type */
++  "nds32-regrename",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_df_finish,			/* todo_flags_finish */
++};
++
++class pass_nds32_regrename_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_regrename_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_regrename, ctxt)
++  {}
++  /* opt_pass methods: the pass runs nds32_regrename and is gated on both
++     TARGET_16_BIT and TARGET_REGRENAME_OPT.  */
++  bool gate (function *) { return TARGET_16_BIT && TARGET_REGRENAME_OPT; }
++  unsigned int execute (function *) { return nds32_regrename (); }
++};
++/* Create a new instance of the nds32 regrename pass for CTXT.  */
++rtl_opt_pass *
++make_pass_nds32_regrename_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_regrename_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-relax-opt.c b/gcc/config/nds32/nds32-relax-opt.c
+new file mode 100644
+index 0000000..0919af6
+--- /dev/null
++++ b/gcc/config/nds32/nds32-relax-opt.c
+@@ -0,0 +1,612 @@
++/* relax-opt pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"   /* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"     /* For DFA state_t.  */
++#include "insn-codes.h"    /* For CODE_FOR_xxx.  */
++#include "reload.h"     /* For push_reload ().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "emit-rtl.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"     /* For add_builtin_function ().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "target-globals.h"
++using namespace nds32;
++
++/* This is used to create unique relax hint id value.
++   The initial value is 0.  */
++static int relax_group_id = 0;
++
++/* Group the following pattern as relax candidates:
++
++   1. sethi	$ra, hi20(sym)
++      ori	$ra, $ra, lo12(sym)
++    ==>
++      addi.gp	$ra, sym
++
++   2. sethi	$ra, hi20(sym)
++      lwi	$rb, [$ra + lo12(sym)]
++    ==>
++      lwi.gp	$rb, [(sym)]
++
++   3. sethi	$ra, hi20(sym)
++      ori	$ra, $ra, lo12(sym)
++      lwi	$rb, [$ra]
++      swi	$rc, [$ra]
++    ==>
++      lwi37	$rb, [(sym)]
++      swi37	$rc, [(sym)] */
++
++/* Return true if INSN has type load or store and the address of its
++   memory operand, looked at through any zero/sign extension, is a
++   plain REG.
++   NOTE(review): the memory mode is not checked here -- confirm intent.  */
++static bool
++nds32_reg_base_load_store_p (rtx_insn *insn)
++{
++  rtx mem = NULL_RTX;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++      mem = SET_SRC (PATTERN (insn));
++      break;
++    case TYPE_STORE:
++      mem = SET_DEST (PATTERN (insn));
++      break;
++    default:
++      break;
++    }
++
++  /* Not a load/store: nothing to inspect.  */
++  if (mem == NULL_RTX)
++    return false;
++
++  /* Look through an extension wrapped around the memory operand.  */
++  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
++    mem = XEXP (mem, 0);
++
++  /* Operand 0 of the memory reference is its address.  */
++  return GET_CODE (XEXP (mem, 0)) == REG;
++}
++
++/* Return true if INSN is a load or store whose address is, or is a PLUS
++   based on, the stack pointer -- or the frame pointer when
++   frame_pointer_needed is set.  */
++static bool
++nds32_sp_base_or_plus_load_store_p (rtx_insn *insn)
++{
++  rtx mem = NULL_RTX;
++  rtx base;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++      mem = SET_SRC (PATTERN (insn));
++      break;
++    case TYPE_STORE:
++      mem = SET_DEST (PATTERN (insn));
++      break;
++    default:
++      break;
++    }
++
++  if (mem == NULL_RTX)
++    return false;
++
++  /* Look through an extension wrapped around the memory operand.  */
++  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
++    mem = XEXP (mem, 0);
++
++  /* For a PLUS address the base is the first operand of the PLUS.  */
++  base = XEXP (mem, 0);
++  if (GET_CODE (base) == PLUS)
++    base = XEXP (base, 0);
++
++  return (REG_P (base)
++	  && (REGNO (base) == SP_REGNUM
++	      || (frame_pointer_needed && REGNO (base) == FP_REGNUM)));
++}
++
++/* Return true if INSN is a load or store whose address is a PLUS with a
++   REG as its first operand, i.e. [REG + something].  The second operand
++   of the PLUS is not inspected.  */
++static bool
++nds32_plus_reg_load_store_p (rtx_insn *insn)
++{
++  rtx mem = NULL_RTX;
++  rtx addr;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++      mem = SET_SRC (PATTERN (insn));
++      break;
++    case TYPE_STORE:
++      mem = SET_DEST (PATTERN (insn));
++      break;
++    default:
++      break;
++    }
++
++  /* Not a load/store: nothing to inspect.  */
++  if (mem == NULL_RTX)
++    return false;
++
++  /* Look through an extension wrapped around the memory operand.  */
++  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
++    mem = XEXP (mem, 0);
++
++  /* Operand 0 of the memory reference is its address.  */
++  addr = XEXP (mem, 0);
++  if (GET_CODE (addr) != PLUS)
++    return false;
++
++  /* Only the base (first) operand of the PLUS has to be a register.  */
++  return GET_CODE (XEXP (addr, 0)) == REG;
++}
++
++/* Return true if INSN is recognized as the hwloop_last_insn pattern,
++   false for every other insn.  */
++static bool
++nds32_hwloop_last_insn_p (rtx_insn *insn)
++{
++  int icode = recog_memoized (insn);
++
++  return icode == CODE_FOR_hwloop_last_insn;
++}
++
++/* Return true if X is a CONST whose operand satisfies
++   nds32_indirect_call_referenced_p, i.e. references an ict symbol.  */
++static bool
++nds32_ict_const_p (rtx x)
++{
++  /* Anything that is not wrapped in a CONST cannot qualify.  */
++  if (GET_CODE (x) != CONST)
++    return FALSE;
++
++  return nds32_indirect_call_referenced_p (XEXP (x, 0));
++}
++
++/* Group the following pattern as relax candidates:
++
++   GOT:
++      sethi	$ra, hi20(sym)
++      ori	$ra, $ra, lo12(sym)
++      lw	$rb, [$ra + $gp]
++
++   GOTOFF, TLSLE:
++      sethi	$ra, hi20(sym)
++      ori	$ra, $ra, lo12(sym)
++      LS	$rb, [$ra + $gp]
++
++   GOTOFF, TLSLE:
++      sethi	$ra, hi20(sym)
++      ori	$ra, $ra, lo12(sym)
++      add	$rb, $ra, $gp($tp)
++
++   Initial GOT table:
++      sethi	$gp,hi20(sym)
++      ori	$gp, $gp, lo12(sym)
++      add5.pc	$gp  */
++
++static auto_vec<rtx_insn *, 32> nds32_group_infos;
++/* Collect DEF_INSN's users as PIC/TLS relax candidates; FALSE on a misfit.  */
++static bool
++nds32_pic_tls_group (rtx_insn *def_insn,
++		     enum nds32_relax_insn_type relax_type,
++		     int sym_type)
++{
++  df_ref def_record;
++  df_link *link;
++  rtx_insn *use_insn = NULL;
++  rtx pat, new_pat;
++  def_record = DF_INSN_DEFS (def_insn);
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++	continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Give up unless the block of DEF_INSN dominates that of USE_INSN.  */
++      if (!dominated_by_p (CDI_DOMINATORS,
++			   BLOCK_FOR_INSN (use_insn),
++			   BLOCK_FOR_INSN (def_insn)))
++	return FALSE;
++
++      /* Give up if USE_INSN is not an active insn.  */
++      if (!active_insn_p (use_insn))
++	return FALSE;
++
++      switch (relax_type)
++	{
++	case RELAX_ORI:
++
++	  /* GOTOFF, TLSLE:
++	     sethi	$ra, hi20(sym)
++	     ori	$ra, $ra, lo12(sym)
++	     add	$rb, $ra, $gp($tp)  */
++	  if ((sym_type == UNSPEC_TLSLE
++	       || sym_type == UNSPEC_GOTOFF)
++	      && (recog_memoized (use_insn) == CODE_FOR_addsi3))
++	    {
++	      pat = XEXP (PATTERN (use_insn), 1);
++	      new_pat =
++		gen_rtx_UNSPEC (SImode,
++				gen_rtvec (2, XEXP (pat, 0), XEXP (pat, 1)),
++				UNSPEC_ADD32);
++	      validate_replace_rtx (pat, new_pat, use_insn);
++	      nds32_group_infos.safe_push (use_insn);
++	    }
++	  else if (nds32_plus_reg_load_store_p (use_insn)
++		   && !nds32_sp_base_or_plus_load_store_p (use_insn))
++	    nds32_group_infos.safe_push (use_insn);
++	  else
++	    return FALSE;
++	  break;
++
++	default:
++	  return FALSE;
++	}
++    }
++  return TRUE;
++}
++
++static int
++nds32_pic_tls_symbol_type (rtx x)
++{
++  /* X is passed as an insn; start from operand 1 of its SET_SRC.  */
++  rtx sym = XEXP (SET_SRC (PATTERN (x)), 1);
++
++  /* Strip an enclosing CONST and, inside it, an optional PLUS.  */
++  if (GET_CODE (sym) == CONST)
++    {
++      sym = XEXP (sym, 0);
++      if (GET_CODE (sym) == PLUS)
++	sym = XEXP (sym, 0);
++    }
++
++  /* Integer field 1 of what remains (the unspec number for an UNSPEC).  */
++  return XINT (sym, 1);
++}
++
++/* Tag SETHI and every insn using its result with one relax group id.  */
++static void
++nds32_group_insns (rtx sethi)
++{
++  df_ref def_record, use_record;
++  df_link *link;
++  rtx_insn *use_insn = NULL;
++  rtx group_id;
++  bool valid;
++
++  def_record = DF_INSN_DEFS (sethi);
++
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++	continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Give up unless the block of SETHI dominates that of USE_INSN.  */
++      if (!dominated_by_p (CDI_DOMINATORS,
++			   BLOCK_FOR_INSN (use_insn),
++			   BLOCK_FOR_INSN (sethi)))
++	return;
++
++      /* Give up if the first use in USE_INSN has more than one
++	 definition on its chain.  */
++      use_record = DF_INSN_USES (use_insn);
++      if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next)
++	return;
++
++      /* Give up if USE_INSN is not an active insn.  */
++      if (!active_insn_p (use_insn))
++	return;
++
++      /* USE_INSN must be a lo_sum or one of the accepted load/stores.  */
++      if (!(recog_memoized (use_insn) == CODE_FOR_lo_sum
++	    || nds32_symbol_load_store_p (use_insn)
++	    || (nds32_reg_base_load_store_p (use_insn)
++		&&!nds32_sp_base_or_plus_load_store_p (use_insn))))
++	return;
++    }
++
++  group_id = GEN_INT (relax_group_id);
++  /* Insert .relax_* directive for sethi.  */
++  emit_insn_before (gen_relax_group (group_id), sethi);
++
++  /* Scan the use insns and insert the directive.  */
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++	continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Insert .relax_* directive.  */
++      if (active_insn_p (use_insn))
++	emit_insn_before (gen_relax_group (group_id), use_insn);
++
++      /* Find ori ra, ra, unspec(symbol) instruction.  */
++      if (use_insn != NULL
++	  && recog_memoized (use_insn) == CODE_FOR_lo_sum
++	  && !nds32_const_unspec_p (XEXP (SET_SRC (PATTERN (use_insn)), 1)))
++	{
++	  int sym_type = nds32_pic_tls_symbol_type (use_insn);
++	  valid = nds32_pic_tls_group (use_insn, RELAX_ORI, sym_type);
++
++	  /* Drain the collected insns; tag them only if the group is valid.  */
++	  while (!nds32_group_infos.is_empty ())
++	    {
++	      use_insn = nds32_group_infos.pop ();
++	      if (valid)
++		emit_insn_before (gen_relax_group (group_id), use_insn);
++	    }
++	}
++    }
++
++  relax_group_id++;
++}
++
++/* Store the current relax_group_id into INSN's relax-group operand and
++   advance the id.  INSN's pattern is accessed as a PARALLEL whose
++   element 1 holds that operand in its operand 0; the callers pass
++   tls_ie and tls_desc insns.  */
++
++static void
++nds32_group_tls_insn (rtx insn)
++{
++  rtx pat = PATTERN (insn);
++  rtx unspec_relax_group = XEXP (XVECEXP (pat, 0, 1), 0);
++
++  /* Only rewrite the operand when it really is the relax-group UNSPEC.  */
++  if (GET_CODE (unspec_relax_group) == UNSPEC
++      && XINT (unspec_relax_group, 1) == UNSPEC_VOLATILE_RELAX_GROUP)
++    {
++      XVECEXP (unspec_relax_group, 0, 0) = GEN_INT (relax_group_id);
++    }
++
++  /* The id advances even when the operand above was not recognized.  */
++  relax_group_id++;
++}
++
++static bool
++nds32_float_reg_load_store_p (rtx_insn *insn)
++{
++  rtx pat = PATTERN (insn);
++
++  /* Accept only a TYPE_FLOAD insn whose pattern is a plain SET.  */
++  if (get_attr_type (insn) != TYPE_FLOAD || GET_CODE (pat) != SET)
++    return false;
++
++  /* The destination must be SFmode or DFmode and the source a MEM.  */
++  rtx dst = XEXP (pat, 0);
++  rtx src = XEXP (pat, 1);
++  if ((GET_MODE (dst) != SFmode && GET_MODE (dst) != DFmode)
++      || !MEM_P (src))
++    return false;
++
++  /* Address forms accepted: [$ra] and [$ra + constant offset].  */
++  rtx addr = XEXP (src, 0);
++  if (REG_P (addr))
++    return true;
++  return (GET_CODE (addr) == PLUS
++	  && REG_P (XEXP (addr, 0))
++	  && CONST_INT_P (XEXP (addr, 1)));
++}
++
++
++/* Tag INSN and the float loads that use its result with one group id:
++   la $ra, symbol
++   flsi $rt, [$ra + offset]  */
++
++static void
++nds32_group_float_insns (rtx insn)
++{
++  df_ref def_record, use_record;
++  df_link *link;
++  rtx_insn *use_insn = NULL;
++  rtx group_id;
++
++  def_record = DF_INSN_DEFS (insn);
++
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++	continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Give up unless the block of INSN dominates that of USE_INSN.  */
++      if (!dominated_by_p (CDI_DOMINATORS,
++			   BLOCK_FOR_INSN (use_insn),
++			   BLOCK_FOR_INSN (insn)))
++	return;
++
++      /* Give up if the first use in USE_INSN has more than one
++	 definition on its chain.  */
++      use_record = DF_INSN_USES (use_insn);
++      if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next)
++	return;
++
++      /* Give up if USE_INSN is not an active insn.  */
++      if (!active_insn_p (use_insn))
++	return;
++
++      if (!nds32_float_reg_load_store_p (use_insn)
++	  || find_post_update_rtx (use_insn) != -1)
++	return;
++    }
++
++  group_id = GEN_INT (relax_group_id);
++  /* Insert .relax_* directive for insn.  */
++  emit_insn_before (gen_relax_group (group_id), insn);
++
++  /* Scan the use insns and insert the directive.  */
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++	continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Insert .relax_* directive.  */
++	emit_insn_before (gen_relax_group (group_id), use_insn);
++    }
++
++  relax_group_id++;
++}
++
++/* Group the relax candidate instructions for linker.  */
++static void
++nds32_relax_group (void)
++{
++  rtx_insn *insn;
++
++  compute_bb_for_insn ();
++
++  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
++  df_insn_rescan_all ();
++  df_analyze ();
++  df_set_flags (DF_DEFER_INSN_RESCAN);
++  calculate_dominance_info (CDI_DOMINATORS);
++
++  insn = get_insns ();
++  gcc_assert (NOTE_P (insn));
++
++  for (insn = next_active_insn (insn); insn; insn = next_active_insn (insn))
++    {
++      if (NONJUMP_INSN_P (insn))
++	{
++	  /* Find sethi ra, symbol instruction.  */
++	  if (recog_memoized (insn) == CODE_FOR_sethi
++	      && nds32_symbolic_operand (XEXP (SET_SRC (PATTERN (insn)), 0),
++					 SImode)
++	      && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))
++	      && !nds32_hwloop_last_insn_p (next_active_insn (insn)))
++
++	    nds32_group_insns (insn);
++	  else if (recog_memoized (insn) == CODE_FOR_tls_ie)
++	    nds32_group_tls_insn (insn);
++	  else if (TARGET_FPU_SINGLE
++		   && recog_memoized (insn) == CODE_FOR_move_addr
++		   && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))
++		   && !nds32_hwloop_last_insn_p (next_active_insn (insn)))
++	    {
++	      nds32_group_float_insns (insn);
++	    }
++	}
++      else if (CALL_P (insn) && recog_memoized (insn) == CODE_FOR_tls_desc)
++	{
++	  nds32_group_tls_insn (insn);
++	}
++    }
++
++  /* Call df_finish_pass here so that it runs before BB information is
++     destroyed.  NOTE(review): pass_data_nds32_relax_opt also requests
++     TODO_df_finish -- confirm that finishing twice is intended.  */
++  df_insn_rescan_all ();
++  df_finish_pass (false);
++  free_dominance_info (CDI_DOMINATORS);
++}
++/* Pass body: run nds32_relax_group if TARGET_RELAX_HINT; returns 1.  */
++static unsigned int
++nds32_relax_opt (void)
++{
++  if (TARGET_RELAX_HINT)
++    nds32_relax_group ();
++  return 1;
++}
++/* Descriptor of the "relax_opt" RTL pass.  */
++const pass_data pass_data_nds32_relax_opt =
++{
++  RTL_PASS,				/* type */
++  "relax_opt",				/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_df_finish,			/* todo_flags_finish */
++};
++
++class pass_nds32_relax_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_relax_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_relax_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gated on TARGET_RELAX_HINT; execute runs the pass.  */
++  bool gate (function *) { return TARGET_RELAX_HINT; }
++  unsigned int execute (function *) { return nds32_relax_opt (); }
++};
++/* Return a newly allocated relax_opt pass object for CTXT.  */
++rtl_opt_pass *
++make_pass_nds32_relax_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_relax_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-scalbn-transform.c b/gcc/config/nds32/nds32-scalbn-transform.c
+new file mode 100644
+index 0000000..fba7c6f
+--- /dev/null
++++ b/gcc/config/nds32/nds32-scalbn-transform.c
+@@ -0,0 +1,364 @@
++/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler.
++   This pass transforms the multiplications whose multiplier is a
++   power of 2.
++
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"   /* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"     /* For DFA state_t.  */
++#include "insn-codes.h"    /* For CODE_FOR_xxx.  */
++#include "reload.h"     /* For push_reload ().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"     /* For add_builtin_function ().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "tree-ssa-alias.h"
++#include "fold-const.h"
++#include "gimple-expr.h"
++#include "is-a.h"
++#include "gimple.h"
++#include "gimplify.h"
++#include "gimple-iterator.h"
++#include "gimplify-me.h"
++#include "gimple-ssa.h"
++#include "ipa-ref.h"
++#include "lto-streamer.h"
++#include "cgraph.h"
++#include "tree-cfg.h"
++#include "tree-phinodes.h"
++#include "stringpool.h"
++#include "tree-ssanames.h"
++#include "tree-pass.h"
++#include "gimple-pretty-print.h"
++#include "gimple-fold.h"
++
++
++/* Return true if the current function is named scalbn/scalbnf or has an
++   alias whose assembler name is scalbn/scalbnf; otherwise return false.  */
++
++static bool
++nds32_is_scalbn_alias_func_p (void)
++{
++  int i;
++  struct ipa_ref *ref;
++  struct cgraph_node *cfun_node;
++
++  if (!strcmp (function_name (cfun), "scalbn")
++      || !strcmp (function_name (cfun), "scalbnf"))
++    return true;
++
++  cfun_node = cgraph_node::get (current_function_decl);
++
++  if (!cfun_node)
++    return false;
++
++  for (i = 0; cfun_node->iterate_referring (i, ref); i++)
++    if (ref->use == IPA_REF_ALIAS)
++      {
++	struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
++	if (!strcmp (alias->asm_name (), "scalbn")
++	    || !strcmp (alias->asm_name (), "scalbnf"))
++	  return true;
++      }
++
++  return false;
++}
++
++/* Return true if RT is a REAL_CST of class rvc_normal whose significand
++   has only its top bit set, i.e. its magnitude is a power of 2.  */
++static bool
++nds32_real_ispow2_p (tree rt)
++{
++  if (TREE_CODE (rt) != REAL_CST)
++    return false;
++
++  const REAL_VALUE_TYPE *val = TREE_REAL_CST_PTR (rt);
++  if (val->cl != rvc_normal)
++    return false;
++
++  /* Every significand word below the top one must be zero ...  */
++  for (int word = 0; word < SIGSZ - 1; ++word)
++    if (val->sig[word] != 0)
++      return false;
++
++  /* ... and the top word must hold nothing but the MSB.  */
++  return val->sig[SIGSZ - 1] == SIG_MSB;
++}
++
++/* Return the base-2 exponent of RT, computed as its REAL_EXP minus 1.  */
++
++static int
++nds32_real_pow2exp (tree rt)
++{
++  return REAL_EXP (TREE_REAL_CST_PTR (rt)) - 1;
++}
++
++/* Return true if GS is a MULT_EXPR assignment whose first operand has a
++   REAL_TYPE and whose second operand is a power-of-2 real constant.  */
++static bool
++nds32_scalbn_transform_target_p (gimple *gs)
++{
++  if (!is_gimple_assign (gs)
++      || gimple_assign_rhs_code (gs) != MULT_EXPR)
++    return false;
++
++  return (TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (gs))) == REAL_TYPE
++	  && nds32_real_ispow2_p (gimple_assign_rhs2 (gs)));
++}
++
++/* Do scalbn transform for a GIMPLE statement GS.
++
++   When the multiplier of GS is a positive number, GS is transformed
++   into one gimple_call statement followed by one gimple_assign
++   statement, as follows:
++   A = B * 128.0	-> temp = BUILT_IN_SCALBN (B, 7)
++			   A = temp
++
++   When the multiplier is a negative number, its sign is flipped first
++   since BUILT_IN_SCALBN cannot express a negative multiplier, and the
++   result of the call is negated instead.  For example:
++   A = B * -128.0	-> temp = BUILT_IN_SCALBN (B, 7)
++			   A = -temp
++*/
++
++static void
++nds32_do_scalbn_transform (gimple *gs)
++{
++  tree mult_cand = gimple_assign_rhs1 (gs);	/* Multiplicand  */
++  tree mult_er = gimple_assign_rhs2 (gs);	/* Multiplier  */
++  bool is_neg = false;
++
++  /* Choose the builtin by the type of the multiplicand.  */
++  enum built_in_function fn_name;
++  tree type = TREE_TYPE (mult_cand);
++  if (TYPE_MAIN_VARIANT (type) == double_type_node)
++    fn_name = BUILT_IN_SCALBN;
++  else if (TYPE_MAIN_VARIANT (type) == float_type_node)
++    fn_name = BUILT_IN_SCALBNF;
++  /* Do not transform long double to scalbnl: some C libraries do not
++     provide it when the target has no real long double type.
++  else if (TYPE_MAIN_VARIANT (type) == long_double_type_node)
++    fn_name = BUILT_IN_SCALBNL;
++  */
++  else
++    return;
++
++  /* Flip the sign of a negative multiplier and remember doing so.  */
++  if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (mult_er)))
++    {
++      is_neg = true;
++      mult_er = build_real (TREE_TYPE (mult_er),
++			    real_value_negate (&TREE_REAL_CST (mult_er)));
++    }
++
++  /* Look up the decl of the chosen builtin for the gimple_call.  */
++  tree fndecl = builtin_decl_explicit (fn_name);
++
++  /* The exponent of the multiplier is the last argument of the call.  */
++  tree exp = build_int_cst (integer_type_node,
++			    nds32_real_pow2exp (mult_er));
++
++  /* Build a new temp ssa with the type of the lhs of GS.  */
++  tree temp_call_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (gs)), NULL);
++
++  /* Build the gimple_call stmt whose result goes to the temp ssa.  */
++  gimple *call_stmt = gimple_build_call (fndecl,
++					 2,
++					 mult_cand,
++					 exp);
++  gimple_call_set_lhs (call_stmt, temp_call_ssa);
++
++  enum tree_code subcode = NOP_EXPR;
++  /* A flipped multiplier requires negating the call result.  */
++  if (is_neg)
++    subcode = NEGATE_EXPR;
++
++  /* Build the gimple_assign that copies or negates the call result.  */
++  gimple *assign_stmt =
++    gimple_build_assign (gimple_assign_lhs (gs),
++			 subcode,
++			 gimple_call_lhs (call_stmt));
++
++  /* Insert the new gimple_call in front of GS.  */
++  gimple_stmt_iterator gsi = gsi_for_stmt (gs);
++  update_stmt (call_stmt);
++  gsi_insert_before (&gsi, call_stmt, GSI_NEW_STMT);
++
++  /* Step past the call onto GS and replace GS by the gimple_assign.  */
++  update_stmt (assign_stmt);
++  gsi_next (&gsi);
++  gsi_replace (&gsi, assign_stmt, false);
++}
++
++/* Transform the candidate statements of BB; return how many were seen.  */
++
++static int
++nds32_scalbn_transform_basic_block (basic_block bb)
++{
++  gimple_stmt_iterator gsi;
++  int transform_number = 0;
++
++  if (dump_file)
++    fprintf (dump_file,
++	     "\n;; Transforming the multiplication for basic block %d\n",
++	     bb->index);
++
++  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gimple *stmt = gsi_stmt (gsi);
++
++      if (nds32_scalbn_transform_target_p (stmt))
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file,
++		       "* The multiplier of stmt %d is transforming.\n",
++		       gimple_uid (stmt));
++	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM|TDF_RAW);
++	    }
++	  nds32_do_scalbn_transform (stmt);
++	  transform_number++;
++	}
++    }
++
++  return transform_number;
++}
++
++/* Entry point of the scalbn transform pass; always returns 0 or 1.  */
++
++static int
++nds32_scalbn_transform_opt (void)
++{
++  basic_block bb;
++  int total_transform_number = 0;
++
++  /* Skip functions that are named scalbn/scalbnf or alias them.  */
++  if (nds32_is_scalbn_alias_func_p ())
++    {
++      if (dump_file)
++	fprintf (dump_file,
++		 "* Ignore function %s. "
++		 "Transform it will cause infinite loop.\n",
++		 function_name (cfun));
++      return 0;
++    }
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      total_transform_number += nds32_scalbn_transform_basic_block (bb);
++    }
++
++  if (dump_file)
++    {
++      if (total_transform_number > 0)
++	fprintf (dump_file,
++		 "\n;; Transform %d multiplication stmt in function %s\n",
++		 total_transform_number,
++		 current_function_name ());
++      else
++	fprintf (dump_file,
++		 "\n;; No multiplication stmt is transformed in function %s\n",
++		 current_function_name ());
++    }
++
++  return 1;
++}
++
++static bool
++gate_nds32_scalbn_transform (void)
++{
++  /* Enabled by the option unless TARGET_FPU_SINGLE or flag_no_builtin.  */
++  return !(!flag_nds32_scalbn_transform || TARGET_FPU_SINGLE
++	   || flag_no_builtin);
++}
++/* Descriptor of the "scalbn_transform" GIMPLE pass.  */
++const pass_data pass_data_nds32_scalbn_transform_opt =
++{
++  GIMPLE_PASS,				/* type */
++  "scalbn_transform",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  ( PROP_cfg | PROP_ssa ),		/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_update_ssa,			/* todo_flags_finish */
++};
++
++class pass_nds32_scalbn_transform_opt : public gimple_opt_pass
++{
++public:
++  pass_nds32_scalbn_transform_opt (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_nds32_scalbn_transform_opt, ctxt)
++  {}
++
++  /* opt_pass methods: both simply forward to the static helpers.  */
++  bool gate (function *) { return gate_nds32_scalbn_transform (); }
++  unsigned int execute (function *) { return nds32_scalbn_transform_opt (); }
++};
++/* Return a newly allocated scalbn_transform pass object for CTXT.  */
++gimple_opt_pass *
++make_pass_nds32_scalbn_transform_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_scalbn_transform_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-sign-conversion.c b/gcc/config/nds32/nds32-sign-conversion.c
+new file mode 100644
+index 0000000..74eefba
+--- /dev/null
++++ b/gcc/config/nds32/nds32-sign-conversion.c
+@@ -0,0 +1,218 @@
++/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler that
++   negates the constant operand of a subtraction when the FPU is
++   not used.
++
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"   /* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"     /* For DFA state_t.  */
++#include "insn-codes.h"    /* For CODE_FOR_xxx.  */
++#include "reload.h"     /* For push_reload ().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"     /* For add_builtin_function ().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "tree-ssa-alias.h"
++#include "fold-const.h"
++#include "gimple-expr.h"
++#include "is-a.h"
++#include "gimple.h"
++#include "gimplify.h"
++#include "gimple-iterator.h"
++#include "gimplify-me.h"
++#include "gimple-ssa.h"
++#include "ipa-ref.h"
++#include "lto-streamer.h"
++#include "cgraph.h"
++#include "tree-cfg.h"
++#include "tree-phinodes.h"
++#include "stringpool.h"
++#include "tree-ssanames.h"
++#include "tree-pass.h"
++#include "gimple-pretty-print.h"
++#include "gimple-fold.h"
++
++/* Return true if GS is an assignment of the form A = B - C where C is
++   a REAL_CST, i.e. a candidate for sign conversion.  */
++static bool
++nds32_sign_conversion_target_p (gimple *gs)
++{
++  if (!is_gimple_assign (gs))
++    return false;
++
++  return (gimple_assign_rhs_code (gs) == MINUS_EXPR
++	  && TREE_CODE (gimple_assign_rhs2 (gs)) == REAL_CST);
++}
++
++/* Rewrite GS in place from A = B - C to A = B + (-C).  GS must be an
++   assignment whose second operand is a REAL_CST.  */
++static void
++nds32_do_sign_conversion (gimple *gs)
++{
++  /* Turn the operation into an addition.  */
++  gimple_assign_set_rhs_code (gs, PLUS_EXPR);
++
++  /* Replace the constant by its negation, keeping its type.  */
++  tree rhs2 = gimple_assign_rhs2 (gs);
++  tree negated = build_real (TREE_TYPE (rhs2),
++			     real_value_negate (&TREE_REAL_CST (rhs2)));
++  gimple_assign_set_rhs2 (gs, negated);
++
++  /* Mark the statement as modified.  */
++  update_stmt (gs);
++}
++
++/* Convert every candidate statement in BB; return how many there were.  */
++
++static int
++nds32_sign_conversion_basic_block (basic_block bb)
++{
++  gimple_stmt_iterator gsi;
++  int converse_number = 0;
++
++  if (dump_file)
++    fprintf (dump_file,
++	     "\n;; Conversing the sign of gimple stmts for basic block %d\n",
++	     bb->index);
++
++  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gimple *stmt = gsi_stmt (gsi);
++
++      if (nds32_sign_conversion_target_p (stmt))
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, "* The sign of stmt %d is conversing.\n",
++		       gimple_uid (stmt));
++	      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM|TDF_RAW);
++	    }
++	  nds32_do_sign_conversion (stmt);
++	  converse_number++;
++	}
++    }
++
++  return converse_number;
++}
++
++/* Entry point of the sign conversion pass; always returns 1.  */
++
++static int
++nds32_sign_conversion_opt (void)
++{
++  basic_block bb;
++  int total_converse_number = 0;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      total_converse_number += nds32_sign_conversion_basic_block (bb);
++    }
++
++  if (dump_file)
++    {
++      if (total_converse_number > 0)
++	fprintf (dump_file, "\n;; Converse %d stmts in function %s\n",
++		 total_converse_number,
++		 current_function_name ());
++      else
++	fprintf (dump_file,
++		 "\n;; No sign of stmt is conversed in function %s\n",
++		 current_function_name ());
++    }
++
++  return 1;
++}
++/* Descriptor of the "sign_conversion" GIMPLE pass.  */
++const pass_data pass_data_nds32_sign_conversion_opt =
++{
++  GIMPLE_PASS,				/* type */
++  "sign_conversion",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  ( PROP_cfg | PROP_ssa ),		/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  TODO_update_ssa,			/* todo_flags_finish */
++};
++
++class pass_nds32_sign_conversion_opt : public gimple_opt_pass
++{
++public:
++  pass_nds32_sign_conversion_opt (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_nds32_sign_conversion_opt, ctxt)
++  {}
++
++  /* opt_pass methods: gated on the option and on !TARGET_FPU_SINGLE.  */
++  bool gate (function *)
++  {
++    return flag_nds32_sign_conversion && !TARGET_FPU_SINGLE;
++  }
++  unsigned int execute (function *) { return nds32_sign_conversion_opt (); }
++};
++/* Return a newly allocated sign_conversion pass object for CTXT.  */
++gimple_opt_pass *
++make_pass_nds32_sign_conversion_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_sign_conversion_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-soft-fp-comm.c b/gcc/config/nds32/nds32-soft-fp-comm.c
+new file mode 100644
+index 0000000..98ba3d5
+--- /dev/null
++++ b/gcc/config/nds32/nds32-soft-fp-comm.c
+@@ -0,0 +1,205 @@
++/* Operand commutative for soft floating point arithmetic pass
++   of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "tree.h"
++#include "rtl.h"
++#include "df.h"
++#include "alias.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
++#include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++/* Argument register numbers for the SFmode and DFmode helper calls.  */
++#define SF_ARG0_REGNO 0
++#define SF_ARG1_REGNO 1
++
++#define DF_ARG0_REGNO 0
++#define DF_ARG1_REGNO 2
++
++/* For each call to __addsf3/__mulsf3/__adddf3/__muldf3, look at the SETs
++   of the two argument registers just before it; if one copies from the
++   other argument register, exchange the two destinations.  Returns 1.  */
++static int
++nds32_soft_fp_arith_comm_opt (void)
++{
++  basic_block bb;
++  rtx_insn *insn;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++	{
++	  if (!CALL_P (insn))
++	    continue;
++
++	  rtx pat = PATTERN (insn);
++	  rtx call_rtx = XVECEXP (pat, 0, 0);
++
++	  if (GET_CODE (call_rtx) == SET)
++	    call_rtx = SET_SRC (call_rtx);
++
++	  rtx func_mem = XEXP (call_rtx, 0);
++	  rtx symbol = XEXP (func_mem, 0);
++
++	  if (GET_CODE (symbol) != SYMBOL_REF)
++	    continue;
++
++	  const char *func_name = XSTR (symbol, 0);
++	  bool df_p;
++	  if (((strcmp("__mulsf3", func_name) == 0)
++	       || (strcmp("__addsf3", func_name) == 0)))
++	    df_p = false;
++	  else if (((strcmp("__muldf3", func_name) == 0)
++		   || (strcmp("__adddf3", func_name) == 0)))
++	    df_p = true;
++	  else
++	    continue;
++
++	  rtx_insn *prev_insn = insn;
++	  rtx_insn *arg0_insn = NULL;
++	  rtx_insn *arg1_insn = NULL;
++	  unsigned arg0_regno = df_p ? DF_ARG0_REGNO : SF_ARG0_REGNO;
++	  unsigned arg1_regno = df_p ? DF_ARG1_REGNO : SF_ARG1_REGNO;
++	  enum machine_mode mode = df_p ? DFmode : SFmode;
++	  /* Scan backwards over plain insns of this block for the SETs of
++	     both argument registers.  */
++	  while ((prev_insn = PREV_INSN (prev_insn)) != NULL)
++	    {
++	      if (arg0_insn != NULL && arg1_insn != NULL)
++		break;
++
++	      if (BLOCK_FOR_INSN (prev_insn) != BLOCK_FOR_INSN (insn))
++		break;
++
++	      if (!NONJUMP_INSN_P (prev_insn))
++		break;
++
++	      rtx set = PATTERN (prev_insn);
++
++	      if (GET_CODE (set) != SET)
++		continue;
++
++	      rtx dst_reg = SET_DEST (set);
++
++	      if (!REG_P (dst_reg))
++		break;
++
++	      unsigned regno = REGNO (dst_reg);
++
++	      if (regno == arg0_regno)
++		arg0_insn = prev_insn;
++	      else if (regno == arg1_regno)
++		arg1_insn = prev_insn;
++	      else
++		break;
++	    }
++	  if (arg0_insn == NULL || arg1_insn == NULL)
++	    continue;
++
++	  rtx arg0_src = SET_SRC (PATTERN (arg0_insn));
++	  rtx arg1_src = SET_SRC (PATTERN (arg1_insn));
++
++	  if ((REG_P (arg0_src)
++	       && GET_MODE (arg0_src) == mode
++	       && REGNO (arg0_src) == arg1_regno)
++	      || (REG_P (arg1_src)
++		  && GET_MODE (arg1_src) == mode
++		  && REGNO (arg1_src) == arg0_regno))
++	    {
++	      /* Exchange the destinations so the register copy becomes a
++		 self-move.  NOTE(review): the relative order of the two
++		 setters is not checked -- confirm "rX = ...; rY = rX"
++		 cannot reach here, since swapping it changes the args.  */
++	      rtx tmp = SET_DEST (PATTERN (arg0_insn));
++	      SET_DEST (PATTERN (arg0_insn)) = SET_DEST (PATTERN (arg1_insn));
++	      SET_DEST (PATTERN (arg1_insn)) = tmp;
++	    }
++	}
++    }
++  return 1;
++}
++
++const pass_data pass_data_nds32_soft_fp_arith_comm_opt =
++{
++  RTL_PASS,				/* type */
++  "soft_fp_arith_comm",			/* name */
++  OPTGROUP_NONE,			/* optinfo_flags */
++  TV_MACH_DEP,				/* tv_id */
++  0,					/* properties_required */
++  0,					/* properties_provided */
++  0,					/* properties_destroyed */
++  0,					/* todo_flags_start */
++  0,					/* todo_flags_finish */
++};
++
++class pass_nds32_soft_fp_arith_comm_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_soft_fp_arith_comm_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_soft_fp_arith_comm_opt, ctxt)
++  {}
++  /* opt_pass methods: the pass runs only when TARGET_SOFT_FP_ARITH_COMM is
++     set and TARGET_FPU_SINGLE is not.  */
++  bool gate (function *) {
++    return TARGET_SOFT_FP_ARITH_COMM && !TARGET_FPU_SINGLE;
++  }
++  unsigned int execute (function *) { return nds32_soft_fp_arith_comm_opt (); }
++};
++/* Allocate a new pass_nds32_soft_fp_arith_comm_opt instance for CTXT.  */
++rtl_opt_pass *
++make_pass_nds32_soft_fp_arith_comm_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_soft_fp_arith_comm_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c
+new file mode 100644
+index 0000000..3b16738
+--- /dev/null
++++ b/gcc/config/nds32/nds32-utils.c
+@@ -0,0 +1,923 @@
++/* Auxiliary functions for pipeline descriptions pattern of Andes
++   NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* ------------------------------------------------------------------------ */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "hash-set.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "wide-int.h"
++#include "inchash.h"
++#include "tree.h"
++#include "stor-layout.h"
++#include "varasm.h"
++#include "calls.h"
++#include "rtl.h"
++#include "regs.h"
++#include "hard-reg-set.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
++#include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "nds32-protos.h"
++
++namespace nds32 {
++
++/* Get the rtx in the PATTERN field of an insn.  If INSN is not an insn,
++   the function leaves it untouched and hands it straight back.  */
++rtx
++extract_pattern_from_insn (rtx insn)
++{
++  /* Non-insn rtxes (e.g. a bare pattern) pass through as they are.  */
++  if (!INSN_P (insn))
++    return insn;
++  return PATTERN (insn);
++}
++
++/* Get the number of elements in a parallel rtx.  PARALLEL_RTX may be the
++   PARALLEL itself or an insn whose pattern is one.  */
++size_t
++parallel_elements (rtx parallel_rtx)
++{
++  rtx body = extract_pattern_from_insn (parallel_rtx);
++  gcc_assert (GET_CODE (body) == PARALLEL);
++  return XVECLEN (body, 0);
++}
++
++/* Extract element NTH of a parallel rtx.  A negative NTH counts back from
++   the end of the vector.  NULL_RTX is returned when NTH falls outside the
++   vector.  */
++rtx
++parallel_element (rtx parallel_rtx, int nth)
++{
++  rtx body = extract_pattern_from_insn (parallel_rtx);
++  gcc_assert (GET_CODE (body) == PARALLEL);
++
++  const int len = parallel_elements (body);
++
++  /* Turn NTH into a plain index from the front: non-negative values are
++     used as they are, negative ones are offset by the vector length.  */
++  int index;
++  if (nth < 0)
++    index = len + nth;
++  else
++    index = nth;
++
++  /* Anything that does not land inside the vector yields NULL_RTX.  */
++  if (index < 0 || index >= len)
++    return NULL_RTX;
++
++  return XVECEXP (body, 0, index);
++}
++
++/* Return true if INSN is a pseudo NOP, i.e. one that is not a real
++   instruction occupying a real cycle and space of the text section.  */
++bool
++insn_pseudo_nop_p (rtx_insn *insn)
++{
++  /* Only the nop_data_dep and nop_res_dep patterns are pseudo NOPs.  */
++  const int icode = INSN_CODE (insn);
++
++  return (icode == CODE_FOR_nop_data_dep
++	  || icode == CODE_FOR_nop_res_dep);
++}
++
++/* Indicate whether INSN is a real insn which occupies at least one cycle.
++   The determination cannot be target-independent because some targets use
++   UNSPEC or UNSPEC_VOLATILE insns to represent real instructions.  */
++bool
++insn_executable_p (rtx_insn *insn)
++{
++  if (!INSN_P (insn))
++    return false;
++
++  /* Pseudo NOPs are reported as executable before the length attribute
++     is even looked at.  */
++  if (insn_pseudo_nop_p (insn))
++    return true;
++
++  /* A zero length attribute means the insn is not executable.  */
++  if (get_attr_length (insn) == 0)
++    return false;
++
++  /* Of what is left, insns whose pattern has one of the codes below are
++     treated as non-executable; every other pattern is executable.  */
++  const enum rtx_code pattern_code = GET_CODE (PATTERN (insn));
++  const bool non_executable_code_p
++    = (pattern_code == CONST_INT
++       || pattern_code == USE
++       || pattern_code == CLOBBER
++       || pattern_code == ADDR_VEC
++       || pattern_code == ADDR_DIFF_VEC
++       || pattern_code == UNSPEC
++       || pattern_code == UNSPEC_VOLATILE);
++
++  return !non_executable_code_p;
++}
++
++/* Return the nearest executable insn before INSN, or NULL if none.  */
++rtx_insn *
++prev_executable_insn (rtx_insn *insn)
++{
++  rtx_insn *cand;
++  for (cand = PREV_INSN (insn); cand != NULL; cand = PREV_INSN (cand))
++    if (insn_executable_p (cand))
++      break;
++  return cand;
++}
++
++/* Return the nearest executable insn after INSN, or NULL if none.  */
++rtx_insn *
++next_executable_insn (rtx_insn *insn)
++{
++  rtx_insn *cand;
++  for (cand = NEXT_INSN (insn); cand != NULL; cand = NEXT_INSN (cand))
++    if (insn_executable_p (cand))
++      break;
++  return cand;
++}
++
++/* Find the previous executable insn in the current basic block.  */
++rtx_insn *
++prev_executable_insn_local (rtx_insn *insn)
++{
++  for (insn = PREV_INSN (insn); insn; insn = PREV_INSN (insn))
++    {
++      if (insn_executable_p (insn))
++	break;
++
++      /* A non-executable label, jump or call ends the search.  */
++      if (LABEL_P (insn) || JUMP_P (insn) || CALL_P (insn))
++	return NULL;
++    }
++  return insn;
++}
++
++/* Find the next executable insn in the current basic block.  */
++rtx_insn *
++next_executable_insn_local (rtx_insn *insn)
++{
++  for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn))
++    {
++      if (insn_executable_p (insn))
++	break;
++
++      /* A non-executable label, jump or call ends the search.  */
++      if (LABEL_P (insn) || JUMP_P (insn) || CALL_P (insn))
++	return NULL;
++    }
++  return insn;
++}
++
++/* Return true if INSN has its deleted flag set or is a NOTE_INSN_DELETED
++   note.  */
++bool
++insn_deleted_p (rtx_insn *insn)
++{
++  /* Check the flag on the insn itself first.  */
++  const bool flagged_p = insn->deleted ();
++  if (flagged_p)
++    return true;
++
++  return NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED;
++}
++
++/* Functions to determine whether INSN is single-word, double-word
++   or partial-word load/store insn.  */
++
++bool
++load_single_p (rtx_insn *insn)
++{
++  /* Anything that is not of type LOAD is rejected outright.  */
++  if (get_attr_type (insn) != TYPE_LOAD)
++    return false;
++
++  /* The move_di and move_df patterns are excluded from "single".  */
++  const int icode = INSN_CODE (insn);
++
++  return icode != CODE_FOR_move_di && icode != CODE_FOR_move_df;
++}
++
++bool
++store_single_p (rtx_insn *insn)
++{
++  /* Anything that is not of type STORE is rejected outright.  */
++  if (get_attr_type (insn) != TYPE_STORE)
++    return false;
++
++  /* The move_di and move_df patterns are excluded from "single".  */
++  const int icode = INSN_CODE (insn);
++
++  return icode != CODE_FOR_move_di && icode != CODE_FOR_move_df;
++}
++
++bool
++load_double_p (rtx_insn *insn)
++{
++  /* Anything that is not of type LOAD is rejected outright.  */
++  if (get_attr_type (insn) != TYPE_LOAD)
++    return false;
++
++  /* Only the move_di and move_df patterns count as "double".  */
++  const int icode = INSN_CODE (insn);
++
++  return icode == CODE_FOR_move_di || icode == CODE_FOR_move_df;
++}
++
++bool
++store_double_p (rtx_insn *insn)
++{
++  /* Anything that is not of type STORE is rejected outright.  */
++  if (get_attr_type (insn) != TYPE_STORE)
++    return false;
++
++  /* Only the move_di and move_df patterns count as "double".  */
++  const int icode = INSN_CODE (insn);
++
++  return icode == CODE_FOR_move_di || icode == CODE_FOR_move_df;
++}
++
++bool
++store_offset_reg_p (rtx_insn *insn)
++{
++  /* Return true when INSN is of type STORE and the offset reported by
++     extract_offset_rtx for it is a register.  */
++  if (get_attr_type (insn) != TYPE_STORE)
++    return false;
++
++  rtx offset = extract_offset_rtx (insn);
++
++  /* A missing offset cannot be a register offset.  */
++  if (offset == NULL_RTX)
++    return false;
++
++  return REG_P (offset);
++}
++
++bool
++load_full_word_p (rtx_insn *insn)
++{
++  /* Only single loads are candidates.  */
++  if (!nds32::load_single_p (insn))
++    return false;
++
++  /* A full-word load is one whose source has SImode.  */
++  rtx src = SET_SRC (PATTERN (insn));
++  return GET_MODE (src) == SImode;
++}
++
++bool
++load_partial_word_p (rtx_insn *insn)
++{
++  /* Only single loads are candidates.  */
++  if (!nds32::load_single_p (insn))
++    return false;
++
++  /* A partial-word load has a HImode or QImode source.  */
++  const enum machine_mode src_mode = GET_MODE (SET_SRC (PATTERN (insn)));
++
++  return src_mode == HImode || src_mode == QImode;
++}
++
++/* Determine if INSN is a post update insn, i.e. whether
++   find_post_update_rtx locates a post-update rtx in it.  */
++bool
++post_update_insn_p (rtx_insn *insn)
++{
++  const int index = find_post_update_rtx (insn);
++
++  return index != -1;
++}
++
++/* Check if the address of MEM_RTX consists of a base register and an
++   immediate offset.  */
++bool
++immed_offset_p (rtx mem_rtx)
++{
++  gcc_assert (MEM_P (mem_rtx));
++  rtx addr = XEXP (mem_rtx, 0);
++
++  switch (GET_CODE (addr))
++    {
++    case REG:
++      /* (mem (reg)) is equivalent to (mem (plus (reg) (const_int 0))) */
++      return true;
++    case PLUS:
++      /* (mem (plus (reg) (const_int))) */
++      return GET_CODE (XEXP (addr, 1)) == CONST_INT;
++    default:
++      return false;
++    }
++}
++
++/* Find the post update rtx in INSN.  If INSN is a load/store multiple insn,
++   the function returns the index of the update within its PARALLEL vector.
++   If INSN is a single load/store insn with a post-update address, the
++   function returns 0.  If no post update is found, it returns -1.  */
++int
++find_post_update_rtx (rtx_insn *insn)
++{
++  rtx mem_rtx;
++  int i, len;
++  /* Insn types other than the ones listed below hit gcc_unreachable.  */
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      /* Find a pattern in a parallel rtx:
++	 (set (reg) (plus (reg) (const_int)))  */
++      len = parallel_elements (insn);
++      for (i = 0; i < len; ++i)
++	{
++	  rtx curr_insn = parallel_element (insn, i);
++	  /* The first element that sets a REG from a PLUS is the match.  */
++	  if (GET_CODE (curr_insn) == SET
++	      && REG_P (SET_DEST (curr_insn))
++	      && GET_CODE (SET_SRC (curr_insn)) == PLUS)
++		return i;
++	}
++      return -1;
++
++    case TYPE_LOAD:
++    case TYPE_FLOAD:
++    case TYPE_STORE:
++    case TYPE_FSTORE:
++      mem_rtx = extract_mem_rtx (insn);
++      /* (mem (post_inc (reg))), and likewise post_dec and post_modify.  */
++      switch (GET_CODE (XEXP (mem_rtx, 0)))
++	{
++	case POST_INC:
++	case POST_DEC:
++	case POST_MODIFY:
++	  return 0;
++
++	default:
++	  return -1;
++	}
++
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* Extract the MEM rtx from a load/store insn.  */
++rtx
++extract_mem_rtx (rtx_insn *insn)
++{
++  rtx body = PATTERN (insn);
++  /* Insn types other than the ones listed below hit gcc_unreachable.  */
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++    case TYPE_FLOAD:
++      if (MEM_P (SET_SRC (body)))
++	return SET_SRC (body);
++
++      /* unaligned address: (unspec [(mem)])  */
++      if (GET_CODE (SET_SRC (body)) == UNSPEC)
++	{
++	  gcc_assert (MEM_P (XVECEXP (SET_SRC (body), 0, 0)));
++	  return XVECEXP (SET_SRC (body), 0, 0);
++	}
++
++      /* Otherwise the MEM is operand 0, e.g. (sign_extend (mem)) */
++      gcc_assert (MEM_P (XEXP (SET_SRC (body), 0)));
++      return XEXP (SET_SRC (body), 0);
++
++    case TYPE_STORE:
++    case TYPE_FSTORE:
++      if (MEM_P (SET_DEST (body)))
++	return SET_DEST (body);
++
++      /* unaligned address: (unspec [(mem)])  */
++      if (GET_CODE (SET_DEST (body)) == UNSPEC)
++	{
++	  gcc_assert (MEM_P (XVECEXP (SET_DEST (body), 0, 0)));
++	  return XVECEXP (SET_DEST (body), 0, 0);
++	}
++
++      /* Otherwise the MEM must be operand 0 of the destination.  */
++      gcc_assert (MEM_P (XEXP (SET_DEST (body), 0)));
++      return XEXP (SET_DEST (body), 0);
++
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* Extract the base register from load/store insns.  The function returns
++   NULL_RTX if the address does not contain any register.  */
++rtx
++extract_base_reg (rtx_insn *insn)
++{
++  int post_update_rtx_index;
++  rtx mem_rtx;
++  rtx plus_rtx;
++
++  /* Find the MEM rtx.  If we can find an insn updating the base register,
++     the base register will be returned directly.  */
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD_MULTIPLE:
++      post_update_rtx_index = find_post_update_rtx (insn);
++
++      if (post_update_rtx_index != -1)
++        return SET_DEST (parallel_element (insn, post_update_rtx_index));
++
++      mem_rtx = SET_SRC (parallel_element (insn, 0));
++      break;
++
++    case TYPE_STORE_MULTIPLE:
++      post_update_rtx_index = find_post_update_rtx (insn);
++
++      if (post_update_rtx_index != -1)
++        return SET_DEST (parallel_element (insn, post_update_rtx_index));
++
++      mem_rtx = SET_DEST (parallel_element (insn, 0));
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_FLOAD:
++    case TYPE_STORE:
++    case TYPE_FSTORE:
++      mem_rtx = extract_mem_rtx (insn);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  gcc_assert (MEM_P (mem_rtx));
++
++  /* (mem (reg))  */
++  if (REG_P (XEXP (mem_rtx, 0)))
++    return XEXP (mem_rtx, 0);
++
++  /* (mem (lo_sum (reg) (symbol_ref)))  */
++  if (GET_CODE (XEXP (mem_rtx, 0)) == LO_SUM)
++    return XEXP (XEXP (mem_rtx, 0), 0);
++
++  plus_rtx = XEXP (mem_rtx, 0);
++  /* A purely symbolic address has no base register.  */
++  if (GET_CODE (plus_rtx) == SYMBOL_REF
++      || GET_CODE (plus_rtx) == CONST)
++    return NULL_RTX;
++
++  /* (mem (plus (reg) (const_int))) or
++     (mem (plus (mult (reg) (const_int 4)) (reg))) or
++     (mem (post_inc (reg))) or
++     (mem (post_dec (reg))) or
++     (mem (post_modify (reg) (plus (reg) (reg))))  */
++  gcc_assert (GET_CODE (plus_rtx) == PLUS
++	      || GET_CODE (plus_rtx) == POST_INC
++	      || GET_CODE (plus_rtx) == POST_DEC
++	      || GET_CODE (plus_rtx) == POST_MODIFY);
++
++  if (REG_P (XEXP (plus_rtx, 0)))
++    return XEXP (plus_rtx, 0);
++  /* Otherwise the register must be the second operand.  */
++  gcc_assert (REG_P (XEXP (plus_rtx, 1)));
++  return XEXP (plus_rtx, 1);
++}
++
++/* Extract the offset rtx from load/store insns.  The function returns
++   NULL_RTX if the offset is absent.  */
++rtx
++extract_offset_rtx (rtx_insn *insn)
++{
++  rtx mem_rtx;
++  rtx plus_rtx;
++  rtx offset_rtx;
++
++  /* Find the MEM rtx.  The multiple load/store insns don't have
++     the offset field so we can return NULL_RTX here.  */
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD_MULTIPLE:
++    case TYPE_STORE_MULTIPLE:
++      return NULL_RTX;
++
++    case TYPE_LOAD:
++    case TYPE_FLOAD:
++    case TYPE_STORE:
++    case TYPE_FSTORE:
++      mem_rtx = extract_mem_rtx (insn);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  gcc_assert (MEM_P (mem_rtx));
++
++  /* (mem (reg))  */
++  if (REG_P (XEXP (mem_rtx, 0)))
++    return NULL_RTX;
++
++  plus_rtx = XEXP (mem_rtx, 0);
++  switch (GET_CODE (plus_rtx))
++    {
++    case SYMBOL_REF:
++    case CONST:
++    case POST_INC:
++    case POST_DEC:
++      return NULL_RTX;
++
++    case PLUS:
++      /* (mem (plus (reg) (const_int))) or
++	 (mem (plus (mult (reg) (const_int 4)) (reg))) */
++      if (REG_P (XEXP (plus_rtx, 0)))
++	offset_rtx = XEXP (plus_rtx, 1);
++      else
++	{
++	  gcc_assert (REG_P (XEXP (plus_rtx, 1)));
++	  offset_rtx = XEXP (plus_rtx, 0);
++	}
++      /* For a scaled index, report the index register itself.  */
++      if (ARITHMETIC_P (offset_rtx))
++	{
++	  gcc_assert (GET_CODE (offset_rtx) == MULT);
++	  gcc_assert (REG_P (XEXP (offset_rtx, 0)));
++	  offset_rtx = XEXP (offset_rtx, 0);
++	}
++      break;
++
++    case LO_SUM:
++      /* (mem (lo_sum (reg) (symbol_ref)))  */
++      offset_rtx = XEXP (plus_rtx, 1);
++      break;
++
++    case POST_MODIFY:
++      /* (mem (post_modify (reg) (plus (reg) (reg / const_int)))).  The
++	 offset is operand 1 of the inner PLUS; operand 0 is the base.  */
++      gcc_assert (REG_P (XEXP (plus_rtx, 0)));
++      plus_rtx = XEXP (plus_rtx, 1);
++      gcc_assert (GET_CODE (plus_rtx) == PLUS);
++      offset_rtx = XEXP (plus_rtx, 1);
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  return offset_rtx;
++}
++
++/* Extract the register of the shift operand from an ALU_SHIFT rtx.  */
++rtx
++extract_shift_reg (rtx_insn *insn)
++{
++  rtx pattern = extract_pattern_from_insn (insn);
++
++  rtx alu = SET_SRC (pattern);
++  rtx lhs = XEXP (alu, 0);
++
++  /* Various forms of ALU_SHIFT can be made by the combiner.
++     See the difference between add_slli and sub_slli in nds32.md.
++     When operand 0 is a register the shift is taken from operand 1,
++     otherwise operand 0 itself is the shift.  */
++  rtx shift = REG_P (lhs) ? XEXP (alu, 1) : lhs;
++
++  /* The shifted register is operand 0 of the shift rtx.  */
++  return XEXP (shift, 0);
++}
++
++/* Check if INSN is a movd44 insn.  */
++bool
++movd44_insn_p (rtx_insn *insn)
++{
++  /* It must be an insn of type ALU ...  */
++  if (get_attr_type (insn) != TYPE_ALU)
++    return false;
++
++  /* ... that matches the move_di or the move_df pattern ...  */
++  if (INSN_CODE (insn) != CODE_FOR_move_di
++      && INSN_CODE (insn) != CODE_FOR_move_df)
++    return false;
++
++  rtx body = PATTERN (insn);
++  gcc_assert (GET_CODE (body) == SET);
++
++  /* ... and whose source and destination are each a REG or a SUBREG.  */
++  rtx src = SET_SRC (body);
++  rtx dest = SET_DEST (body);
++
++  return ((REG_P (src) || GET_CODE (src) == SUBREG)
++	  && (REG_P (dest) || GET_CODE (dest) == SUBREG));
++}
++
++/* Extract the first result (even reg) of a movd44 insn, obtained as the
++   low part of the insn's destination.  */
++rtx
++extract_movd44_even_reg (rtx_insn *insn)
++{
++  gcc_assert (movd44_insn_p (insn));
++
++  rtx def_reg = SET_DEST (PATTERN (insn));
++  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
++
++  /* Choose the mode of one half of the destination: SImode for a DImode
++     destination and SFmode for a DFmode one.  No other destination mode
++     is expected here.  */
++  const enum machine_mode def_mode = GET_MODE (def_reg);
++  enum machine_mode half_mode;
++
++  if (def_mode == DImode)
++    half_mode = SImode;
++  else if (def_mode == DFmode)
++    half_mode = SFmode;
++  else
++    gcc_unreachable ();
++
++  /* Hand back the low part in the narrower mode.  */
++  return gen_lowpart (half_mode, def_reg);
++}
++
++/* Extract the second result (odd reg) of a movd44 insn, obtained as the
++   high part of the insn's destination.  */
++rtx
++extract_movd44_odd_reg (rtx_insn *insn)
++{
++  gcc_assert (movd44_insn_p (insn));
++
++  rtx def_reg = SET_DEST (PATTERN (insn));
++  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
++
++  /* Choose the mode of one half of the destination: SImode for a DImode
++     destination and SFmode for a DFmode one.  No other destination mode
++     is expected here.  */
++  const enum machine_mode def_mode = GET_MODE (def_reg);
++  enum machine_mode half_mode;
++
++  if (def_mode == DImode)
++    half_mode = SImode;
++  else if (def_mode == DFmode)
++    half_mode = SFmode;
++  else
++    gcc_unreachable ();
++
++  /* Hand back the high part in the narrower mode.  */
++  return gen_highpart (half_mode, def_reg);
++}
++
++/* Return the destination of a MAC insn, i.e. its accumulation operand.  */
++rtx
++extract_mac_acc_rtx (rtx_insn *insn)
++{
++  return SET_DEST (PATTERN (insn));
++}
++
++/* Extract the rtx representing non-accumulation operands of a MAC insn.  */
++rtx
++extract_mac_non_acc_rtx (rtx_insn *insn)
++{
++  rtx src = SET_SRC (PATTERN (insn));
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_MAC:
++    case TYPE_DMAC:
++      /* Return operand 1 of the source if operand 0 is a REG, and
++	 operand 0 otherwise.  */
++      return REG_P (XEXP (src, 0)) ? XEXP (src, 1) : XEXP (src, 0);
++
++    default:
++      /* Only insns of type MAC or DMAC may be passed in.  */
++      gcc_unreachable ();
++    }
++}
++
++/* Check if the DIV insn needs two write ports.  */
++bool
++divmod_p (rtx_insn *insn)
++{
++  gcc_assert (get_attr_type (insn) == TYPE_DIV);
++
++  /* Only the divmodsi4 and udivmodsi4 patterns qualify.  */
++  const int icode = INSN_CODE (insn);
++
++  return (icode == CODE_FOR_divmodsi4
++	  || icode == CODE_FOR_udivmodsi4);
++}
++
++/* Extract the rtx representing the branch target to help recognize
++   data hazards.  INSN must be a call or a jump insn.  */
++rtx
++extract_branch_target_rtx (rtx_insn *insn)
++{
++  gcc_assert (CALL_P (insn) || JUMP_P (insn));
++
++  rtx body = PATTERN (insn);
++  /* A plain SET: the source is the target unless it is a conditional.  */
++  if (GET_CODE (body) == SET)
++    {
++      /* RTXs in IF_THEN_ELSE are branch conditions.  */
++      if (GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)
++        return NULL_RTX;
++
++      return SET_SRC (body);
++    }
++  /* A bare CALL: operand 0 is returned.  */
++  if (GET_CODE (body) == CALL)
++    return XEXP (body, 0);
++  /* A PARALLEL: only its first element is inspected.  */
++  if (GET_CODE (body) == PARALLEL)
++    {
++      rtx first_rtx = parallel_element (body, 0);
++
++      if (GET_CODE (first_rtx) == SET)
++	return SET_SRC (first_rtx);
++
++      if (GET_CODE (first_rtx) == CALL)
++	return XEXP (first_rtx, 0);
++    }
++
++  /* Handle special cases of bltzal, bgezal and jralnez.  */
++  if (GET_CODE (body) == COND_EXEC)
++    {
++      rtx addr_rtx = XEXP (body, 1);
++
++      if (GET_CODE (addr_rtx) == SET)
++	return SET_SRC (addr_rtx);
++
++      if (GET_CODE (addr_rtx) == PARALLEL)
++	{
++	  rtx first_rtx = parallel_element (addr_rtx, 0);
++
++	  if (GET_CODE (first_rtx) == SET)
++	    {
++	      rtx call_rtx = SET_SRC (first_rtx);
++	      gcc_assert (GET_CODE (call_rtx) == CALL);
++
++	      return XEXP (call_rtx, 0);
++	    }
++
++	  if (GET_CODE (first_rtx) == CALL)
++	    return XEXP (first_rtx, 0);
++	}
++    }
++  /* None of the recognized pattern forms matched.  */
++  gcc_unreachable ();
++}
++
++/* Extract the rtx representing the branch condition to help recognize
++   data hazards.  NULL_RTX is returned when no condition is found.  */
++rtx
++extract_branch_condition_rtx (rtx_insn *insn)
++{
++  gcc_assert (CALL_P (insn) || JUMP_P (insn));
++
++  rtx body = PATTERN (insn);
++  switch (GET_CODE (body))
++    {
++    case SET:
++      /* Only a SET whose source is an IF_THEN_ELSE has a condition.  */
++      if (GET_CODE (SET_SRC (body)) != IF_THEN_ELSE)
++	return NULL_RTX;
++      return XEXP (SET_SRC (body), 0);
++
++    case COND_EXEC:
++      /* The condition of a COND_EXEC is its operand 0.  */
++      return XEXP (body, 0);
++
++    default:
++      return NULL_RTX;
++    }
++}
++
++/* Building the CFG in later back end passes cannot call compute_bb_for_insn ()
++   directly because calling BLOCK_FOR_INSN (insn) when some insns have been
++   deleted can cause a segmentation fault.  Using this function to rebind
++   insns to their basic blocks instead avoids such issues.  */
++void
++compute_bb_for_insn_safe ()
++{
++  basic_block bb;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      rtx_insn *insn, *next_insn, *last_insn;
++      bool after_last_insn = false;
++
++      /* Find the last non-deleted insn, walking back from BB_END.  */
++      for (last_insn = BB_END (bb);
++	   PREV_INSN (last_insn) && insn_deleted_p (last_insn);
++	   last_insn = PREV_INSN (last_insn));
++
++      /* Bind each insn to its BB and adjust BB_END (bb).  */
++      for (insn = BB_HEAD (bb); insn; insn = NEXT_INSN (insn))
++	{
++	  BLOCK_FOR_INSN (insn) = bb;
++
++	  if (insn == last_insn)
++	    after_last_insn = true;
++
++	  next_insn = NEXT_INSN (insn);
++	  /* From LAST_INSN on, stop at end of chain, a label or a BB note.  */
++	  if (after_last_insn
++	      && (!next_insn
++		  || LABEL_P (next_insn)
++		  || NOTE_INSN_BASIC_BLOCK_P (next_insn)))
++	    {
++	      BB_END (bb) = insn;
++	      break;
++	    }
++	}
++    }
++}
++
++/* Exchange the positions of INSN1 and INSN2 in the insn chain.  */
++void
++exchange_insns (rtx_insn *insn1, rtx_insn *insn2)
++{
++  if (INSN_UID (insn1) == INSN_UID (insn2))
++    return;
++
++  rtx_insn *insn1_from = insn1;
++  rtx_insn *insn1_to = insn1;
++  rtx_insn *insn2_from = insn2;
++  rtx_insn *insn2_to = insn2;
++  /* A directly preceding relax_group insn is moved along with its insn.  */
++  if (PREV_INSN (insn1)
++      && INSN_CODE (PREV_INSN (insn1)) == CODE_FOR_relax_group)
++    insn1_from = PREV_INSN (insn1);
++
++  if (PREV_INSN (insn2)
++      && INSN_CODE (PREV_INSN (insn2)) == CODE_FOR_relax_group)
++    insn2_from = PREV_INSN (insn2);
++  /* The TImode/VOIDmode marking stays with the position, not the insn.  */
++  if (GET_MODE (insn1) == TImode && GET_MODE (insn2) == VOIDmode)
++    {
++      PUT_MODE (insn1, VOIDmode);
++      PUT_MODE (insn2, TImode);
++    }
++  else if (GET_MODE (insn1) == VOIDmode && GET_MODE (insn2) == TImode)
++    {
++      PUT_MODE (insn1, TImode);
++      PUT_MODE (insn2, VOIDmode);
++    }
++  /* Move range 1 after INSN2, then range 2 to where range 1 was.  */
++  if (PREV_INSN (insn1_from))
++    {
++      rtx_insn *insn1_prev = PREV_INSN (insn1_from);
++      /* NOTE(review): seems to assume INSN2 is not INSN1_PREV -- confirm.  */
++      reorder_insns (insn1_from, insn1_to, insn2);
++      reorder_insns (insn2_from, insn2_to, insn1_prev);
++
++      return;
++    }
++  /* Range 1 has no predecessor, so do the same the other way round.  */
++  gcc_assert (PREV_INSN (insn2_from));
++
++  rtx_insn *insn2_prev = PREV_INSN (insn2_from);
++
++  reorder_insns (insn2_from, insn2_to, insn1);
++  reorder_insns (insn1_from, insn1_to, insn2_prev);
++
++  return;
++}
++
++} // namespace nds32
+diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c
+index c47c122..5f5e668 100644
+--- a/gcc/config/nds32/nds32.c
++++ b/gcc/config/nds32/nds32.c
+@@ -24,48 +24,103 @@
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+-#include "target.h"
+-#include "rtl.h"
+ #include "tree.h"
++#include "rtl.h"
+ #include "df.h"
+-#include "tm_p.h"
+-#include "optabs.h"		/* For GEN_FCN.  */
+-#include "regs.h"
+-#include "emit-rtl.h"
+-#include "recog.h"
+-#include "diagnostic-core.h"
++#include "alias.h"
++#include "stringpool.h"
+ #include "stor-layout.h"
+ #include "varasm.h"
+ #include "calls.h"
++#include "regs.h"
++#include "insn-config.h"	/* Required by recog.h.  */
++#include "conditions.h"
+ #include "output.h"
++#include "insn-attr.h"		/* For DFA state_t.  */
++#include "insn-codes.h"		/* For CODE_FOR_xxx.  */
++#include "reload.h"		/* For push_reload().  */
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
+ #include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
+ #include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
+ #include "tm-constrs.h"
++#include "optabs.h"		/* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"		/* For add_builtin_function().  */
+ #include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++#include "cfgloop.h"
++#include "cfghooks.h"
++#include "hw-doloop.h"
++#include "context.h"
++#include "sched-int.h"
+ 
+ /* This file should be included last.  */
+ #include "target-def.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* This file is divided into five parts:
++/* This file is divided into six parts:
+ 
+-     PART 1: Auxiliary static variable definitions and
+-             target hook static variable definitions.
++     PART 1: Auxiliary external function and variable declarations.
+ 
+-     PART 2: Auxiliary static function definitions.
++     PART 2: Auxiliary static variable definitions and
++	     target hook static variable definitions.
+ 
+-     PART 3: Implement target hook stuff definitions.
++     PART 3: Auxiliary static function definitions.
+ 
+-     PART 4: Implemet extern function definitions,
+-             the prototype is in nds32-protos.h.
++     PART 4: Implement target hook stuff definitions.
+ 
+-     PART 5: Initialize target hook structure and definitions.  */
++     PART 5: Implement extern function definitions,
++	     the prototype is in nds32-protos.h.
++
++     PART 6: Initialize target hook structure and definitions.  */
++
++/* ------------------------------------------------------------------------ */
++
++/* PART 1: Auxiliary external function and variable declarations.  */
++
++namespace nds32 {
++namespace scheduling {
++
++rtl_opt_pass *make_pass_nds32_print_stalls (gcc::context *);
++
++} // namespace scheduling
++} // namespace nds32
++
++rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *);
++rtl_opt_pass *make_pass_nds32_load_store_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_soft_fp_arith_comm_opt(gcc::context *);
++rtl_opt_pass *make_pass_nds32_regrename_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_gcse_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_rename_lmwsmw_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_gen_lmwsmw_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_const_remater_opt (gcc::context *);
++rtl_opt_pass *make_pass_nds32_cprop_acc_opt (gcc::context *);
++
++gimple_opt_pass *make_pass_nds32_sign_conversion_opt (gcc::context *);
++gimple_opt_pass *make_pass_nds32_scalbn_transform_opt (gcc::context *);
++gimple_opt_pass *make_pass_nds32_abi_compatible (gcc::context *);
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* PART 1: Auxiliary static variable definitions and
+-           target hook static variable definitions.  */
++/* PART 2: Auxiliary static variable definitions and
++	   target hook static variable definitions.  */
+ 
+ /* Define intrinsic register names.
+    Please refer to nds32_intrinsic.h file, the index is corresponding to
+@@ -73,14 +128,217 @@
+    NOTE that the base value starting from 1024.  */
+ static const char * const nds32_intrinsic_register_names[] =
+ {
+-  "$PSW", "$IPSW", "$ITYPE", "$IPC"
++  "$CPU_VER",
++  "$ICM_CFG",
++  "$DCM_CFG",
++  "$MMU_CFG",
++  "$MSC_CFG",
++  "$MSC_CFG2",
++  "$CORE_ID",
++  "$FUCOP_EXIST",
++
++  "$PSW",
++  "$IPSW",
++  "$P_IPSW",
++  "$IVB",
++  "$EVA",
++  "$P_EVA",
++  "$ITYPE",
++  "$P_ITYPE",
++
++  "$MERR",
++  "$IPC",
++  "$P_IPC",
++  "$OIPC",
++  "$P_P0",
++  "$P_P1",
++
++  "$INT_MASK",
++  "$INT_MASK2",
++  "$INT_MASK3",
++  "$INT_PEND",
++  "$INT_PEND2",
++  "$INT_PEND3",
++  "$SP_USR",
++  "$SP_PRIV",
++  "$INT_PRI",
++  "$INT_PRI2",
++  "$INT_PRI3",
++  "$INT_PRI4",
++  "$INT_CTRL",
++  "$INT_TRIGGER",
++  "$INT_TRIGGER2",
++  "$INT_GPR_PUSH_DIS",
++
++  "$MMU_CTL",
++  "$L1_PPTB",
++  "$TLB_VPN",
++  "$TLB_DATA",
++  "$TLB_MISC",
++  "$VLPT_IDX",
++  "$ILMB",
++  "$DLMB",
++
++  "$CACHE_CTL",
++  "$HSMP_SADDR",
++  "$HSMP_EADDR",
++  "$SDZ_CTL",
++  "$N12MISC_CTL",
++  "$MISC_CTL",
++  "$ECC_MISC",
++
++  "$BPC0",
++  "$BPC1",
++  "$BPC2",
++  "$BPC3",
++  "$BPC4",
++  "$BPC5",
++  "$BPC6",
++  "$BPC7",
++
++  "$BPA0",
++  "$BPA1",
++  "$BPA2",
++  "$BPA3",
++  "$BPA4",
++  "$BPA5",
++  "$BPA6",
++  "$BPA7",
++
++  "$BPAM0",
++  "$BPAM1",
++  "$BPAM2",
++  "$BPAM3",
++  "$BPAM4",
++  "$BPAM5",
++  "$BPAM6",
++  "$BPAM7",
++
++  "$BPV0",
++  "$BPV1",
++  "$BPV2",
++  "$BPV3",
++  "$BPV4",
++  "$BPV5",
++  "$BPV6",
++  "$BPV7",
++
++  "$BPCID0",
++  "$BPCID1",
++  "$BPCID2",
++  "$BPCID3",
++  "$BPCID4",
++  "$BPCID5",
++  "$BPCID6",
++  "$BPCID7",
++
++  "$EDM_CFG",
++  "$EDMSW",
++  "$EDM_CTL",
++  "$EDM_DTR",
++  "$BPMTC",
++  "$DIMBR",
++
++  "$TECR0",
++  "$TECR1",
++  "$PFMC0",
++  "$PFMC1",
++  "$PFMC2",
++  "$PFM_CTL",
++  "$PFT_CTL",
++  "$HSP_CTL",
++  "$SP_BOUND",
++  "$SP_BOUND_PRIV",
++  "$SP_BASE",
++  "$SP_BASE_PRIV",
++  "$FUCOP_CTL",
++  "$PRUSR_ACC_CTL",
++
++  "$DMA_CFG",
++  "$DMA_GCSW",
++  "$DMA_CHNSEL",
++  "$DMA_ACT",
++  "$DMA_SETUP",
++  "$DMA_ISADDR",
++  "$DMA_ESADDR",
++  "$DMA_TCNT",
++  "$DMA_STATUS",
++  "$DMA_2DSET",
++  "$DMA_2DSCTL",
++  "$DMA_RCNT",
++  "$DMA_HSTATUS",
++
++  "$PC",
++  "$SP_USR1",
++  "$SP_USR2",
++  "$SP_USR3",
++  "$SP_PRIV1",
++  "$SP_PRIV2",
++  "$SP_PRIV3",
++  "$BG_REGION",
++  "$SFCR",
++  "$SIGN",
++  "$ISIGN",
++  "$P_ISIGN",
++  "$IFC_LP",
++  "$ITB"
++};
++
++/* Define intrinsic cctl names.  */
++static const char * const nds32_cctl_names[] =
++{
++  "L1D_VA_FILLCK",
++  "L1D_VA_ULCK",
++  "L1I_VA_FILLCK",
++  "L1I_VA_ULCK",
++
++  "L1D_IX_WBINVAL",
++  "L1D_IX_INVAL",
++  "L1D_IX_WB",
++  "L1I_IX_INVAL",
++
++  "L1D_VA_INVAL",
++  "L1D_VA_WB",
++  "L1D_VA_WBINVAL",
++  "L1I_VA_INVAL",
++
++  "L1D_IX_RTAG",
++  "L1D_IX_RWD",
++  "L1I_IX_RTAG",
++  "L1I_IX_RWD",
++
++  "L1D_IX_WTAG",
++  "L1D_IX_WWD",
++  "L1I_IX_WTAG",
++  "L1I_IX_WWD"
++};
++
++static const char * const nds32_dpref_names[] =
++{
++  "SRD",
++  "MRD",
++  "SWR",
++  "MWR",
++  "PTE",
++  "CLWR"
++};
++
++/* Defining register allocation order for performance.
++   We want to allocate callee-saved registers after others.
++   It may be used by nds32_adjust_reg_alloc_order().  */
++static const int nds32_reg_alloc_order_for_speed[] =
++{
++   0,   1,   2,   3,   4,   5,  16,  17,
++  18,  19,  20,  21,  22,  23,  24,  25,
++  26,  27,   6,   7,   8,   9,  10,  11,
++  12,  13,  14,  15
+ };
+ 
+ /* Defining target-specific uses of __attribute__.  */
+ static const struct attribute_spec nds32_attribute_table[] =
+ {
+   /* Syntax: { name, min_len, max_len, decl_required, type_required,
+-               function_type_required, handler, affects_type_identity } */
++	       function_type_required, handler, affects_type_identity } */
+ 
+   /* The interrupt vid: [0-63]+ (actual vector number starts from 9 to 72).  */
+   { "interrupt",    1, 64, false, false, false, NULL, false },
+@@ -93,6 +351,7 @@ static const struct attribute_spec nds32_attribute_table[] =
+   { "nested",       0,  0, false, false, false, NULL, false },
+   { "not_nested",   0,  0, false, false, false, NULL, false },
+   { "nested_ready", 0,  0, false, false, false, NULL, false },
++  { "critical",     0,  0, false, false, false, NULL, false },
+ 
+   /* The attributes describing isr register save scheme.  */
+   { "save_all",     0,  0, false, false, false, NULL, false },
+@@ -102,17 +361,32 @@ static const struct attribute_spec nds32_attribute_table[] =
+   { "nmi",          1,  1, false, false, false, NULL, false },
+   { "warm",         1,  1, false, false, false, NULL, false },
+ 
++  /* The attributes describing isr security level. */
++  { "secure",       1,  1, false, false, false, NULL, false },
++
+   /* The attribute telling no prologue/epilogue.  */
+   { "naked",        0,  0, false, false, false, NULL, false },
+ 
++  /* The attribute is used to set signature.  */
++  { "signature",    0,  0, false, false, false, NULL, false },
++
++  /* The attribute is used to tell this function to be ROM patch.  */
++  { "indirect_call",0,  0, false, false, false, NULL, false },
++
++  /* FOR BACKWARD COMPATIBILITY,
++     this attribute also tells no prologue/epilogue.  */
++  { "no_prologue",  0,  0, false, false, false, NULL, false },
++
++  /* The attribute turns off hwloop optimization.  */
++  { "no_ext_zol",    0,  0, false,  false, false, NULL, false},
++
+   /* The last attribute spec is set to be NULL.  */
+   { NULL,           0,  0, false, false, false, NULL, false }
+ };
+ 
+-
+ /* ------------------------------------------------------------------------ */
+ 
+-/* PART 2: Auxiliary static function definitions.  */
++/* PART 3: Auxiliary static function definitions.  */
+ 
+ /* Function to save and restore machine-specific function data.  */
+ static struct machine_function *
+@@ -121,12 +395,24 @@ nds32_init_machine_status (void)
+   struct machine_function *machine;
+   machine = ggc_cleared_alloc<machine_function> ();
+ 
++  /* Initially assume this function does not use __builtin_eh_return.  */
++  machine->use_eh_return_p = 0;
++
+   /* Initially assume this function needs prologue/epilogue.  */
+   machine->naked_p = 0;
+ 
+   /* Initially assume this function does NOT use fp_as_gp optimization.  */
+   machine->fp_as_gp_p = 0;
+ 
++  /* Initially this function is not under strictly aligned situation.  */
++  machine->strict_aligned_p = 0;
++
++  /* Initially this function has no naked and no_prologue attributes.  */
++  machine->attr_naked_p = 0;
++  machine->attr_no_prologue_p = 0;
++
++  /* Initially this function's hwloop group ID number is zero.  */
++  machine->hwloop_group_id = 0;
+   return machine;
+ }
+ 
+@@ -137,23 +423,63 @@ nds32_compute_stack_frame (void)
+ {
+   int r;
+   int block_size;
++  bool v3pushpop_p;
+ 
+   /* Because nds32_compute_stack_frame() will be called from different place,
+      everytime we enter this function, we have to assume this function
+      needs prologue/epilogue.  */
+   cfun->machine->naked_p = 0;
+ 
++  /* We need to mark whether this function has the naked and no_prologue
++     attributes so that we can tell the difference if the user applies
++     the -mret-in-naked-func option.  */
++  cfun->machine->attr_naked_p
++    = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
++      ? 1 : 0;
++  cfun->machine->attr_no_prologue_p
++    = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl))
++      ? 1 : 0;
++
++  /* If __builtin_eh_return is used, we better have frame pointer needed
++     so that we can easily locate the stack slot of return address.  */
++  if (crtl->calls_eh_return)
++    {
++      frame_pointer_needed = 1;
++
++      /* We need to mark eh data registers that need to be saved
++	 in the stack.  */
++      cfun->machine->eh_return_data_first_regno = EH_RETURN_DATA_REGNO (0);
++      for (r = 0; EH_RETURN_DATA_REGNO (r) != INVALID_REGNUM; r++)
++	cfun->machine->eh_return_data_last_regno = r;
++
++      cfun->machine->eh_return_data_regs_size
++	= 4 * (cfun->machine->eh_return_data_last_regno
++	       - cfun->machine->eh_return_data_first_regno
++	       + 1);
++      cfun->machine->use_eh_return_p = 1;
++    }
++  else
++    {
++      /* Assigning SP_REGNUM to eh_return_data_{first,last}_regno means we
++	 do not need to handle __builtin_eh_return case in this function.  */
++      cfun->machine->eh_return_data_first_regno = SP_REGNUM;
++      cfun->machine->eh_return_data_last_regno  = SP_REGNUM;
++
++      cfun->machine->eh_return_data_regs_size = 0;
++      cfun->machine->use_eh_return_p = 0;
++    }
++
+   /* Get variadic arguments size to prepare pretend arguments and
+      we will push them into stack at prologue by ourself.  */
+   cfun->machine->va_args_size = crtl->args.pretend_args_size;
+   if (cfun->machine->va_args_size != 0)
+     {
+       cfun->machine->va_args_first_regno
+-        = NDS32_GPR_ARG_FIRST_REGNUM
+-          + NDS32_MAX_GPR_REGS_FOR_ARGS
+-          - (crtl->args.pretend_args_size / UNITS_PER_WORD);
++	= NDS32_GPR_ARG_FIRST_REGNUM
++	  + NDS32_MAX_GPR_REGS_FOR_ARGS
++	  - (crtl->args.pretend_args_size / UNITS_PER_WORD);
+       cfun->machine->va_args_last_regno
+-        = NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1;
++	= NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1;
+     }
+   else
+     {
+@@ -183,11 +509,13 @@ nds32_compute_stack_frame (void)
+ 
+   /* If $gp value is required to be saved on stack, it needs 4 bytes space.
+      Check whether we are using PIC code genration.  */
+-  cfun->machine->gp_size = (flag_pic) ? 4 : 0;
++  cfun->machine->gp_size =
++    (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0;
+ 
+   /* If $lp value is required to be saved on stack, it needs 4 bytes space.
+      Check whether $lp is ever live.  */
+-  cfun->machine->lp_size = (df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0;
++  cfun->machine->lp_size
++    = (flag_always_save_lp || df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0;
+ 
+   /* Initially there is no padding bytes.  */
+   cfun->machine->callee_saved_area_gpr_padding_bytes = 0;
+@@ -196,6 +524,10 @@ nds32_compute_stack_frame (void)
+   cfun->machine->callee_saved_gpr_regs_size = 0;
+   cfun->machine->callee_saved_first_gpr_regno = SP_REGNUM;
+   cfun->machine->callee_saved_last_gpr_regno  = SP_REGNUM;
++  cfun->machine->callee_saved_fpr_regs_size = 0;
++  cfun->machine->callee_saved_first_fpr_regno = SP_REGNUM;
++  cfun->machine->callee_saved_last_fpr_regno  = SP_REGNUM;
++
+   /* Currently, there is no need to check $r28~$r31
+      because we will save them in another way.  */
+   for (r = 0; r < 28; r++)
+@@ -213,43 +545,77 @@ nds32_compute_stack_frame (void)
+ 	}
+     }
+ 
++  /* Recording fpu callee-saved register.  */
++  if (TARGET_HARD_FLOAT)
++    {
++      for (r = NDS32_FIRST_FPR_REGNUM; r < NDS32_LAST_FPR_REGNUM; r++)
++	{
++	  if (NDS32_REQUIRED_CALLEE_SAVED_P (r))
++	    {
++	      /* Mark the first required callee-saved register.  */
++	      if (cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM)
++		{
++		  /* Make the first callee-saved register number even,
++		     because we use doubleword access, and this
++		     guarantees 8-byte alignment.  */
++		  if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (r))
++		    cfun->machine->callee_saved_first_fpr_regno = r - 1;
++		  else
++		    cfun->machine->callee_saved_first_fpr_regno = r;
++		}
++	      cfun->machine->callee_saved_last_fpr_regno = r;
++	    }
++	}
++
++      /* Make the last callee-saved register number odd;
++	 we want the number of callee-saved registers to be even.  */
++      int last_fpr = cfun->machine->callee_saved_last_fpr_regno;
++      if (NDS32_FPR_REGNO_OK_FOR_DOUBLE (last_fpr))
++	cfun->machine->callee_saved_last_fpr_regno++;
++    }
++
+   /* Check if this function can omit prologue/epilogue code fragment.
+-     If there is 'naked' attribute in this function,
++     If there is 'no_prologue'/'naked'/'secure' attribute in this function,
+      we can set 'naked_p' flag to indicate that
+      we do not have to generate prologue/epilogue.
+      Or, if all the following conditions succeed,
+      we can set this function 'naked_p' as well:
+        condition 1: first_regno == last_regno == SP_REGNUM,
+-                    which means we do not have to save
+-                    any callee-saved registers.
++		    which means we do not have to save
++		    any callee-saved registers.
+        condition 2: Both $lp and $fp are NOT live in this function,
+-                    which means we do not need to save them and there
+-                    is no outgoing size.
++		    which means we do not need to save them and there
++		    is no outgoing size.
+        condition 3: There is no local_size, which means
+-                    we do not need to adjust $sp.  */
+-  if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
++		    we do not need to adjust $sp.  */
++  if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl))
++      || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
++      || lookup_attribute ("secure", DECL_ATTRIBUTES (current_function_decl))
+       || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM
+ 	  && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM
++	  && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM
++	  && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM
+ 	  && !df_regs_ever_live_p (FP_REGNUM)
+ 	  && !df_regs_ever_live_p (LP_REGNUM)
+-	  && cfun->machine->local_size == 0))
++	  && cfun->machine->local_size == 0
++	  && !flag_pic))
+     {
+       /* Set this function 'naked_p' and other functions can check this flag.
+-         Note that in nds32 port, the 'naked_p = 1' JUST means there is no
+-         callee-saved, local size, and outgoing size.
+-         The varargs space and ret instruction may still present in
+-         the prologue/epilogue expanding.  */
++	 Note that in nds32 port, the 'naked_p = 1' JUST means there is no
++	 callee-saved, local size, and outgoing size.
++	 The varargs space and ret instruction may still present in
++	 the prologue/epilogue expanding.  */
+       cfun->machine->naked_p = 1;
+ 
+       /* No need to save $fp, $gp, and $lp.
+-         We should set these value to be zero
+-         so that nds32_initial_elimination_offset() can work properly.  */
++	 We should set these value to be zero
++	 so that nds32_initial_elimination_offset() can work properly.  */
+       cfun->machine->fp_size = 0;
+       cfun->machine->gp_size = 0;
+       cfun->machine->lp_size = 0;
+ 
+       /* If stack usage computation is required,
+-         we need to provide the static stack size.  */
++	 we need to provide the static stack size.  */
+       if (flag_stack_usage_info)
+ 	current_function_static_stack_size = 0;
+ 
+@@ -257,6 +623,8 @@ nds32_compute_stack_frame (void)
+       return;
+     }
+ 
++  v3pushpop_p = NDS32_V3PUSH_AVAILABLE_P;
++
+   /* Adjustment for v3push instructions:
+      If we are using v3push (push25/pop25) instructions,
+      we need to make sure Rb is $r6 and Re is
+@@ -264,16 +632,14 @@ nds32_compute_stack_frame (void)
+      Some results above will be discarded and recomputed.
+      Note that it is only available under V3/V3M ISA and we
+      DO NOT setup following stuff for isr or variadic function.  */
+-  if (TARGET_V3PUSH
+-      && !nds32_isr_function_p (current_function_decl)
+-      && (cfun->machine->va_args_size == 0))
++  if (v3pushpop_p)
+     {
+       /* Recompute:
+-           cfun->machine->fp_size
+-           cfun->machine->gp_size
+-           cfun->machine->lp_size
+-           cfun->machine->callee_saved_regs_first_regno
+-           cfun->machine->callee_saved_regs_last_regno */
++	   cfun->machine->fp_size
++	   cfun->machine->gp_size
++	   cfun->machine->lp_size
++	   cfun->machine->callee_saved_first_gpr_regno
++	   cfun->machine->callee_saved_last_gpr_regno */
+ 
+       /* For v3push instructions, $fp, $gp, and $lp are always saved.  */
+       cfun->machine->fp_size = 4;
+@@ -316,11 +682,46 @@ nds32_compute_stack_frame (void)
+ 	}
+     }
+ 
+-  /* We have correctly set callee_saved_regs_first_regno
+-     and callee_saved_regs_last_regno.
+-     Initially, the callee_saved_regs_size is supposed to be 0.
+-     As long as callee_saved_regs_last_regno is not SP_REGNUM,
+-     we can update callee_saved_regs_size with new size.  */
++  int sp_adjust = cfun->machine->local_size
++		  + cfun->machine->out_args_size
++		  + cfun->machine->callee_saved_area_gpr_padding_bytes
++		  + cfun->machine->callee_saved_fpr_regs_size;
++
++  if (!v3pushpop_p
++      && nds32_memory_model_option == MEMORY_MODEL_FAST
++      && sp_adjust == 0
++      && !frame_pointer_needed)
++    {
++      block_size = cfun->machine->fp_size
++		   + cfun->machine->gp_size
++		   + cfun->machine->lp_size
++		   + (4 * (cfun->machine->callee_saved_last_gpr_regno
++			   - cfun->machine->callee_saved_first_gpr_regno
++			   + 1));
++
++      if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size))
++	{
++	  /* $r14 is last callee save register.  */
++	  if (cfun->machine->callee_saved_last_gpr_regno
++	      < NDS32_LAST_CALLEE_SAVE_GPR_REGNUM)
++	    {
++	      cfun->machine->callee_saved_last_gpr_regno++;
++	    }
++	  else if (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM)
++	    {
++	      cfun->machine->callee_saved_first_gpr_regno
++		= NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM;
++	      cfun->machine->callee_saved_last_gpr_regno
++		= NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM;
++	    }
++	}
++    }
++
++  /* We have correctly set callee_saved_first_gpr_regno
++     and callee_saved_last_gpr_regno.
++     Initially, the callee_saved_gpr_regs_size is supposed to be 0.
++     As long as callee_saved_last_gpr_regno is not SP_REGNUM,
++     we can update callee_saved_gpr_regs_size with new size.  */
+   if (cfun->machine->callee_saved_last_gpr_regno != SP_REGNUM)
+     {
+       /* Compute pushed size of callee-saved registers.  */
+@@ -330,10 +731,22 @@ nds32_compute_stack_frame (void)
+ 	       + 1);
+     }
+ 
++  if (TARGET_HARD_FLOAT)
++    {
++      /* Compute size of callee-saved floating-point registers.  */
++      if (cfun->machine->callee_saved_last_fpr_regno != SP_REGNUM)
++	{
++	  cfun->machine->callee_saved_fpr_regs_size
++	   = 4 * (cfun->machine->callee_saved_last_fpr_regno
++		  - cfun->machine->callee_saved_first_fpr_regno
++		  + 1);
++	}
++    }
++
+   /* Important: We need to make sure that
+-                (fp_size + gp_size + lp_size + callee_saved_regs_size)
+-                is 8-byte alignment.
+-                If it is not, calculate the padding bytes.  */
++		(fp_size + gp_size + lp_size + callee_saved_gpr_regs_size)
++		is 8-byte alignment.
++		If it is not, calculate the padding bytes.  */
+   block_size = cfun->machine->fp_size
+ 	       + cfun->machine->gp_size
+ 	       + cfun->machine->lp_size
+@@ -361,14 +774,15 @@ nds32_compute_stack_frame (void)
+      "push registers to memory",
+      "adjust stack pointer".  */
+ static void
+-nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
++nds32_emit_stack_push_multiple (unsigned Rb, unsigned Re,
++				bool save_fp_p, bool save_gp_p, bool save_lp_p,
++				bool vaarg_p)
+ {
+-  int regno;
++  unsigned regno;
+   int extra_count;
+   int num_use_regs;
+   int par_index;
+   int offset;
+-  int save_fp, save_gp, save_lp;
+ 
+   rtx reg;
+   rtx mem;
+@@ -381,39 +795,34 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
+      necessary information for data analysis,
+      so we create a parallel rtx like this:
+      (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32)))
+-                     (reg:SI Rb))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -28)))
+-                     (reg:SI Rb+1))
+-                ...
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -16)))
+-                     (reg:SI Re))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -12)))
+-                     (reg:SI FP_REGNUM))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -8)))
+-                     (reg:SI GP_REGNUM))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -4)))
+-                     (reg:SI LP_REGNUM))
+-                (set (reg:SI SP_REGNUM)
+-                     (plus (reg:SI SP_REGNUM) (const_int -32)))]) */
+-
+-  /* Determine whether we need to save $fp, $gp, or $lp.  */
+-  save_fp = INTVAL (En4) & 0x8;
+-  save_gp = INTVAL (En4) & 0x4;
+-  save_lp = INTVAL (En4) & 0x2;
++		     (reg:SI Rb))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -28)))
++		     (reg:SI Rb+1))
++		...
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -16)))
++		     (reg:SI Re))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -12)))
++		     (reg:SI FP_REGNUM))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -8)))
++		     (reg:SI GP_REGNUM))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -4)))
++		     (reg:SI LP_REGNUM))
++		(set (reg:SI SP_REGNUM)
++		     (plus (reg:SI SP_REGNUM) (const_int -32)))]) */
+ 
+   /* Calculate the number of registers that will be pushed.  */
+   extra_count = 0;
+-  if (save_fp)
++  if (save_fp_p)
+     extra_count++;
+-  if (save_gp)
++  if (save_gp_p)
+     extra_count++;
+-  if (save_lp)
++  if (save_lp_p)
+     extra_count++;
+   /* Note that Rb and Re may be SP_REGNUM.  DO NOT count it in.  */
+-  if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM)
++  if (Rb == SP_REGNUM && Re == SP_REGNUM)
+     num_use_regs = extra_count;
+   else
+-    num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count;
++    num_use_regs = Re - Rb + 1 + extra_count;
+ 
+   /* In addition to used registers,
+      we need one more space for (set sp sp-x) rtx.  */
+@@ -425,10 +834,10 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
+   offset = -(num_use_regs * 4);
+ 
+   /* Create (set mem regX) from Rb, Rb+1 up to Re.  */
+-  for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++)
++  for (regno = Rb; regno <= Re; regno++)
+     {
+       /* Rb and Re may be SP_REGNUM.
+-         We need to break this loop immediately.  */
++	 We need to break this loop immediately.  */
+       if (regno == SP_REGNUM)
+ 	break;
+ 
+@@ -444,7 +853,7 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
+     }
+ 
+   /* Create (set mem fp), (set mem gp), and (set mem lp) if necessary.  */
+-  if (save_fp)
++  if (save_fp_p)
+     {
+       reg = gen_rtx_REG (SImode, FP_REGNUM);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -456,7 +865,7 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
+       offset = offset + 4;
+       par_index++;
+     }
+-  if (save_gp)
++  if (save_gp_p)
+     {
+       reg = gen_rtx_REG (SImode, GP_REGNUM);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -468,7 +877,7 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
+       offset = offset + 4;
+       par_index++;
+     }
+-  if (save_lp)
++  if (save_lp_p)
+     {
+       reg = gen_rtx_REG (SImode, LP_REGNUM);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -514,14 +923,14 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p)
+      "pop registers from memory",
+      "adjust stack pointer".  */
+ static void
+-nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
++nds32_emit_stack_pop_multiple (unsigned Rb, unsigned Re,
++			       bool save_fp_p, bool save_gp_p, bool save_lp_p)
+ {
+-  int regno;
++  unsigned regno;
+   int extra_count;
+   int num_use_regs;
+   int par_index;
+   int offset;
+-  int save_fp, save_gp, save_lp;
+ 
+   rtx reg;
+   rtx mem;
+@@ -534,39 +943,34 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
+      necessary information for data analysis,
+      so we create a parallel rtx like this:
+      (parallel [(set (reg:SI Rb)
+-                     (mem (reg:SI SP_REGNUM)))
+-                (set (reg:SI Rb+1)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 4))))
+-                ...
+-                (set (reg:SI Re)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 16))))
+-                (set (reg:SI FP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 20))))
+-                (set (reg:SI GP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 24))))
+-                (set (reg:SI LP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 28))))
+-                (set (reg:SI SP_REGNUM)
+-                     (plus (reg:SI SP_REGNUM) (const_int 32)))]) */
+-
+-  /* Determine whether we need to restore $fp, $gp, or $lp.  */
+-  save_fp = INTVAL (En4) & 0x8;
+-  save_gp = INTVAL (En4) & 0x4;
+-  save_lp = INTVAL (En4) & 0x2;
++		     (mem (reg:SI SP_REGNUM)))
++		(set (reg:SI Rb+1)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 4))))
++		...
++		(set (reg:SI Re)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 16))))
++		(set (reg:SI FP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 20))))
++		(set (reg:SI GP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 24))))
++		(set (reg:SI LP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 28))))
++		(set (reg:SI SP_REGNUM)
++		     (plus (reg:SI SP_REGNUM) (const_int 32)))]) */
+ 
+   /* Calculate the number of registers that will be poped.  */
+   extra_count = 0;
+-  if (save_fp)
++  if (save_fp_p)
+     extra_count++;
+-  if (save_gp)
++  if (save_gp_p)
+     extra_count++;
+-  if (save_lp)
++  if (save_lp_p)
+     extra_count++;
+   /* Note that Rb and Re may be SP_REGNUM.  DO NOT count it in.  */
+-  if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM)
++  if (Rb == SP_REGNUM && Re == SP_REGNUM)
+     num_use_regs = extra_count;
+   else
+-    num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count;
++    num_use_regs = Re - Rb + 1 + extra_count;
+ 
+   /* In addition to used registers,
+      we need one more space for (set sp sp+x) rtx.  */
+@@ -578,10 +982,10 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
+   offset = 0;
+ 
+   /* Create (set regX mem) from Rb, Rb+1 up to Re.  */
+-  for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++)
++  for (regno = Rb; regno <= Re; regno++)
+     {
+       /* Rb and Re may be SP_REGNUM.
+-         We need to break this loop immediately.  */
++	 We need to break this loop immediately.  */
+       if (regno == SP_REGNUM)
+ 	break;
+ 
+@@ -599,7 +1003,7 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
+     }
+ 
+   /* Create (set fp mem), (set gp mem), and (set lp mem) if necessary.  */
+-  if (save_fp)
++  if (save_fp_p)
+     {
+       reg = gen_rtx_REG (SImode, FP_REGNUM);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -613,7 +1017,7 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
+ 
+       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+     }
+-  if (save_gp)
++  if (save_gp_p)
+     {
+       reg = gen_rtx_REG (SImode, GP_REGNUM);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -627,7 +1031,7 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
+ 
+       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+     }
+-  if (save_lp)
++  if (save_lp_p)
+     {
+       reg = gen_rtx_REG (SImode, LP_REGNUM);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -670,12 +1074,11 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4)
+      "push registers to memory",
+      "adjust stack pointer".  */
+ static void
+-nds32_emit_stack_v3push (rtx Rb,
+-			 rtx Re,
+-			 rtx En4 ATTRIBUTE_UNUSED,
+-			 rtx imm8u)
++nds32_emit_stack_v3push (unsigned Rb,
++			 unsigned Re,
++			 unsigned imm8u)
+ {
+-  int regno;
++  unsigned regno;
+   int num_use_regs;
+   int par_index;
+   int offset;
+@@ -690,27 +1093,27 @@ nds32_emit_stack_v3push (rtx Rb,
+      necessary information for data analysis,
+      so we create a parallel rtx like this:
+      (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32)))
+-                     (reg:SI Rb))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -28)))
+-                     (reg:SI Rb+1))
+-                ...
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -16)))
+-                     (reg:SI Re))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -12)))
+-                     (reg:SI FP_REGNUM))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -8)))
+-                     (reg:SI GP_REGNUM))
+-                (set (mem (plus (reg:SI SP_REGNUM) (const_int -4)))
+-                     (reg:SI LP_REGNUM))
+-                (set (reg:SI SP_REGNUM)
+-                     (plus (reg:SI SP_REGNUM) (const_int -32-imm8u)))]) */
++		     (reg:SI Rb))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -28)))
++		     (reg:SI Rb+1))
++		...
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -16)))
++		     (reg:SI Re))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -12)))
++		     (reg:SI FP_REGNUM))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -8)))
++		     (reg:SI GP_REGNUM))
++		(set (mem (plus (reg:SI SP_REGNUM) (const_int -4)))
++		     (reg:SI LP_REGNUM))
++		(set (reg:SI SP_REGNUM)
++		     (plus (reg:SI SP_REGNUM) (const_int -32-imm8u)))]) */
+ 
+   /* Calculate the number of registers that will be pushed.
+      Since $fp, $gp, and $lp is always pushed with v3push instruction,
+      we need to count these three registers.
+      Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14.
+      So there is no need to worry about Rb=Re=SP_REGNUM case.  */
+-  num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + 3;
++  num_use_regs = Re - Rb + 1 + 3;
+ 
+   /* In addition to used registers,
+      we need one more space for (set sp sp-x-imm8u) rtx.  */
+@@ -724,7 +1127,7 @@ nds32_emit_stack_v3push (rtx Rb,
+   /* Create (set mem regX) from Rb, Rb+1 up to Re.
+      Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14.
+      So there is no need to worry about Rb=Re=SP_REGNUM case.  */
+-  for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++)
++  for (regno = Rb; regno <= Re; regno++)
+     {
+       reg = gen_rtx_REG (SImode, regno);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -776,7 +1179,7 @@ nds32_emit_stack_v3push (rtx Rb,
+     = gen_rtx_SET (stack_pointer_rtx,
+ 		   plus_constant (Pmode,
+ 				  stack_pointer_rtx,
+-				  offset - INTVAL (imm8u)));
++				  offset - imm8u));
+   XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx;
+   RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1;
+ 
+@@ -794,12 +1197,11 @@ nds32_emit_stack_v3push (rtx Rb,
+      "pop registers from memory",
+      "adjust stack pointer".  */
+ static void
+-nds32_emit_stack_v3pop (rtx Rb,
+-			rtx Re,
+-			rtx En4 ATTRIBUTE_UNUSED,
+-			rtx imm8u)
++nds32_emit_stack_v3pop (unsigned Rb,
++			unsigned Re,
++			unsigned imm8u)
+ {
+-  int regno;
++  unsigned regno;
+   int num_use_regs;
+   int par_index;
+   int offset;
+@@ -815,27 +1217,27 @@ nds32_emit_stack_v3pop (rtx Rb,
+      necessary information for data analysis,
+      so we create a parallel rtx like this:
+      (parallel [(set (reg:SI Rb)
+-                     (mem (reg:SI SP_REGNUM)))
+-                (set (reg:SI Rb+1)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 4))))
+-                ...
+-                (set (reg:SI Re)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 16))))
+-                (set (reg:SI FP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 20))))
+-                (set (reg:SI GP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 24))))
+-                (set (reg:SI LP_REGNUM)
+-                     (mem (plus (reg:SI SP_REGNUM) (const_int 28))))
+-                (set (reg:SI SP_REGNUM)
+-                     (plus (reg:SI SP_REGNUM) (const_int 32+imm8u)))]) */
++		     (mem (reg:SI SP_REGNUM)))
++		(set (reg:SI Rb+1)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 4))))
++		...
++		(set (reg:SI Re)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 16))))
++		(set (reg:SI FP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 20))))
++		(set (reg:SI GP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 24))))
++		(set (reg:SI LP_REGNUM)
++		     (mem (plus (reg:SI SP_REGNUM) (const_int 28))))
++		(set (reg:SI SP_REGNUM)
++		     (plus (reg:SI SP_REGNUM) (const_int 32+imm8u)))]) */
+ 
+   /* Calculate the number of registers that will be poped.
+      Since $fp, $gp, and $lp is always poped with v3pop instruction,
+      we need to count these three registers.
+      Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14.
+      So there is no need to worry about Rb=Re=SP_REGNUM case.  */
+-  num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + 3;
++  num_use_regs = Re - Rb + 1 + 3;
+ 
+   /* In addition to used registers,
+      we need one more space for (set sp sp+x+imm8u) rtx.  */
+@@ -849,7 +1251,7 @@ nds32_emit_stack_v3pop (rtx Rb,
+   /* Create (set regX mem) from Rb, Rb+1 up to Re.
+      Under v3pop, Rb is $r6, while Re is $r6, $r8, $r10, or $r14.
+      So there is no need to worry about Rb=Re=SP_REGNUM case.  */
+-  for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++)
++  for (regno = Rb; regno <= Re; regno++)
+     {
+       reg = gen_rtx_REG (SImode, regno);
+       mem = gen_frame_mem (SImode, plus_constant (Pmode,
+@@ -907,11 +1309,24 @@ nds32_emit_stack_v3pop (rtx Rb,
+     = gen_rtx_SET (stack_pointer_rtx,
+ 		   plus_constant (Pmode,
+ 				  stack_pointer_rtx,
+-				  offset + INTVAL (imm8u)));
++				  offset + imm8u));
+   XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx;
+ 
+-  /* Tell gcc we adjust SP in this insn.  */
+-  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), dwarf);
++  if (frame_pointer_needed)
++    {
++      /* (expr_list:REG_CFA_DEF_CFA (plus:SI (reg/f:SI $sp)
++					     (const_int 0))
++	 means reset the CFA register to $sp and the offset to 0.  */
++      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
++					 const0_rtx);
++      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
++    }
++  else
++    {
++      /* Tell gcc we adjust SP in this insn.  */
++      dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA,
++			      copy_rtx (adjust_sp_rtx), dwarf);
++    }
+ 
+   parallel_insn = emit_insn (parallel_insn);
+ 
+@@ -924,6 +1339,32 @@ nds32_emit_stack_v3pop (rtx Rb,
+   REG_NOTES (parallel_insn) = dwarf;
+ }
+ 
++static void
++nds32_emit_load_gp (void)
++{
++  rtx got_symbol, pat;
++
++  /* Do not schedule across the GLOBAL OFFSET TABLE initialization.  */
++  emit_insn (gen_blockage ());
++
++  got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
++  /* sethi $gp, _GLOBAL_OFFSET_TABLE_ -8 */
++  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT);
++  pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-8)));
++  emit_insn (gen_sethi (pic_offset_table_rtx,pat));
++
++  /* ori $gp, $gp, _GLOBAL_OFFSET_TABLE_ -4 */
++  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT);
++  pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-4)));
++  emit_insn (gen_lo_sum (pic_offset_table_rtx, pic_offset_table_rtx, pat));
++
++  /* add5.pc $gp */
++  emit_insn (gen_add_pc (pic_offset_table_rtx, pic_offset_table_rtx));
++
++  /* Do not schedule across the GLOBAL OFFSET TABLE initialization.  */
++  emit_insn (gen_blockage ());
++}
++
+ /* Function that may creates more instructions
+    for large value on adjusting stack pointer.
+ 
+@@ -933,79 +1374,70 @@ nds32_emit_stack_v3pop (rtx Rb,
+    the adjustment value is not able to be fit in the 'addi' instruction.
+    One solution is to move value into a register
+    and then use 'add' instruction.
+-   In practice, we use TA_REGNUM ($r15) to accomplish this purpose.
+-   Also, we need to return zero for sp adjustment so that
+-   proglogue/epilogue knows there is no need to create 'addi' instruction.  */
+-static int
+-nds32_force_addi_stack_int (int full_value)
++   In practice, we use TA_REGNUM ($r15) to accomplish this purpose.  */
++static void
++nds32_emit_adjust_frame (rtx to_reg, rtx from_reg, int adjust_value)
+ {
+-  int adjust_value;
+-
+   rtx tmp_reg;
+-  rtx sp_adjust_insn;
++  rtx frame_adjust_insn;
++  rtx adjust_value_rtx = GEN_INT (adjust_value);
+ 
+-  if (!satisfies_constraint_Is15 (GEN_INT (full_value)))
++  if (adjust_value == 0)
++    return;
++
++  if (!satisfies_constraint_Is15 (adjust_value_rtx))
+     {
+       /* The value is not able to fit in single addi instruction.
+-         Create more instructions of moving value into a register
+-         and then add stack pointer with it.  */
++	 Create more instructions of moving value into a register
++	 and then add stack pointer with it.  */
+ 
+       /* $r15 is going to be temporary register to hold the value.  */
+       tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+ 
+       /* Create one more instruction to move value
+-         into the temporary register.  */
+-      emit_move_insn (tmp_reg, GEN_INT (full_value));
++	 into the temporary register.  */
++      emit_move_insn (tmp_reg, adjust_value_rtx);
+ 
+       /* Create new 'add' rtx.  */
+-      sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				   stack_pointer_rtx,
+-				   tmp_reg);
++      frame_adjust_insn = gen_addsi3 (to_reg,
++				      from_reg,
++				      tmp_reg);
+       /* Emit rtx into insn list and receive its transformed insn rtx.  */
+-      sp_adjust_insn = emit_insn (sp_adjust_insn);
+-
+-      /* At prologue, we need to tell GCC that this is frame related insn,
+-         so that we can consider this instruction to output debug information.
+-         If full_value is NEGATIVE, it means this function
+-         is invoked by expand_prologue.  */
+-      if (full_value < 0)
+-	{
+-	  /* Because (tmp_reg <- full_value) may be split into two
+-	     rtl patterns, we can not set its RTX_FRAME_RELATED_P.
+-	     We need to construct another (sp <- sp + full_value)
+-	     and then insert it into sp_adjust_insn's reg note to
+-	     represent a frame related expression.
+-	     GCC knows how to refer it and output debug information.  */
+-
+-	  rtx plus_rtx;
+-	  rtx set_rtx;
++      frame_adjust_insn = emit_insn (frame_adjust_insn);
+ 
+-	  plus_rtx = plus_constant (Pmode, stack_pointer_rtx, full_value);
+-	  set_rtx = gen_rtx_SET (stack_pointer_rtx, plus_rtx);
+-	  add_reg_note (sp_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx);
++      /* Because (tmp_reg <- adjust_value) may be split into two
++	 rtl patterns, we can not set its RTX_FRAME_RELATED_P.
++	 We need to construct another (to_reg <- from_reg + adjust_value)
++	 and then insert it into frame_adjust_insn's reg note to
++	 represent a frame related expression.
++	 GCC knows how to refer it and output debug information.  */
+ 
+-	  RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
+-	}
++      rtx plus_rtx;
++      rtx set_rtx;
+ 
+-      /* We have used alternative way to adjust stack pointer value.
+-         Return zero so that prologue/epilogue
+-         will not generate other instructions.  */
+-      return 0;
++      plus_rtx = plus_constant (Pmode, from_reg, adjust_value);
++      set_rtx = gen_rtx_SET (to_reg, plus_rtx);
++      add_reg_note (frame_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx);
+     }
+   else
+     {
+-      /* The value is able to fit in addi instruction.
+-         However, remember to make it to be positive value
+-         because we want to return 'adjustment' result.  */
+-      adjust_value = (full_value < 0) ? (-full_value) : (full_value);
+-
+-      return adjust_value;
++      /* The value fits in a single 'addi' instruction; emit it directly.  */
++      frame_adjust_insn = gen_addsi3 (to_reg,
++				      from_reg,
++				      adjust_value_rtx);
++      /* Emit rtx into instructions list and receive INSN rtx form.  */
++      frame_adjust_insn = emit_insn (frame_adjust_insn);
+     }
++
++    /* The insn rtx 'frame_adjust_insn' will change frame layout.
++       We need to use RTX_FRAME_RELATED_P so that GCC is able to
++       generate CFI (Call Frame Information) stuff.  */
++    RTX_FRAME_RELATED_P (frame_adjust_insn) = 1;
+ }
+ 
+ /* Return true if MODE/TYPE need double word alignment.  */
+ static bool
+-nds32_needs_double_word_align (machine_mode mode, const_tree type)
++nds32_needs_double_word_align (enum machine_mode mode, const_tree type)
+ {
+   unsigned int align;
+ 
+@@ -1015,18 +1447,25 @@ nds32_needs_double_word_align (machine_mode mode, const_tree type)
+   return (align > PARM_BOUNDARY);
+ }
+ 
+-/* Return true if FUNC is a naked function.  */
+-static bool
++bool
+ nds32_naked_function_p (tree func)
+ {
+-  tree t;
++  /* FOR BACKWARD COMPATIBILITY,
++     we need to support 'no_prologue' attribute as well.  */
++  tree t_naked;
++  tree t_no_prologue;
+ 
+   if (TREE_CODE (func) != FUNCTION_DECL)
+     abort ();
+ 
+-  t = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
++  /* We have to use lookup_attribute() to check attributes.
++     Because attr_naked_p and attr_no_prologue_p are set in
++     nds32_compute_stack_frame() and the function has not been
++     invoked yet.  */
++  t_naked       = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
++  t_no_prologue = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (func));
+ 
+-  return (t != NULL_TREE);
++  return ((t_naked != NULL_TREE) || (t_no_prologue != NULL_TREE));
+ }
+ 
+ /* Function that check if 'X' is a valid address register.
+@@ -1035,7 +1474,7 @@ nds32_naked_function_p (tree func)
+ 
+    STRICT : true
+      => We are in reload pass or after reload pass.
+-        The register number should be strictly limited in general registers.
++	The register number should be strictly limited in general registers.
+ 
+    STRICT : false
+      => Before reload pass, we are free to use any register number.  */
+@@ -1058,10 +1497,10 @@ nds32_address_register_rtx_p (rtx x, bool strict)
+ /* Function that check if 'INDEX' is valid to be a index rtx for address.
+ 
+    OUTER_MODE : Machine mode of outer address rtx.
+-        INDEX : Check if this rtx is valid to be a index for address.
++	INDEX : Check if this rtx is valid to be a index for address.
+        STRICT : If it is true, we are in reload pass or after reload pass.  */
+ static bool
+-nds32_legitimate_index_p (machine_mode outer_mode,
++nds32_legitimate_index_p (enum machine_mode outer_mode,
+ 			  rtx index,
+ 			  bool strict)
+ {
+@@ -1074,7 +1513,7 @@ nds32_legitimate_index_p (machine_mode outer_mode,
+     case REG:
+       regno = REGNO (index);
+       /* If we are in reload pass or after reload pass,
+-         we need to limit it to general register.  */
++	 we need to limit it to general register.  */
+       if (strict)
+ 	return REGNO_OK_FOR_INDEX_P (regno);
+       else
+@@ -1082,45 +1521,73 @@ nds32_legitimate_index_p (machine_mode outer_mode,
+ 
+     case CONST_INT:
+       /* The alignment of the integer value is determined by 'outer_mode'.  */
+-      if (GET_MODE_SIZE (outer_mode) == 1)
++      switch (GET_MODE_SIZE (outer_mode))
+ 	{
++	case 1:
+ 	  /* Further check if the value is legal for the 'outer_mode'.  */
+-	  if (!satisfies_constraint_Is15 (index))
+-	    return false;
++	  if (satisfies_constraint_Is15 (index))
++	    return true;
++	  break;
+ 
+-	  /* Pass all test, the value is valid, return true.  */
+-	  return true;
+-	}
+-      if (GET_MODE_SIZE (outer_mode) == 2
+-	  && NDS32_HALF_WORD_ALIGN_P (INTVAL (index)))
+-	{
++	case 2:
+ 	  /* Further check if the value is legal for the 'outer_mode'.  */
+-	  if (!satisfies_constraint_Is16 (index))
+-	    return false;
++	  if (satisfies_constraint_Is16 (index))
++	    {
++	      /* If it is not under strictly aligned situation,
++		 we can return true without checking alignment.  */
++	      if (!cfun->machine->strict_aligned_p)
++		return true;
++	     /* Make sure the address is half-word aligned.  */
++	      else if (NDS32_HALF_WORD_ALIGN_P (INTVAL (index)))
++		return true;
++	    }
++	  break;
+ 
+-	  /* Pass all test, the value is valid, return true.  */
+-	  return true;
+-	}
+-      if (GET_MODE_SIZE (outer_mode) == 4
+-	  && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index)))
+-	{
++	case 4:
+ 	  /* Further check if the value is legal for the 'outer_mode'.  */
+-	  if (!satisfies_constraint_Is17 (index))
+-	    return false;
++	  if (satisfies_constraint_Is17 (index))
++	    {
++	      if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE))
++		{
++		  if (!satisfies_constraint_Is14 (index))
++		    return false;
++		}
++
++	      /* If it is not under strictly aligned situation,
++		 we can return true without checking alignment.  */
++	      if (!cfun->machine->strict_aligned_p)
++		return true;
++	     /* Make sure the address is word aligned.  */
++	      else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index)))
++		return true;
++	    }
++	  break;
+ 
+-	  /* Pass all test, the value is valid, return true.  */
+-	  return true;
+-	}
+-      if (GET_MODE_SIZE (outer_mode) == 8
+-	  && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index)))
+-	{
+-	  /* Further check if the value is legal for the 'outer_mode'.  */
+-	  if (!satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4,
+-							SImode)))
+-	    return false;
++	case 8:
++	  if (satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4,
++						       SImode)))
++	    {
++	      if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE))
++		{
++		  if (!satisfies_constraint_Is14 (index))
++		    return false;
++		}
++
++	      /* If it is not under strictly aligned situation,
++		 we can return true without checking alignment.  */
++	      if (!cfun->machine->strict_aligned_p)
++		return true;
++	     /* Make sure the address is word aligned.
++		Currently we do not have 64-bit load/store yet,
++		so we will use two 32-bit load/store instructions to do
++		memory access and they need single-word alignment.  */
++	      else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index)))
++		return true;
++	    }
++	  break;
+ 
+-	  /* Pass all test, the value is valid, return true.  */
+-	  return true;
++	default:
++	  return false;
+ 	}
+ 
+       return false;
+@@ -1134,9 +1601,10 @@ nds32_legitimate_index_p (machine_mode outer_mode,
+ 	  int multiplier;
+ 	  multiplier = INTVAL (op1);
+ 
+-	  /* We only allow (mult reg const_int_1)
+-	     or (mult reg const_int_2) or (mult reg const_int_4).  */
+-	  if (multiplier != 1 && multiplier != 2 && multiplier != 4)
++	  /* We only allow (mult reg const_int_1), (mult reg const_int_2),
++	     (mult reg const_int_4) or (mult reg const_int_8).  */
++	  if (multiplier != 1 && multiplier != 2
++	      && multiplier != 4 && multiplier != 8)
+ 	    return false;
+ 
+ 	  regno = REGNO (op0);
+@@ -1161,8 +1629,9 @@ nds32_legitimate_index_p (machine_mode outer_mode,
+ 	  sv = INTVAL (op1);
+ 
+ 	  /* We only allow (ashift reg const_int_0)
+-	     or (ashift reg const_int_1) or (ashift reg const_int_2).  */
+-	  if (sv != 0 && sv != 1 && sv !=2)
++	     or (ashift reg const_int_1) or (ashift reg const_int_2) or
++	     (ashift reg const_int_3).  */
++	  if (sv != 0 && sv != 1 && sv !=2 && sv != 3)
+ 	    return false;
+ 
+ 	  regno = REGNO (op0);
+@@ -1181,18 +1650,302 @@ nds32_legitimate_index_p (machine_mode outer_mode,
+     }
+ }
+ 
++static void
++nds32_insert_innermost_loop (void)
++{
++  struct loop *loop;
++  basic_block *bbs, bb;
++
++  compute_bb_for_insn ();
++  /* Initialize loop structures.  */
++  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
++
++  /* Scan all inner most loops.  */
++  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
++    {
++      bbs = get_loop_body (loop);
++      bb = *bbs;
++      free (bbs);
++
++      emit_insn_before (gen_innermost_loop_begin (),
++			BB_HEAD (bb));
++
++      /* Find the final basic block in the loop.  */
++      while (bb)
++	{
++	  if (bb->next_bb == NULL)
++	    break;
++
++	  if (bb->next_bb->loop_father != loop)
++	    break;
++
++	  bb = bb->next_bb;
++	}
++
++      emit_insn_before (gen_innermost_loop_end (),
++			BB_END (bb));
++    }
++
++  /* Release loop structures.  */
++  loop_optimizer_finalize ();
++}
++
++/* Insert isps for function with signature attribute.  */
++static void
++nds32_insert_isps (void)
++{
++  rtx_insn *insn;
++  unsigned first = 0;
++
++  if (!lookup_attribute ("signature", DECL_ATTRIBUTES (current_function_decl)))
++    return;
++
++  insn = get_insns ();
++  while (insn)
++    {
++      /* To ensure the whole function is protected, emit the first
++	 isps here rather than in the prologue.  */
++      if (!first && INSN_P (insn))
++	{
++	  emit_insn_before (gen_unspec_signature_begin (), insn);
++	  first = 1;
++	}
++
++      if (LABEL_P (insn) || CALL_P (insn) || any_condjump_p (insn)
++	  || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
++	      && (XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_SYSCALL
++		  || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TRAP
++		  || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TEQZ
++		  || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TNEZ)))
++	{
++	  emit_insn_after (gen_unspec_signature_begin (), insn);
++	}
++      insn = NEXT_INSN (insn);
++    }
++}
++
++static void
++nds32_register_pass (
++  rtl_opt_pass *(*make_pass_func) (gcc::context *),
++  enum pass_positioning_ops pass_pos,
++  const char *ref_pass_name)
++{
++  opt_pass *new_opt_pass = make_pass_func (g);
++
++  struct register_pass_info insert_pass =
++    {
++      new_opt_pass,	/* pass */
++      ref_pass_name,	/* reference_pass_name */
++      1,		/* ref_pass_instance_number */
++      pass_pos		/* po_op */
++    };
++
++  register_pass (&insert_pass);
++}
++
++static void
++nds32_register_pass (
++  gimple_opt_pass *(*make_pass_func) (gcc::context *),
++  enum pass_positioning_ops pass_pos,
++  const char *ref_pass_name)
++{
++  opt_pass *new_opt_pass = make_pass_func (g);
++
++  struct register_pass_info insert_pass =
++    {
++      new_opt_pass,	/* pass */
++      ref_pass_name,	/* reference_pass_name */
++      1,		/* ref_pass_instance_number */
++      pass_pos		/* po_op */
++    };
++
++  register_pass (&insert_pass);
++}
++
++/* This function is called from nds32_option_override ().
++   All new passes should be registered here.  */
++static void
++nds32_register_passes (void)
++{
++  nds32_register_pass (
++    make_pass_nds32_fp_as_gp,
++    PASS_POS_INSERT_BEFORE,
++    "ira");
++
++  nds32_register_pass (
++    make_pass_nds32_relax_opt,
++    PASS_POS_INSERT_AFTER,
++    "mach");
++
++  nds32_register_pass (
++    make_pass_nds32_load_store_opt,
++    PASS_POS_INSERT_AFTER,
++    "mach");
++
++  nds32_register_pass (
++    make_pass_nds32_soft_fp_arith_comm_opt,
++    PASS_POS_INSERT_BEFORE,
++    "mach");
++
++  nds32_register_pass (
++    make_pass_nds32_regrename_opt,
++    PASS_POS_INSERT_AFTER,
++    "mach");
++
++  nds32_register_pass (
++    make_pass_nds32_gcse_opt,
++    PASS_POS_INSERT_BEFORE,
++    "cprop_hardreg");
++
++  nds32_register_pass (
++    make_pass_nds32_cprop_acc_opt,
++    PASS_POS_INSERT_AFTER,
++    "cprop_hardreg");
++
++  nds32_register_pass (
++    make_pass_cprop_hardreg,
++    PASS_POS_INSERT_AFTER,
++    "mach");
++
++  nds32_register_pass (
++    make_pass_nds32_rename_lmwsmw_opt,
++    PASS_POS_INSERT_AFTER,
++    "jump2");
++
++  nds32_register_pass (
++    make_pass_nds32_gen_lmwsmw_opt,
++    PASS_POS_INSERT_BEFORE,
++    "peephole2");
++
++  nds32_register_pass (
++    make_pass_nds32_const_remater_opt,
++    PASS_POS_INSERT_BEFORE,
++    "ira");
++
++  nds32_register_pass (
++    make_pass_nds32_scalbn_transform_opt,
++    PASS_POS_INSERT_AFTER,
++    "optimized");
++
++  nds32_register_pass (
++    make_pass_nds32_sign_conversion_opt,
++    PASS_POS_INSERT_BEFORE,
++    "optimized");
++
++  nds32_register_pass (
++    make_pass_nds32_abi_compatible,
++    PASS_POS_INSERT_BEFORE,
++    "optimized");
++
++  nds32_register_pass (
++    nds32::scheduling::make_pass_nds32_print_stalls,
++    PASS_POS_INSERT_BEFORE,
++    "final");
++}
++
+ /* ------------------------------------------------------------------------ */
+ 
+-/* PART 3: Implement target hook stuff definitions.  */
++/* PART 4: Implement target hook stuff definitions.  */
++
++
++/* Computing the Length of an Insn.
++   Modifies the length assigned to instruction INSN.
++   LENGTH is the initially computed length of the insn.  */
++int
++nds32_adjust_insn_length (rtx_insn *insn, int length)
++{
++  int adjust_value = 0;
++  switch (recog_memoized (insn))
++    {
++    case CODE_FOR_call_internal:
++    case CODE_FOR_call_value_internal:
++      {
++	if (NDS32_ALIGN_P ())
++	  {
++	    rtx_insn *next_insn = next_active_insn (insn);
++	    if (next_insn && get_attr_length (next_insn) != 2)
++	      adjust_value += 2;
++	  }
++	/* We need to insert a nop after a noreturn function call to prevent
++	   a software breakpoint from corrupting the next function.  */
++	if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
++	  {
++	    if (TARGET_16_BIT)
++	      adjust_value += 2;
++	    else
++	      adjust_value += 4;
++	  }
++      }
++      return length + adjust_value;
++
++    default:
++      return length;
++    }
++}
++
++/* Storage Layout.  */
++
++/* This function will be called just before expansion into rtl.  */
++static void
++nds32_expand_to_rtl_hook (void)
++{
++  /* We need to set strictly aligned situation.
++     After that, the memory address checking in nds32_legitimate_address_p()
++     will take alignment offset into consideration so that it will not create
++     unaligned [base + offset] access during the rtl optimization.  */
++  cfun->machine->strict_aligned_p = 1;
++}
++
++
++/* Register Usage.  */
++
++static void
++nds32_conditional_register_usage (void)
++{
++  int regno;
++
++  if (TARGET_LINUX_ABI)
++    fixed_regs[TP_REGNUM] = 1;
++
++  if (TARGET_HARD_FLOAT)
++    {
++      for (regno = NDS32_FIRST_FPR_REGNUM;
++	   regno <= NDS32_LAST_FPR_REGNUM; regno++)
++	{
++	  fixed_regs[regno] = 0;
++	  if (regno < NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS)
++	    call_used_regs[regno] = 1;
++	  else if (regno >= NDS32_FIRST_FPR_REGNUM + 22
++		   && regno < NDS32_FIRST_FPR_REGNUM + 48)
++	    call_used_regs[regno] = 1;
++	  else
++	    call_used_regs[regno] = 0;
++	}
++    }
++  else if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++    {
++      for (regno = NDS32_FIRST_FPR_REGNUM;
++	   regno <= NDS32_LAST_FPR_REGNUM;
++	   regno++)
++	fixed_regs[regno] = 0;
++    }
++}
++
+ 
+ /* Register Classes.  */
+ 
++static reg_class_t
++nds32_preferred_rename_class (reg_class_t rclass)
++{
++  return nds32_preferred_rename_class_impl (rclass);
++}
++
+ static unsigned char
+ nds32_class_max_nregs (reg_class_t rclass ATTRIBUTE_UNUSED,
+-		       machine_mode mode)
++		       enum machine_mode mode)
+ {
+   /* Return the maximum number of consecutive registers
+-     needed to represent "mode" in a register of "rclass".  */
++     needed to represent MODE in a register of RCLASS.  */
+   return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+ }
+ 
+@@ -1200,9 +1953,24 @@ static int
+ nds32_register_priority (int hard_regno)
+ {
+   /* Encourage to use r0-r7 for LRA when optimize for size.  */
+-  if (optimize_size && hard_regno < 8)
+-    return 4;
+-  return 3;
++  if (optimize_size)
++    {
++      if (hard_regno < 8)
++	return 4;
++      else if (hard_regno < 16)
++	return 3;
++      else if (hard_regno < 28)
++	return 2;
++      else
++	return 1;
++    }
++  else
++    {
++      if (hard_regno > 27)
++	return 1;
++      else
++	return 4;
++    }
+ }
+ 
+ 
+@@ -1222,8 +1990,8 @@ nds32_register_priority (int hard_regno)
+        2. return address
+        3. callee-saved registers
+        4. <padding bytes> (we will calculte in nds32_compute_stack_frame()
+-                           and save it at
+-                           cfun->machine->callee_saved_area_padding_bytes)
++			   and save it at
++			   cfun->machine->callee_saved_area_padding_bytes)
+ 
+      [Block B]
+        1. local variables
+@@ -1241,29 +2009,29 @@ nds32_register_priority (int hard_regno)
+    By applying the basic frame/stack/argument pointers concept,
+    the layout of a stack frame shoule be like this:
+ 
+-                            |    |
++			    |    |
+        old stack pointer ->  ----
+-                            |    | \
+-                            |    |   saved arguments for
+-                            |    |   vararg functions
+-                            |    | /
++			    |    | \
++			    |    |   saved arguments for
++			    |    |   vararg functions
++			    |    | /
+       hard frame pointer ->   --
+       & argument pointer    |    | \
+-                            |    |   previous hardware frame pointer
+-                            |    |   return address
+-                            |    |   callee-saved registers
+-                            |    | /
+-           frame pointer ->   --
+-                            |    | \
+-                            |    |   local variables
+-                            |    |   and incoming arguments
+-                            |    | /
+-                              --
+-                            |    | \
+-                            |    |   outgoing
+-                            |    |   arguments
+-                            |    | /
+-           stack pointer ->  ----
++			    |    |   previous hardware frame pointer
++			    |    |   return address
++			    |    |   callee-saved registers
++			    |    | /
++	   frame pointer ->   --
++			    |    | \
++			    |    |   local variables
++			    |    |   and incoming arguments
++			    |    | /
++			      --
++			    |    | \
++			    |    |   outgoing
++			    |    |   arguments
++			    |    | /
++	   stack pointer ->  ----
+ 
+   $SFP and $AP are used to represent frame pointer and arguments pointer,
+   which will be both eliminated as hard frame pointer.  */
+@@ -1291,7 +2059,7 @@ nds32_can_eliminate (const int from_reg, const int to_reg)
+ /* -- Passing Arguments in Registers.  */
+ 
+ static rtx
+-nds32_function_arg (cumulative_args_t ca, machine_mode mode,
++nds32_function_arg (cumulative_args_t ca, enum machine_mode mode,
+ 		    const_tree type, bool named)
+ {
+   unsigned int regno;
+@@ -1306,7 +2074,7 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode,
+   if (!named)
+     {
+       /* If we are under hard float abi, we have arguments passed on the
+-         stack and all situation can be handled by GCC itself.  */
++	 stack and all situation can be handled by GCC itself.  */
+       if (TARGET_HARD_FLOAT)
+ 	return NULL_RTX;
+ 
+@@ -1320,7 +2088,7 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode,
+ 	}
+ 
+       /* No register available, return NULL_RTX.
+-         The compiler will use stack to pass argument instead.  */
++	 The compiler will use stack to pass argument instead.  */
+       return NULL_RTX;
+     }
+ 
+@@ -1329,14 +2097,34 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode,
+      are different.  */
+   if (TARGET_HARD_FLOAT)
+     {
+-      /* Currently we have not implemented hard float yet.  */
+-      gcc_unreachable ();
++      /* For TARGET_HARD_FLOAT calling convention, we use GPR and FPR
++	 to pass argument.  We have to further check TYPE and MODE so
++	 that we can determine which kind of register we shall use.  */
++
++      /* Note that we need to pass argument entirely in registers under
++	 hard float abi.  */
++      if (GET_MODE_CLASS (mode) == MODE_FLOAT
++	  && NDS32_ARG_ENTIRE_IN_FPR_REG_P (cum->fpr_offset, mode, type))
++	{
++	  /* Pick up the next available FPR register number.  */
++	  regno
++	    = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type);
++	  return gen_rtx_REG (mode, regno);
++	}
++      else if (GET_MODE_CLASS (mode) != MODE_FLOAT
++	       && NDS32_ARG_ENTIRE_IN_GPR_REG_P (cum->gpr_offset, mode, type))
++	{
++	  /* Pick up the next available GPR register number.  */
++	  regno
++	    = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type);
++	  return gen_rtx_REG (mode, regno);
++	}
+     }
+   else
+     {
+       /* For !TARGET_HARD_FLOAT calling convention, we always use GPR to pass
+-         argument.  Since we allow to pass argument partially in registers,
+-         we can just return it if there are still registers available.  */
++	 argument.  Since we allow to pass argument partially in registers,
++	 we can just return it if there are still registers available.  */
+       if (NDS32_ARG_PARTIAL_IN_GPR_REG_P (cum->gpr_offset, mode, type))
+ 	{
+ 	  /* Pick up the next available register number.  */
+@@ -1353,7 +2141,7 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode,
+ }
+ 
+ static bool
+-nds32_must_pass_in_stack (machine_mode mode, const_tree type)
++nds32_must_pass_in_stack (enum machine_mode mode, const_tree type)
+ {
+   /* Return true if a type must be passed in memory.
+      If it is NOT using hard float abi, small aggregates can be
+@@ -1366,7 +2154,7 @@ nds32_must_pass_in_stack (machine_mode mode, const_tree type)
+ }
+ 
+ static int
+-nds32_arg_partial_bytes (cumulative_args_t ca, machine_mode mode,
++nds32_arg_partial_bytes (cumulative_args_t ca, enum machine_mode mode,
+ 			 tree type, bool named ATTRIBUTE_UNUSED)
+ {
+   /* Returns the number of bytes at the beginning of an argument that
+@@ -1400,7 +2188,7 @@ nds32_arg_partial_bytes (cumulative_args_t ca, machine_mode mode,
+   remaining_reg_count
+     = NDS32_MAX_GPR_REGS_FOR_ARGS
+       - (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type)
+-         - NDS32_GPR_ARG_FIRST_REGNUM);
++	 - NDS32_GPR_ARG_FIRST_REGNUM);
+ 
+   /* Note that we have to return the nubmer of bytes, not registers count.  */
+   if (needed_reg_count > remaining_reg_count)
+@@ -1410,26 +2198,23 @@ nds32_arg_partial_bytes (cumulative_args_t ca, machine_mode mode,
+ }
+ 
+ static void
+-nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode,
++nds32_function_arg_advance (cumulative_args_t ca, enum machine_mode mode,
+ 			    const_tree type, bool named)
+ {
+-  machine_mode sub_mode;
+   CUMULATIVE_ARGS *cum = get_cumulative_args (ca);
+ 
+   if (named)
+     {
+       /* We need to further check TYPE and MODE so that we can determine
+-         which kind of register we shall advance.  */
+-      if (type && TREE_CODE (type) == COMPLEX_TYPE)
+-	sub_mode = TYPE_MODE (TREE_TYPE (type));
+-      else
+-	sub_mode = mode;
++	 which kind of register we shall advance.  */
+ 
+       /* Under hard float abi, we may advance FPR registers.  */
+-      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (sub_mode) == MODE_FLOAT)
++      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ 	{
+-	  /* Currently we have not implemented hard float yet.  */
+-	  gcc_unreachable ();
++	  cum->fpr_offset
++	    = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type)
++	      - NDS32_FPR_ARG_FIRST_REGNUM
++	      + NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+ 	}
+       else
+ 	{
+@@ -1442,9 +2227,9 @@ nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode,
+   else
+     {
+       /* If this nameless argument is NOT under TARGET_HARD_FLOAT,
+-         we can advance next register as well so that caller is
+-         able to pass arguments in registers and callee must be
+-         in charge of pushing all of them into stack.  */
++	 we can advance next register as well so that caller is
++	 able to pass arguments in registers and callee must be
++	 in charge of pushing all of them into stack.  */
+       if (!TARGET_HARD_FLOAT)
+ 	{
+ 	  cum->gpr_offset
+@@ -1456,13 +2241,23 @@ nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode,
+ }
+ 
+ static unsigned int
+-nds32_function_arg_boundary (machine_mode mode, const_tree type)
++nds32_function_arg_boundary (enum machine_mode mode, const_tree type)
+ {
+   return (nds32_needs_double_word_align (mode, type)
+ 	  ? NDS32_DOUBLE_WORD_ALIGNMENT
+ 	  : PARM_BOUNDARY);
+ }
+ 
++bool
++nds32_vector_mode_supported_p (enum machine_mode mode)
++{
++  if (mode == V4QImode
++      || mode == V2HImode)
++    return NDS32_EXT_DSP_P ();
++
++  return false;
++}
++
+ /* -- How Scalar Function Values Are Returned.  */
+ 
+ static rtx
+@@ -1470,28 +2265,68 @@ nds32_function_value (const_tree ret_type,
+ 		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ 		      bool outgoing ATTRIBUTE_UNUSED)
+ {
+-  machine_mode mode;
++  enum machine_mode mode;
+   int unsignedp;
+ 
+   mode = TYPE_MODE (ret_type);
+   unsignedp = TYPE_UNSIGNED (ret_type);
+ 
+-  mode = promote_mode (ret_type, mode, &unsignedp);
++  if (INTEGRAL_TYPE_P (ret_type))
++    mode = promote_mode (ret_type, mode, &unsignedp);
+ 
+-  return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM);
++  if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
++    return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM);
++  else
++    return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM);
+ }
+ 
+ static rtx
+-nds32_libcall_value (machine_mode mode,
++nds32_libcall_value (enum machine_mode mode,
+ 		     const_rtx fun ATTRIBUTE_UNUSED)
+ {
++  if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
++    return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM);
++
+   return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM);
+ }
+ 
+ static bool
+ nds32_function_value_regno_p (const unsigned int regno)
+ {
+-  return (regno == NDS32_GPR_RET_FIRST_REGNUM);
++  if (regno == NDS32_GPR_RET_FIRST_REGNUM
++      || (TARGET_HARD_FLOAT
++	  && regno == NDS32_FPR_RET_FIRST_REGNUM))
++    return true;
++
++  return false;
++}
++
++/* -- How Large Values Are Returned.  */
++
++static bool
++nds32_return_in_memory (const_tree type,
++			const_tree fntype ATTRIBUTE_UNUSED)
++{
++  /* Note that int_size_in_bytes can return -1 if the size can vary
++     or is larger than an integer.  */
++  HOST_WIDE_INT size = int_size_in_bytes (type);
++
++  /* For COMPLEX_TYPE, if the total size cannot be held within two registers,
++     the return value is supposed to be in memory.  We need to be aware
++     that the size may be -1.  */
++  if (TREE_CODE (type) == COMPLEX_TYPE)
++    if (size < 0 || size > 2 * UNITS_PER_WORD)
++      return true;
++
++  /* If it is BLKmode and the total size cannot be held within two registers,
++     the return value is supposed to be in memory.  We need to be aware
++     that the size may be -1.  */
++  if (TYPE_MODE (type) == BLKmode)
++    if (size < 0 || size > 2 * UNITS_PER_WORD)
++      return true;
++
++  /* For other cases, having result in memory is unnecessary.  */
++  return false;
+ }
+ 
+ /* -- Function Entry and Exit.  */
+@@ -1522,7 +2357,7 @@ nds32_asm_function_prologue (FILE *file,
+   /* Use df_regs_ever_live_p() to detect if the register
+      is ever used in the current function.  */
+   fprintf (file, "\t! registers ever_live: ");
+-  for (r = 0; r < 32; r++)
++  for (r = 0; r < 65; r++)
+     {
+       if (df_regs_ever_live_p (r))
+ 	fprintf (file, "%s, ", reg_names[r]);
+@@ -1554,6 +2389,10 @@ nds32_asm_function_prologue (FILE *file,
+       attrs = TREE_CHAIN (attrs);
+     }
+   fputc ('\n', file);
++
++  /* If there is any critical isr in this file, disable linker ifc.  */
++  if (nds32_isr_function_critical_p (current_function_decl))
++    fprintf (file, "\t.no_relax ifc\n");
+ }
+ 
+ /* After rtl prologue has been expanded, this function is used.  */
+@@ -1561,56 +2400,12 @@ static void
+ nds32_asm_function_end_prologue (FILE *file)
+ {
+   fprintf (file, "\t! END PROLOGUE\n");
+-
+-  /* If frame pointer is NOT needed and -mfp-as-gp is issued,
+-     we can generate special directive: ".omit_fp_begin"
+-     to guide linker doing fp-as-gp optimization.
+-     However, for a naked function, which means
+-     it should not have prologue/epilogue,
+-     using fp-as-gp still requires saving $fp by push/pop behavior and
+-     there is no benefit to use fp-as-gp on such small function.
+-     So we need to make sure this function is NOT naked as well.  */
+-  if (!frame_pointer_needed
+-      && !cfun->machine->naked_p
+-      && cfun->machine->fp_as_gp_p)
+-    {
+-      fprintf (file, "\t! ----------------------------------------\n");
+-      fprintf (file, "\t! Guide linker to do "
+-		     "link time optimization: fp-as-gp\n");
+-      fprintf (file, "\t! We add one more instruction to "
+-		     "initialize $fp near to $gp location.\n");
+-      fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n");
+-      fprintf (file, "\t! this extra instruction should be "
+-		     "eliminated at link stage.\n");
+-      fprintf (file, "\t.omit_fp_begin\n");
+-      fprintf (file, "\tla\t$fp,_FP_BASE_\n");
+-      fprintf (file, "\t! ----------------------------------------\n");
+-    }
+ }
+ 
+ /* Before rtl epilogue has been expanded, this function is used.  */
+ static void
+ nds32_asm_function_begin_epilogue (FILE *file)
+ {
+-  /* If frame pointer is NOT needed and -mfp-as-gp is issued,
+-     we can generate special directive: ".omit_fp_end"
+-     to claim fp-as-gp optimization range.
+-     However, for a naked function,
+-     which means it should not have prologue/epilogue,
+-     using fp-as-gp still requires saving $fp by push/pop behavior and
+-     there is no benefit to use fp-as-gp on such small function.
+-     So we need to make sure this function is NOT naked as well.  */
+-  if (!frame_pointer_needed
+-      && !cfun->machine->naked_p
+-      && cfun->machine->fp_as_gp_p)
+-    {
+-      fprintf (file, "\t! ----------------------------------------\n");
+-      fprintf (file, "\t! Claim the range of fp-as-gp "
+-		     "link time optimization\n");
+-      fprintf (file, "\t.omit_fp_end\n");
+-      fprintf (file, "\t! ----------------------------------------\n");
+-    }
+-
+   fprintf (file, "\t! BEGIN EPILOGUE\n");
+ }
+ 
+@@ -1638,41 +2433,104 @@ nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+ 		? 1
+ 		: 0);
+ 
++  if (flag_pic)
++    {
++      fprintf (file, "\tsmw.adm\t$r31, [$r31], $r31, 4\n");
++      fprintf (file, "\tsethi\t%s, hi20(_GLOBAL_OFFSET_TABLE_-8)\n",
++		      reg_names [PIC_OFFSET_TABLE_REGNUM]);
++      fprintf (file, "\tori\t%s, %s, lo12(_GLOBAL_OFFSET_TABLE_-4)\n",
++		      reg_names [PIC_OFFSET_TABLE_REGNUM],
++		      reg_names [PIC_OFFSET_TABLE_REGNUM]);
++
++      if (TARGET_ISA_V3)
++	fprintf (file, "\tadd5.pc\t$gp\n");
++      else
++	{
++	  fprintf (file, "\tmfusr\t$ta, $pc\n");
++	  fprintf (file, "\tadd\t%s, $ta, %s\n",
++			  reg_names [PIC_OFFSET_TABLE_REGNUM],
++			  reg_names [PIC_OFFSET_TABLE_REGNUM]);
++	}
++    }
++
+   if (delta != 0)
+     {
+       if (satisfies_constraint_Is15 (GEN_INT (delta)))
+ 	{
+-	  fprintf (file, "\taddi\t$r%d, $r%d, %ld\n",
++	  fprintf (file, "\taddi\t$r%d, $r%d, " HOST_WIDE_INT_PRINT_DEC "\n",
+ 		   this_regno, this_regno, delta);
+ 	}
+       else if (satisfies_constraint_Is20 (GEN_INT (delta)))
+ 	{
+-	  fprintf (file, "\tmovi\t$ta, %ld\n", delta);
++	  fprintf (file, "\tmovi\t$ta, " HOST_WIDE_INT_PRINT_DEC "\n", delta);
+ 	  fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno);
+ 	}
+       else
+ 	{
+-	  fprintf (file, "\tsethi\t$ta, hi20(%ld)\n", delta);
+-	  fprintf (file, "\tori\t$ta, $ta, lo12(%ld)\n", delta);
++	  fprintf (file,
++		   "\tsethi\t$ta, hi20(" HOST_WIDE_INT_PRINT_DEC ")\n",
++		   delta);
++	  fprintf (file,
++		   "\tori\t$ta, $ta, lo12(" HOST_WIDE_INT_PRINT_DEC ")\n",
++		   delta);
+ 	  fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno);
+ 	}
+     }
+ 
+-  fprintf (file, "\tb\t");
+-  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+-  fprintf (file, "\n");
++  if (flag_pic)
++    {
++      fprintf (file, "\tla\t$ta, ");
++      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
++      fprintf (file, "@PLT\n");
++      fprintf (file, "\t! epilogue\n");
++      fprintf (file, "\tlwi.bi\t%s, [%s], 4\n",
++	       reg_names[PIC_OFFSET_TABLE_REGNUM],
++	       reg_names[STACK_POINTER_REGNUM]);
++      fprintf (file, "\tbr\t$ta\n");
++    }
++  else
++    {
++      fprintf (file, "\tb\t");
++      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
++      fprintf (file, "\n");
++    }
+ 
+   final_end_function ();
+ }
+ 
+ /* -- Permitting tail calls.  */
+ 
++/* Return true if it is ok to do sibling call optimization.  */
++static bool
++nds32_function_ok_for_sibcall (tree decl,
++			       tree exp ATTRIBUTE_UNUSED)
++{
++  /* The DECL is NULL if it is an indirect call.  */
++
++  /* 1. Do not apply sibling call if -mv3push is enabled,
++	because pop25 instruction also represents return behavior.
+     2. If this function is an isr function, do not apply sibling call
++	because it may perform the behavior that user does not expect.
++     3. If this function is a variadic function, do not apply sibling call
++	because the stack layout may be a mess.
++     4. We don't want to apply sibling call optimization for indirect
++	sibcall because the pop behavior in epilogue may pollute the
+	content of caller-saved register when the register is used for
++	indirect sibcall.
++     5. In pic mode, it may use some registers for PLT call.  */
++  return (!TARGET_V3PUSH
++	  && !nds32_isr_function_p (current_function_decl)
++	  && (cfun->machine->va_args_size == 0)
++	  && decl
++	  && !flag_pic);
++}
++
+ /* Determine whether we need to enable warning for function return check.  */
+ static bool
+ nds32_warn_func_return (tree decl)
+ {
+-/* Naked functions are implemented entirely in assembly, including the
+-   return sequence, so suppress warnings about this.  */
++  /* Naked functions are implemented entirely in assembly, including the
++     return sequence, so suppress warnings about this.  */
+   return !nds32_naked_function_p (decl);
+ }
+ 
+@@ -1681,7 +2539,7 @@ nds32_warn_func_return (tree decl)
+ 
+ static void
+ nds32_setup_incoming_varargs (cumulative_args_t ca,
+-			      machine_mode mode,
++			      enum machine_mode mode,
+ 			      tree type,
+ 			      int *pretend_args_size,
+ 			      int second_time ATTRIBUTE_UNUSED)
+@@ -1795,7 +2653,7 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+     sorry ("a nested function is not supported for reduced registers");
+ 
+   /* STEP 1: Copy trampoline code template into stack,
+-             fill up essential data into stack.  */
++	     fill up essential data into stack.  */
+ 
+   /* Extract nested function address rtx.  */
+   fnaddr = XEXP (DECL_RTL (fndecl), 0);
+@@ -1831,8 +2689,8 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+       && (tramp_align_in_bytes % nds32_cache_block_size) == 0)
+     {
+       /* Under this condition, the starting address of trampoline
+-         must be aligned to the starting address of each cache block
+-         and we do not have to worry about cross-boundary issue.  */
++	 must be aligned to the starting address of each cache block
++	 and we do not have to worry about cross-boundary issue.  */
+       for (i = 0;
+ 	   i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1)
+ 	       / nds32_cache_block_size;
+@@ -1847,10 +2705,10 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+   else if (TRAMPOLINE_SIZE > nds32_cache_block_size)
+     {
+       /* The starting address of trampoline code
+-         may not be aligned to the cache block,
+-         so the trampoline code may be across two cache block.
+-         We need to sync the last element, which is 4-byte size,
+-         of trampoline template.  */
++	 may not be aligned to the cache block,
++	 so the trampoline code may be across two cache block.
++	 We need to sync the last element, which is 4-byte size,
++	 of trampoline template.  */
+       for (i = 0;
+ 	   i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1)
+ 	       / nds32_cache_block_size;
+@@ -1871,16 +2729,16 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+   else
+     {
+       /* This is the simplest case.
+-         Because TRAMPOLINE_SIZE is less than or
+-         equal to nds32_cache_block_size,
+-         we can just sync start address and
+-         the last element of trampoline code.  */
++	 Because TRAMPOLINE_SIZE is less than or
++	 equal to nds32_cache_block_size,
++	 we can just sync start address and
++	 the last element of trampoline code.  */
+ 
+       /* Sync starting address of tampoline code.  */
+       emit_move_insn (tmp_reg, sync_cache_addr);
+       emit_insn (isync_insn);
+       /* Sync the last element, which is 4-byte size,
+-         of trampoline template.  */
++	 of trampoline template.  */
+       emit_move_insn (tmp_reg,
+ 		      plus_constant (Pmode, sync_cache_addr,
+ 				     TRAMPOLINE_SIZE - 4));
+@@ -1896,11 +2754,52 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+ /* Addressing Modes.  */
+ 
+ static bool
+-nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
++nds32_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+ {
++  if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++    {
++     /* When using floating-point instructions,
++	we don't allow 'addr' to be [symbol_ref], [CONST] pattern.  */
++      if ((mode == DFmode || mode == SFmode)
++	  && (GET_CODE (x) == SYMBOL_REF
++	  || GET_CODE(x) == CONST))
++	return false;
++
++      /* Allow [post_modify] addressing mode, when using FPU instructions.  */
++      if (GET_CODE (x) == POST_MODIFY
++	  && mode == DFmode)
++	{
++	  if (GET_CODE (XEXP (x, 0)) == REG
++	      && GET_CODE (XEXP (x, 1)) == PLUS)
++	    {
++	      rtx plus_op = XEXP (x, 1);
++	      rtx op0 = XEXP (plus_op, 0);
++	      rtx op1 = XEXP (plus_op, 1);
++
++	      if (nds32_address_register_rtx_p (op0, strict)
++		  && CONST_INT_P (op1))
++		{
++		  if (satisfies_constraint_Is14 (op1))
++		    {
++		      /* If it is not under strictly aligned situation,
++			 we can return true without checking alignment.  */
++		      if (!cfun->machine->strict_aligned_p)
++			return true;
++		     /* Make sure address is word alignment.
++			Currently we do not have 64-bit load/store yet,
++			so we will use two 32-bit load/store instructions to do
++			memory access and they are single word alignment.  */
++		      else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (op1)))
++			return true;
++		    }
++		}
++	    }
++	}
++    }
++
+   /* For (mem:DI addr) or (mem:DF addr) case,
+      we only allow 'addr' to be [reg], [symbol_ref],
+-                                [const], or [reg + const_int] pattern.  */
++				[const], or [reg + const_int] pattern.  */
+   if (mode == DImode || mode == DFmode)
+     {
+       /* Allow [Reg + const_int] addressing mode.  */
+@@ -1910,13 +2809,19 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 	      && nds32_legitimate_index_p (mode, XEXP (x, 1), strict)
+ 	      && CONST_INT_P (XEXP (x, 1)))
+ 	    return true;
+-
+ 	  else if (nds32_address_register_rtx_p (XEXP (x, 1), strict)
+ 		   && nds32_legitimate_index_p (mode, XEXP (x, 0), strict)
+ 		   && CONST_INT_P (XEXP (x, 0)))
+ 	    return true;
+ 	}
+ 
++      /* Allow [post_inc] and [post_dec] addressing mode.  */
++      if (GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
++	{
++	  if (nds32_address_register_rtx_p (XEXP (x, 0), strict))
++	    return true;
++	}
++
+       /* Now check [reg], [symbol_ref], and [const].  */
+       if (GET_CODE (x) != REG
+ 	  && GET_CODE (x) != SYMBOL_REF
+@@ -1933,18 +2838,26 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 
+     case SYMBOL_REF:
+       /* (mem (symbol_ref A)) => [symbol_ref] */
++
++      if (flag_pic || SYMBOL_REF_TLS_MODEL (x))
++	return false;
++
++      if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x))
++	return false;
++
+       /* If -mcmodel=large, the 'symbol_ref' is not a valid address
+-         during or after LRA/reload phase.  */
++	 during or after LRA/reload phase.  */
+       if (TARGET_CMODEL_LARGE
+ 	  && (reload_completed
+ 	      || reload_in_progress
+ 	      || lra_in_progress))
+ 	return false;
+       /* If -mcmodel=medium and the symbol references to rodata section,
+-         the 'symbol_ref' is not a valid address during or after
+-         LRA/reload phase.  */
++	 the 'symbol_ref' is not a valid address during or after
++	 LRA/reload phase.  */
+       if (TARGET_CMODEL_MEDIUM
+-	  && NDS32_SYMBOL_REF_RODATA_P (x)
++	  && (NDS32_SYMBOL_REF_RODATA_P (x)
++	      || CONSTANT_POOL_ADDRESS_P (x))
+ 	  && (reload_completed
+ 	      || reload_in_progress
+ 	      || lra_in_progress))
+@@ -1954,7 +2867,7 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 
+     case CONST:
+       /* (mem (const (...)))
+-         => [ + const_addr ], where const_addr = symbol_ref + const_int */
++	 => [ + const_addr ], where const_addr = symbol_ref + const_int */
+       if (GET_CODE (XEXP (x, 0)) == PLUS)
+ 	{
+ 	  rtx plus_op = XEXP (x, 0);
+@@ -1965,17 +2878,21 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 	  if (GET_CODE (op0) == SYMBOL_REF && CONST_INT_P (op1))
+ 	    {
+ 	      /* Now we see the [ + const_addr ] pattern, but we need
+-	         some further checking.  */
++		 some further checking.  */
++
++	      if (flag_pic)
++		return false;
++
+ 	      /* If -mcmodel=large, the 'const_addr' is not a valid address
+-	         during or after LRA/reload phase.  */
++		 during or after LRA/reload phase.  */
+ 	      if (TARGET_CMODEL_LARGE
+ 		  && (reload_completed
+ 		      || reload_in_progress
+ 		      || lra_in_progress))
+ 		return false;
+ 	      /* If -mcmodel=medium and the symbol references to rodata section,
+-	         the 'const_addr' is not a valid address during or after
+-	         LRA/reload phase.  */
++		 the 'const_addr' is not a valid address during or after
++		 LRA/reload phase.  */
+ 	      if (TARGET_CMODEL_MEDIUM
+ 		  && NDS32_SYMBOL_REF_RODATA_P (op0)
+ 		  && (reload_completed
+@@ -1993,9 +2910,9 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 
+     case POST_MODIFY:
+       /* (mem (post_modify (reg) (plus (reg) (reg))))
+-         => [Ra], Rb */
++	 => [Ra], Rb */
+       /* (mem (post_modify (reg) (plus (reg) (const_int))))
+-         => [Ra], const_int */
++	 => [Ra], const_int */
+       if (GET_CODE (XEXP (x, 0)) == REG
+ 	  && GET_CODE (XEXP (x, 1)) == PLUS)
+ 	{
+@@ -2018,7 +2935,7 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+       /* (mem (post_inc reg)) => [Ra], 1/2/4 */
+       /* (mem (post_dec reg)) => [Ra], -1/-2/-4 */
+       /* The 1/2/4 or -1/-2/-4 have been displayed in nds32.md.
+-         We only need to deal with register Ra.  */
++	 We only need to deal with register Ra.  */
+       if (nds32_address_register_rtx_p (XEXP (x, 0), strict))
+ 	return true;
+       else
+@@ -2026,11 +2943,11 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 
+     case PLUS:
+       /* (mem (plus reg const_int))
+-         => [Ra + imm] */
++	 => [Ra + imm] */
+       /* (mem (plus reg reg))
+-         => [Ra + Rb] */
++	 => [Ra + Rb] */
+       /* (mem (plus (mult reg const_int) reg))
+-         => [Ra + Rb << sv] */
++	 => [Ra + Rb << sv] */
+       if (nds32_address_register_rtx_p (XEXP (x, 0), strict)
+ 	  && nds32_legitimate_index_p (mode, XEXP (x, 1), strict))
+ 	return true;
+@@ -2042,39 +2959,292 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
+ 
+     case LO_SUM:
+       /* (mem (lo_sum (reg) (symbol_ref))) */
+-      /* (mem (lo_sum (reg) (const))) */
+-      gcc_assert (REG_P (XEXP (x, 0)));
+-      if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF
+-	  || GET_CODE (XEXP (x, 1)) == CONST)
+-	return nds32_legitimate_address_p (mode, XEXP (x, 1), strict);
+-      else
++      /* (mem (lo_sum (reg) (const (plus (symbol_ref) (reg)))) */
++      /* TLS case: (mem (lo_sum (reg) (const (unspec symbol_ref X)))) */
++      /* The LO_SUM is a valid address if and only if we would like to
++	 generate 32-bit full address memory access with any of following
++	 circumstance:
++	   1. -mcmodel=large.
++	   2. -mcmodel=medium and the symbol_ref references to rodata.  */
++      {
++	rtx sym = NULL_RTX;
++
++	if (flag_pic)
++	  return false;
++
++	if (!REG_P (XEXP (x, 0)))
++	  return false;
++
++	if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
++	  sym = XEXP (x, 1);
++	else if (GET_CODE (XEXP (x, 1)) == CONST)
++	  {
++	    rtx plus = XEXP(XEXP (x, 1), 0);
++	    if (GET_CODE (plus) == PLUS)
++	      sym = XEXP (plus, 0);
++	    else if (GET_CODE (plus) == UNSPEC)
++	      sym = XVECEXP (plus, 0, 0);
++	  }
++	else
++	  return false;
++
++	gcc_assert (GET_CODE (sym) == SYMBOL_REF);
++
++	if (TARGET_ICT_MODEL_LARGE
++	    && nds32_indirect_call_referenced_p (sym))
++	  return true;
++
++	if (TARGET_CMODEL_LARGE)
++	  return true;
++	else if (TARGET_CMODEL_MEDIUM
++		 && NDS32_SYMBOL_REF_RODATA_P (sym))
++	  return true;
++	else
++	  return false;
++      }
++
++    default:
++      return false;
++    }
++}
++
++static rtx
++nds32_legitimize_address (rtx x,
++			  rtx oldx ATTRIBUTE_UNUSED,
++			  enum machine_mode mode ATTRIBUTE_UNUSED)
++{
++  if (nds32_tls_referenced_p (x))
++    x = nds32_legitimize_tls_address (x);
++  else if (flag_pic && SYMBOLIC_CONST_P (x))
++    x = nds32_legitimize_pic_address (x);
++  else if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x))
++    x = nds32_legitimize_ict_address (x);
++
++  return x;
++}
++
++static bool
++nds32_legitimate_constant_p (enum machine_mode mode, rtx x)
++{
++  switch (GET_CODE (x))
++    {
++    case CONST_DOUBLE:
++      if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++	  && (mode == DFmode || mode == SFmode))
++	return false;
++      break;
++    case CONST:
++      x = XEXP (x, 0);
++
++      if (GET_CODE (x) == PLUS)
++	{
++	  if (!CONST_INT_P (XEXP (x, 1)))
++	    return false;
++	  x = XEXP (x, 0);
++	}
++
++      if (GET_CODE (x) == UNSPEC)
++	{
++	  switch (XINT (x, 1))
++	    {
++	    case UNSPEC_GOT:
++	    case UNSPEC_GOTOFF:
++	    case UNSPEC_PLT:
++	    case UNSPEC_TLSGD:
++	    case UNSPEC_TLSLD:
++	    case UNSPEC_TLSIE:
++	    case UNSPEC_TLSLE:
++	    case UNSPEC_ICT:
++	      return false;
++	    default:
++	      return true;
++	    }
++	}
++      break;
++    case SYMBOL_REF:
++      /* TLS symbols need a call to resolve in
++	 precompute_register_parameters.  */
++      if (SYMBOL_REF_TLS_MODEL (x))
+ 	return false;
++      break;
++    default:
++      return true;
++    }
++
++  return true;
++}
++
+/* Reorganize the UNSPEC CONST and return its direct symbol.  */
++static rtx
++nds32_delegitimize_address (rtx x)
++{
++  x = delegitimize_mem_from_attrs (x);
++
++  if (GET_CODE(x) == CONST)
++    {
++      rtx inner = XEXP (x, 0);
++
++      /* Handle for GOTOFF.  */
++      if (GET_CODE (inner) == PLUS)
++	inner = XEXP (inner, 0);
++
++      if (GET_CODE (inner) == UNSPEC)
++	{
++	  switch (XINT (inner, 1))
++	    {
++	    case UNSPEC_GOTINIT:
++	    case UNSPEC_GOT:
++	    case UNSPEC_GOTOFF:
++	    case UNSPEC_PLT:
++	    case UNSPEC_TLSGD:
++	    case UNSPEC_TLSLD:
++	    case UNSPEC_TLSIE:
++	    case UNSPEC_TLSLE:
++	    case UNSPEC_ICT:
++	      x = XVECEXP (inner, 0, 0);
++	      break;
++	    default:
++	      break;
++	    }
++	}
++    }
++  return x;
++}
++
++static enum machine_mode
++nds32_vectorize_preferred_simd_mode (enum machine_mode mode)
++{
++  if (!NDS32_EXT_DSP_P ())
++    return word_mode;
++
++  switch (mode)
++    {
++    case QImode:
++      return V4QImode;
++    case HImode:
++      return V2HImode;
++    default:
++      return word_mode;
++    }
++}
+ 
++static bool
++nds32_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
++{
++  switch (GET_CODE (x))
++    {
++    case CONST:
++      return !nds32_legitimate_constant_p (mode, x);
++    case SYMBOL_REF:
++      /* All symbols have to be accessed through gp-relative in PIC mode.  */
+      /* We don't want to force symbols into the constant pool in .text,
+	 because we use gp-relative instructions to load them in the small
+	 or medium model.  */
++      if (flag_pic
++	  || SYMBOL_REF_TLS_MODEL (x)
++	  || TARGET_CMODEL_SMALL
++	  || TARGET_CMODEL_MEDIUM)
++	return true;
++      break;
++    case CONST_INT:
++    case CONST_DOUBLE:
++      if (flag_pic && (lra_in_progress || reload_completed))
++	return true;
++      break;
+     default:
+       return false;
+     }
++  return false;
++}
++
++
++/* Condition Code Status.  */
++
++/* -- Representation of condition codes using registers.  */
++
+static void
+nds32_canonicalize_comparison (int *code,
+			       rtx *op0 ATTRIBUTE_UNUSED,
+			       rtx *op1,
+			       bool op0_preserve_value ATTRIBUTE_UNUSED)
+{
+  /* When the instruction combination pass tries to combine a comparison insn
+     with its previous insns, it also transforms the operator in order to
+     minimize its constant field.  For example, it tries to transform a
+     comparison insn from
+       (set (reg:SI 54)
+	   (ltu:SI (reg:SI 52)
+	       (const_int 10 [0xa])))
+     to
+       (set (reg:SI 54)
+	   (leu:SI (reg:SI 52)
+	       (const_int 9 [0x9])))
+
+     However, the nds32 target only provides instructions supporting the LTU
+     operation directly, and the pattern "cbranchsi4" only expands the LTU
+     form.  To handle the non-LTU operations generated by passes other than
+     RTL expansion, this hook reverts those changes.  Only the LEU case is
+     handled, unless other passes perform more aggressive transformations.
+
+     A constant of -1 (all ones in SImode) is left alone: adding 1 would wrap
+     to 0 and turn an always-true LEU into an always-false LTU.  */
+
+  if (*code == LEU && CONST_INT_P (*op1) && INTVAL (*op1) != -1)
+    {
+      *op1 = gen_int_mode (INTVAL (*op1) + 1, SImode);
+      *code = LTU;
+    }
 }
+ 
+ 
+ /* Describing Relative Costs of Operations.  */
+ 
+ static int
+-nds32_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
++nds32_register_move_cost (enum machine_mode mode,
+ 			  reg_class_t from,
+ 			  reg_class_t to)
+ {
+-  if (from == HIGH_REGS || to == HIGH_REGS)
+-    return 6;
+  /* On graywolf, FPR/GPR moves are cheaper than on other cpus.  */
++  if (TARGET_PIPELINE_GRAYWOLF)
++    {
++      if (GET_MODE_SIZE (mode) == 8)
++	{
++	  /* DPR to GPR.  */
++	  if (from == FP_REGS && to != FP_REGS)
++	    return 3;
++	  /* GPR to DPR.  */
++	  if (from != FP_REGS && to == FP_REGS)
++	    return 2;
++	}
++      else
++	{
++	  if ((from == FP_REGS && to != FP_REGS)
++	      || (from != FP_REGS && to == FP_REGS))
++	    return 2;
++	}
++    }
+ 
+-  return 2;
++  if ((from == FP_REGS && to != FP_REGS)
++      || (from != FP_REGS && to == FP_REGS))
++    return 3;
++  else if (from == HIGH_REGS || to == HIGH_REGS)
++    return optimize_size ? 6 : 2;
++  else
++    return 2;
+ }
+ 
+ static int
+-nds32_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
++nds32_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ 			reg_class_t rclass ATTRIBUTE_UNUSED,
+ 			bool in ATTRIBUTE_UNUSED)
+ {
+-  return 8;
+  /* Memory access needs only 1 cycle in our low-end processor,
+     however most memory access instructions are 4 bytes long,
+     so let the cost be 8 for optimize_size, otherwise 4.  */
++  if (nds32_memory_model_option == MEMORY_MODEL_FAST)
++    return optimize_size ? 8 : 4;
++  else
++    return 8;
+ }
+ 
+ /* This target hook describes the relative costs of RTL expressions.
+@@ -2094,7 +3264,7 @@ nds32_rtx_costs (rtx x,
+ 
+ static int
+ nds32_address_cost (rtx address,
+-		    machine_mode mode,
++		    enum machine_mode mode,
+ 		    addr_space_t as,
+ 		    bool speed)
+ {
+@@ -2102,6 +3272,55 @@ nds32_address_cost (rtx address,
+ }
+ 
+ 
++/* Adjusting the Instruction Scheduler.  */
++
++static int
++nds32_sched_issue_rate (void)
++{
++  switch (nds32_cpu_option)
++  {
++  case CPU_GRAYWOLF:
++  case CPU_PANTHER:
++    return 2;
++
++  default:
++    return 1;
++  }
++}
++
++static int
++nds32_sched_adjust_cost (rtx_insn *insn ATTRIBUTE_UNUSED, rtx link, rtx_insn *dep ATTRIBUTE_UNUSED, int cost)
++{
++  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
++      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
++    {
++      if (nds32_sched_issue_rate () > 1)
++        return 1;
++
++      return 0;
++    }
++
++  return cost;
++}
++
++static void
++nds32_set_sched_flags (spec_info_t spec_info ATTRIBUTE_UNUSED)
++{
++  if (!flag_reorg_out_of_order
++      || nds32_sched_issue_rate () < 2)
++    return;
++
++  unsigned int *flags = &(current_sched_info->flags);
++
+  // Do not allow the scheduler to find inc/mem pairs and break dependencies
+  // by duplicating address computations. Otherwise, after doing so, the
+  // scheduler will consider that the two insns can be issued at the same cycle
+  // so that the later insn isn't marked as TImode. It will result in a wrong
+  // behavior for out-of-order reorganization.
++  *flags |= DONT_BREAK_DEPENDENCIES;
++}
++
++
+ /* Dividing the Output into Sections (Texts, Data, . . . ).  */
+ 
+ /* If references to a symbol or a constant must be treated differently
+@@ -2150,17 +3369,56 @@ nds32_asm_file_start (void)
+ {
+   default_file_start ();
+ 
++  if (flag_pic)
++    fprintf (asm_out_file, "\t.pic\n");
++
+   /* Tell assembler which ABI we are using.  */
+   fprintf (asm_out_file, "\t! ABI version\n");
+-  fprintf (asm_out_file, "\t.abi_2\n");
++  if (TARGET_HARD_FLOAT)
++    fprintf (asm_out_file, "\t.abi_2fp_plus\n");
++  else
++    fprintf (asm_out_file, "\t.abi_2\n");
+ 
+   /* Tell assembler that this asm code is generated by compiler.  */
+   fprintf (asm_out_file, "\t! This asm file is generated by compiler\n");
+   fprintf (asm_out_file, "\t.flag\tverbatim\n");
+-  /* Give assembler the size of each vector for interrupt handler.  */
+-  fprintf (asm_out_file, "\t! This vector size directive is required "
+-			 "for checking inconsistency on interrupt handler\n");
+-  fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size);
++
++  /* We need to provide the size of each vector for interrupt handler
++     under elf toolchain.  */
++  if (!TARGET_LINUX_ABI)
++    {
++      fprintf (asm_out_file, "\t! This vector size directive is required "
++			     "for checking inconsistency on interrupt handler\n");
++      fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size);
++    }
++
++  /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os,
++     the compiler may produce 'la $fp,_FP_BASE_' instruction
++     at prologue for fp-as-gp optimization.
++     We should emit weak reference of _FP_BASE_ to avoid undefined reference
++     in case user does not pass '--relax' option to linker.  */
++  if (!TARGET_LINUX_ABI && (TARGET_FORCE_FP_AS_GP || optimize_size))
++    {
++      fprintf (asm_out_file, "\t! This weak reference is required to do "
++			     "fp-as-gp link time optimization\n");
++      fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n");
++    }
++  /* If user enables '-mifc', we should emit relaxation directive
++     to tell linker that this file is allowed to do ifc optimization.  */
++  if (TARGET_IFC)
++    {
++      fprintf (asm_out_file, "\t! This relaxation directive is required "
++			     "to do ifc link time optimization\n");
++      fprintf (asm_out_file, "\t.relax\tifc\n");
++    }
++  /* If user enables '-mex9', we should emit relaxation directive
++     to tell linker that this file is allowed to do ex9 optimization.  */
++  if (TARGET_EX9)
++    {
++      fprintf (asm_out_file, "\t! This relaxation directive is required "
++			     "to do ex9 link time optimization\n");
++      fprintf (asm_out_file, "\t.relax\tex9\n");
++    }
+ 
+   fprintf (asm_out_file, "\t! ------------------------------------\n");
+ 
+@@ -2171,6 +3429,53 @@ nds32_asm_file_start (void)
+   if (TARGET_ISA_V3M)
+     fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3M");
+ 
++  switch (nds32_cpu_option)
++    {
++    case CPU_N6:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N6");
++      break;
++
++    case CPU_N7:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N7");
++      break;
++
++    case CPU_N8:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N8");
++      break;
++
++    case CPU_E8:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "E8");
++      break;
++
++    case CPU_N9:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N9");
++      break;
++
++    case CPU_N10:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N10");
++      break;
++
++    case CPU_GRAYWOLF:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Graywolf");
++      break;
++
++    case CPU_N12:
++    case CPU_N13:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N13");
++      break;
++
++    case CPU_PANTHER:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Panther");
++      break;
++
++    case CPU_SIMPLE:
++      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "SIMPLE");
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
+   if (TARGET_CMODEL_SMALL)
+     fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "SMALL");
+   if (TARGET_CMODEL_MEDIUM)
+@@ -2181,6 +3486,15 @@ nds32_asm_file_start (void)
+   fprintf (asm_out_file, "\t! Endian setting\t: %s\n",
+ 			 ((TARGET_BIG_ENDIAN) ? "big-endian"
+ 					      : "little-endian"));
++  fprintf (asm_out_file, "\t! Use SP floating-point instruction\t: %s\n",
++			 ((TARGET_FPU_SINGLE) ? "Yes"
++					      : "No"));
++  fprintf (asm_out_file, "\t! Use DP floating-point instruction\t: %s\n",
++			 ((TARGET_FPU_DOUBLE) ? "Yes"
++					      : "No"));
++  fprintf (asm_out_file, "\t! ABI version\t\t: %s\n",
++			 ((TARGET_HARD_FLOAT) ? "ABI2FP+"
++					      : "ABI2"));
+ 
+   fprintf (asm_out_file, "\t! ------------------------------------\n");
+ 
+@@ -2188,8 +3502,14 @@ nds32_asm_file_start (void)
+ 			 ((TARGET_CMOV) ? "Yes"
+ 					: "No"));
+   fprintf (asm_out_file, "\t! Use performance extension\t: %s\n",
+-			 ((TARGET_PERF_EXT) ? "Yes"
++			 ((TARGET_EXT_PERF) ? "Yes"
+ 					    : "No"));
++  fprintf (asm_out_file, "\t! Use performance extension 2\t: %s\n",
++			 ((TARGET_EXT_PERF2) ? "Yes"
++					     : "No"));
++  fprintf (asm_out_file, "\t! Use string extension\t\t: %s\n",
++			 ((TARGET_EXT_STRING) ? "Yes"
++					      : "No"));
+ 
+   fprintf (asm_out_file, "\t! ------------------------------------\n");
+ 
+@@ -2203,10 +3523,18 @@ nds32_asm_file_start (void)
+ 			 ((TARGET_REDUCED_REGS) ? "Yes"
+ 						: "No"));
+ 
++  fprintf (asm_out_file, "\t! Support unaligned access\t\t: %s\n",
++			 (flag_unaligned_access ? "Yes"
++						: "No"));
++
+   fprintf (asm_out_file, "\t! ------------------------------------\n");
+ 
+   if (optimize_size)
+     fprintf (asm_out_file, "\t! Optimization level\t: -Os\n");
++  else if (optimize_fast)
++    fprintf (asm_out_file, "\t! Optimization level\t: -Ofast\n");
++  else if (optimize_debug)
++    fprintf (asm_out_file, "\t! Optimization level\t: -Og\n");
+   else
+     fprintf (asm_out_file, "\t! Optimization level\t: -O%d\n", optimize);
+ 
+@@ -2225,9 +3553,65 @@ nds32_asm_file_end (void)
+ {
+   nds32_asm_file_end_for_isr ();
+ 
++  /* The NDS32 Linux stack is mapped non-executable by default, so add a
++     .note.GNU-stack section.  */
++  if (TARGET_LINUX_ABI)
++    file_end_indicate_exec_stack ();
++
+   fprintf (asm_out_file, "\t! ------------------------------------\n");
+ }
+ 
++/* Print the nds32-specific UNSPEC address constant X to FILE.  The wrapped
++   operand is emitted with output_addr_const, followed by the relocation
++   suffix that matches the unspec number.  Return true if X was printed
++   here and false when X is not an UNSPEC or its number is not listed.  */
++static bool
++nds32_asm_output_addr_const_extra (FILE *file, rtx x)
++{
++  /* Relocation suffix to append after the operand; stays empty for
++     UNSPEC_GOTINIT, which is printed without any decoration.  */
++  const char *suffix = "";
++
++  if (GET_CODE (x) != UNSPEC)
++    return false;
++
++  switch (XINT (x, 1))
++    {
++    case UNSPEC_GOTINIT:
++      break;
++    case UNSPEC_GOTOFF:
++      suffix = "@GOTOFF";
++      break;
++    case UNSPEC_GOT:
++      suffix = "@GOT";
++      break;
++    case UNSPEC_PLT:
++      suffix = "@PLT";
++      break;
++    case UNSPEC_TLSGD:
++    case UNSPEC_TLSLD:
++      /* UNSPEC_TLSGD and UNSPEC_TLSLD share one suffix.  */
++      suffix = "@TLSDESC";
++      break;
++    case UNSPEC_TLSIE:
++      suffix = "@GOTTPOFF";
++      break;
++    case UNSPEC_TLSLE:
++      suffix = "@TPOFF";
++      break;
++    case UNSPEC_ICT:
++      suffix = "@ICT";
++      break;
++    default:
++      /* Not one of ours: print nothing and report it as unhandled.  */
++      return false;
++    }
++
++  output_addr_const (file, XVECEXP (x, 0, 0));
++  fputs (suffix, file);
++  return true;
++}
++
+ /* -- Output and Generation of Labels.  */
+ 
+ static void
+@@ -2243,7 +3627,15 @@ nds32_asm_globalize_label (FILE *stream, const char *name)
+ static void
+ nds32_print_operand (FILE *stream, rtx x, int code)
+ {
+-  int op_value;
++  HOST_WIDE_INT op_value = 0;
++  HOST_WIDE_INT one_position;
++  HOST_WIDE_INT zero_position;
++  bool pick_lsb_p = false;
++  bool pick_msb_p = false;
++  int regno;
++
++  if (CONST_INT_P (x))
++    op_value = INTVAL (x);
+ 
+   switch (code)
+     {
+@@ -2251,29 +3643,82 @@ nds32_print_operand (FILE *stream, rtx x, int code)
+       /* Do nothing special.  */
+       break;
+ 
+-    case 'V':
+-      /* 'x' is supposed to be CONST_INT, get the value.  */
++    case 'b':
++      /* Use exact_log2() to search the 0-bit position.  */
+       gcc_assert (CONST_INT_P (x));
+-      op_value = INTVAL (x);
++      zero_position = exact_log2 (~UINTVAL (x) & GET_MODE_MASK (SImode));
++      gcc_assert (zero_position != -1);
++      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, zero_position);
+ 
+-      /* According to the Andes architecture,
+-         the system/user register index range is 0 ~ 1023.
+-         In order to avoid conflict between user-specified-integer value
+-         and enum-specified-register value,
+-         the 'enum nds32_intrinsic_registers' value
+-         in nds32_intrinsic.h starts from 1024.  */
+-      if (op_value < 1024 && op_value >= 0)
+-	{
+-	  /* If user gives integer value directly (0~1023),
+-	     we just print out the value.  */
+-	  fprintf (stream, "%d", op_value);
+-	}
+-      else if (op_value < 0
+-	       || op_value >= ((int) ARRAY_SIZE (nds32_intrinsic_register_names)
+-			       + 1024))
+-	{
+-	  /* The enum index value for array size is out of range.  */
+-	  error ("intrinsic register index is out of range");
++      /* No need to handle following process, so return immediately.  */
++      return;
++
++    case 'e':
++      gcc_assert (MEM_P (x)
++		  && GET_CODE (XEXP (x, 0)) == PLUS
++		  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
++      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (XEXP (XEXP (x, 0), 1)));
++
++      /* No need to handle following process, so return immediately.  */
++      return;
++
++    case 'v':
++      gcc_assert (CONST_INT_P (x)
++		  && (INTVAL (x) == 0
++		      || INTVAL (x) == 8
++		      || INTVAL (x) == 16
++		      || INTVAL (x) == 24));
++      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
++
++      /* No need to handle following process, so return immediately.  */
++      return;
++
++    case 'B':
++      /* Use exact_log2() to search the 1-bit position.  */
++      gcc_assert (CONST_INT_P (x));
++      one_position = exact_log2 (UINTVAL (x) & GET_MODE_MASK (SImode));
++      gcc_assert (one_position != -1);
++      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, one_position);
++
++      /* No need to handle following process, so return immediately.  */
++      return;
++
++    case 'L':
++      /* X is supposed to be REG rtx.  */
++      gcc_assert (REG_P (x));
++      /* Claim that we are going to pick LSB part of X.  */
++      pick_lsb_p = true;
++      break;
++
++    case 'H':
++      /* X is supposed to be REG rtx.  */
++      gcc_assert (REG_P (x));
++      /* Claim that we are going to pick MSB part of X.  */
++      pick_msb_p = true;
++      break;
++
++    case 'V':
++      /* X is supposed to be CONST_INT, get the value.  */
++      gcc_assert (CONST_INT_P (x));
++
++      /* According to the Andes architecture,
++	 the system/user register index range is 0 ~ 1023.
++	 In order to avoid conflict between user-specified-integer value
++	 and enum-specified-register value,
++	 the 'enum nds32_intrinsic_registers' value
++	 in nds32_intrinsic.h starts from 1024.  */
++      if (op_value < 1024 && op_value >= 0)
++	{
++	  /* If user gives integer value directly (0~1023),
++	     we just print out the value.  */
++	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, op_value);
++	}
++      else if (op_value < 0
++	       || op_value >= ((int) ARRAY_SIZE (nds32_intrinsic_register_names)
++			       + 1024))
++	{
++	  /* The enum index value for array size is out of range.  */
++	  error ("intrinsic register index is out of range");
+ 	}
+       else
+ 	{
+@@ -2286,6 +3731,45 @@ nds32_print_operand (FILE *stream, rtx x, int code)
+       /* No need to handle following process, so return immediately.  */
+       return;
+ 
++    case 'R': /* cctl valck  */
++      /* Note the cctl divide to 5 group and share the same name table.  */
++      if (op_value < 0 || op_value > 4)
++	error ("CCTL intrinsic function subtype out of range!");
++      fprintf (stream, "%s", nds32_cctl_names[op_value]);
++      return;
++
++    case 'T': /* cctl idxwbinv  */
++      /* Note the cctl divide to 5 group and share the same name table.  */
++      if (op_value < 0 || op_value > 4)
++	error ("CCTL intrinsic function subtype out of range!");
++      fprintf (stream, "%s", nds32_cctl_names[op_value + 4]);
++      return;
++
++    case 'U': /* cctl vawbinv  */
++      /* Note the cctl divide to 5 group and share the same name table.  */
++      if (op_value < 0 || op_value > 4)
++	error ("CCTL intrinsic function subtype out of range!");
++      fprintf (stream, "%s", nds32_cctl_names[op_value + 8]);
++      return;
++
++    case 'X': /* cctl idxread  */
++      /* Note the cctl divide to 5 group and share the same name table.  */
++      if (op_value < 0 || op_value > 4)
++	error ("CCTL intrinsic function subtype out of range!");
++      fprintf (stream, "%s", nds32_cctl_names[op_value + 12]);
++      return;
++
++    case 'W': /* cctl idxwrite  */
++      /* Note the cctl divide to 5 group and share the same name table.  */
++      if (op_value < 0 || op_value > 4)
++	error ("CCTL intrinsic function subtype out of range!");
++      fprintf (stream, "%s", nds32_cctl_names[op_value + 16]);
++      return;
++
++    case 'Z': /* dpref  */
++      fprintf (stream, "%s", nds32_dpref_names[op_value]);
++      return;
++
+     default :
+       /* Unknown flag.  */
+       output_operand_lossage ("invalid operand output code");
+@@ -2295,35 +3779,113 @@ nds32_print_operand (FILE *stream, rtx x, int code)
+   switch (GET_CODE (x))
+     {
+     case LABEL_REF:
++      output_addr_const (stream, x);
++      break;
++
+     case SYMBOL_REF:
+       output_addr_const (stream, x);
++
++      if (!TARGET_LINUX_ABI && nds32_indirect_call_referenced_p (x))
++	fprintf (stream, "@ICT");
++
+       break;
+ 
+     case REG:
++      /* Print a Double-precision register name.  */
++      if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode)
++	  && NDS32_IS_FPR_REGNUM (REGNO (x)))
++	{
++	  regno = REGNO (x);
++	  if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno))
++	    {
++	      output_operand_lossage ("invalid operand for code '%c'", code);
++	      break;
++	    }
++	  fprintf (stream, "$fd%d", (regno - NDS32_FIRST_FPR_REGNUM) >> 1);
++	  break;
++	}
++
++      /* Print LSB or MSB part of register pair if the
++	 constraint modifier 'L' or 'H' is specified.  */
++      if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode)
++	  && NDS32_IS_GPR_REGNUM (REGNO (x)))
++	{
++	  if ((pick_lsb_p && WORDS_BIG_ENDIAN)
++	      || (pick_msb_p && !WORDS_BIG_ENDIAN))
++	    {
++	      /* If we would like to print out LSB register under big-endian,
++		 or print out MSB register under little-endian, we need to
++		 increase register number.  */
++	      regno = REGNO (x);
++	      regno++;
++	      fputs (reg_names[regno], stream);
++	      break;
++	    }
++	}
++
+       /* Forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REGNO (x) == STATIC_CHAIN_REGNUM)
+ 	sorry ("a nested function is not supported for reduced registers");
+ 
+       /* Normal cases, print out register name.  */
+-      fputs (reg_names[REGNO (x)], stream);
++      regno = REGNO (x);
++      fputs (reg_names[regno], stream);
+       break;
+ 
+     case MEM:
+       output_address (GET_MODE (x), XEXP (x, 0));
+       break;
+ 
++    case HIGH:
++      if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE)
++	{
++	  const REAL_VALUE_TYPE *rv;
++	  long val;
++	  gcc_assert (GET_MODE (x) == SFmode);
++
++	  rv = CONST_DOUBLE_REAL_VALUE (XEXP (x, 0));
++	  REAL_VALUE_TO_TARGET_SINGLE (*rv, val);
++
++	  fprintf (stream, "hi20(0x%lx)", val);
++	}
++      else
++	gcc_unreachable ();
++      break;
++
++    case CONST_DOUBLE:
++      const REAL_VALUE_TYPE *rv;
++      long val;
++      gcc_assert (GET_MODE (x) == SFmode);
++
++      rv = CONST_DOUBLE_REAL_VALUE (x);
++      REAL_VALUE_TO_TARGET_SINGLE (*rv, val);
++
++      fprintf (stream, "0x%lx", val);
++      break;
++
+     case CODE_LABEL:
+     case CONST_INT:
+     case CONST:
+       output_addr_const (stream, x);
+       break;
+ 
++    case CONST_VECTOR:
++      fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x));
++      break;
++
++    case LO_SUM:
++      /* This is a special case for inline assembly using memory address 'p'.
++	 The inline assembly code is expected to use pseudo instruction
++	 for the operand.  EX: la  */
++      output_addr_const (stream, XEXP(x, 1));
++      break;
++
+     default:
+       /* Generally, output_addr_const () is able to handle most cases.
+-         We want to see what CODE could appear,
+-         so we use gcc_unreachable() to stop it.  */
++	 We want to see what CODE could appear,
++	 so we use gcc_unreachable() to stop it.  */
+       debug_rtx (x);
+       gcc_unreachable ();
+       break;
+@@ -2331,7 +3893,9 @@ nds32_print_operand (FILE *stream, rtx x, int code)
+ }
+ 
+ static void
+-nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
++nds32_print_operand_address (FILE *stream,
++			     machine_mode mode ATTRIBUTE_UNUSED,
++			     rtx x)
+ {
+   rtx op0, op1;
+ 
+@@ -2346,15 +3910,25 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+       fputs ("]", stream);
+       break;
+ 
++    case LO_SUM:
++      /* This is a special case for inline assembly using memory operand 'm'.
++	 The inline assembly code is expected to use pseudo instruction
++	 for the operand.  EX: [ls].[bhw]  */
++      fputs ("[ + ", stream);
++      op1 = XEXP (x, 1);
++      output_addr_const (stream, op1);
++      fputs ("]", stream);
++      break;
++
+     case REG:
+       /* Forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REGNO (x) == STATIC_CHAIN_REGNUM)
+ 	sorry ("a nested function is not supported for reduced registers");
+ 
+       /* [Ra] */
+-      fprintf (stream, "[%s]", reg_names[REGNO (x)]);
++      fprintf (stream, "[%s + 0]", reg_names[REGNO (x)]);
+       break;
+ 
+     case PLUS:
+@@ -2362,13 +3936,13 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+       op1 = XEXP (x, 1);
+ 
+       /* Checking op0, forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REG_P (op0)
+ 	  && REGNO (op0) == STATIC_CHAIN_REGNUM)
+ 	sorry ("a nested function is not supported for reduced registers");
+       /* Checking op1, forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REG_P (op1)
+ 	  && REGNO (op1) == STATIC_CHAIN_REGNUM)
+@@ -2377,8 +3951,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+       if (REG_P (op0) && CONST_INT_P (op1))
+ 	{
+ 	  /* [Ra + imm] */
+-	  fprintf (stream, "[%s + (%d)]",
+-			   reg_names[REGNO (op0)], (int)INTVAL (op1));
++	  fprintf (stream, "[%s + (" HOST_WIDE_INT_PRINT_DEC ")]",
++			   reg_names[REGNO (op0)], INTVAL (op1));
+ 	}
+       else if (REG_P (op0) && REG_P (op1))
+ 	{
+@@ -2391,8 +3965,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+ 	  /* [Ra + Rb << sv]
+ 	     From observation, the pattern looks like:
+ 	     (plus:SI (mult:SI (reg:SI 58)
+-	                       (const_int 4 [0x4]))
+-	              (reg/f:SI 57)) */
++			       (const_int 4 [0x4]))
++		      (reg/f:SI 57)) */
+ 	  int sv;
+ 
+ 	  /* We need to set sv to output shift value.  */
+@@ -2402,6 +3976,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+ 	    sv = 1;
+ 	  else if (INTVAL (XEXP (op0, 1)) == 4)
+ 	    sv = 2;
++	  else if (INTVAL (XEXP (op0, 1)) == 8)
++	    sv = 3;
+ 	  else
+ 	    gcc_unreachable ();
+ 
+@@ -2410,6 +3986,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+ 			   reg_names[REGNO (XEXP (op0, 0))],
+ 			   sv);
+ 	}
++      else if (GET_CODE (op0) == ASHIFT && REG_P (op1))
++	{
++	  /* [Ra + Rb << sv]
++	     In normal, ASHIFT can be converted to MULT like above case.
++	     But when the address rtx does not go through canonicalize_address
++	     defined in fwprop, we'll need this case.  */
++	  int sv = INTVAL (XEXP (op0, 1));
++	  gcc_assert (sv <= 3 && sv >=0);
++
++	  fprintf (stream, "[%s + %s << %d]",
++		   reg_names[REGNO (op1)],
++		   reg_names[REGNO (XEXP (op0, 0))],
++		   sv);
++	}
+       else
+ 	{
+ 	  /* The control flow is not supposed to be here.  */
+@@ -2421,20 +4011,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+ 
+     case POST_MODIFY:
+       /* (post_modify (regA) (plus (regA) (regB)))
+-         (post_modify (regA) (plus (regA) (const_int)))
+-         We would like to extract
+-         regA and regB (or const_int) from plus rtx.  */
++	 (post_modify (regA) (plus (regA) (const_int)))
++	 We would like to extract
++	 regA and regB (or const_int) from plus rtx.  */
+       op0 = XEXP (XEXP (x, 1), 0);
+       op1 = XEXP (XEXP (x, 1), 1);
+ 
+       /* Checking op0, forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REG_P (op0)
+ 	  && REGNO (op0) == STATIC_CHAIN_REGNUM)
+ 	sorry ("a nested function is not supported for reduced registers");
+       /* Checking op1, forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REG_P (op1)
+ 	  && REGNO (op1) == STATIC_CHAIN_REGNUM)
+@@ -2449,8 +4039,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+       else if (REG_P (op0) && CONST_INT_P (op1))
+ 	{
+ 	  /* [Ra], imm */
+-	  fprintf (stream, "[%s], %d",
+-			   reg_names[REGNO (op0)], (int)INTVAL (op1));
++	  fprintf (stream, "[%s], " HOST_WIDE_INT_PRINT_DEC,
++			   reg_names[REGNO (op0)], INTVAL (op1));
+ 	}
+       else
+ 	{
+@@ -2466,7 +4056,7 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+       op0 = XEXP (x, 0);
+ 
+       /* Checking op0, forbid using static chain register ($r16)
+-         on reduced-set registers configuration.  */
++	 on reduced-set registers configuration.  */
+       if (TARGET_REDUCED_REGS
+ 	  && REG_P (op0)
+ 	  && REGNO (op0) == STATIC_CHAIN_REGNUM)
+@@ -2490,14 +4080,92 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+ 
+     default :
+       /* Generally, output_addr_const () is able to handle most cases.
+-         We want to see what CODE could appear,
+-         so we use gcc_unreachable() to stop it.  */
++	 We want to see what CODE could appear,
++	 so we use gcc_unreachable() to stop it.  */
+       debug_rtx (x);
+       gcc_unreachable ();
+       break;
+     }
+ }
+ 
++/* -- Assembler Commands for Exception Regions.  */
++
++/* Describe how REG is laid out for DWARF purposes.  A register outside
++   the NDS32_FIRST_FPR_REGNUM .. NDS32_LAST_FPR_REGNUM range yields
++   NULL_RTX.  An FPR yields a PARALLEL of word_mode registers chosen by
++   its mode; any mode other than DFmode, SCmode, DCmode, SFmode or SImode
++   is treated as unreachable.  */
++static rtx
++nds32_dwarf_register_span (rtx reg)
++{
++  enum machine_mode reg_mode = GET_MODE (reg);
++  int first = REGNO (reg);
++
++  /* Only floating-point registers need a mapping of their own.  */
++  if (first < NDS32_FIRST_FPR_REGNUM || first > NDS32_LAST_FPR_REGNUM)
++    return NULL_RTX;
++
++  if (reg_mode == DFmode || reg_mode == SCmode)
++    {
++      /* GCC normally maps increasing register numbers to increasing
++	 memory locations, whereas paired FPRs on NDS32 are always
++	 big-endian:
++
++	   fd0 :  fs0   fs1
++		 (MSB) (LSB)
++
++	 so the pair is listed low part first.  */
++      rtx msw = gen_rtx_REG (word_mode, first);
++      rtx lsw = gen_rtx_REG (word_mode, first + 1);
++
++      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, lsw, msw));
++    }
++
++  if (reg_mode == DCmode)
++    {
++      /* NOTE(review): the real and imaginary halves below name the same
++	 two registers and are listed in opposite orders -- confirm that
++	 this is intended.  */
++      rtx re_msw = gen_rtx_REG (word_mode, first);
++      rtx re_lsw = gen_rtx_REG (word_mode, first + 1);
++      rtx im_msw = gen_rtx_REG (word_mode, first);
++      rtx im_lsw = gen_rtx_REG (word_mode, first + 1);
++
++      return gen_rtx_PARALLEL (VOIDmode,
++			       gen_rtvec (4, re_lsw, re_msw, im_msw, im_lsw));
++    }
++
++  if (reg_mode == SFmode || reg_mode == SImode)
++    {
++      /* A single word-sized register describes the whole value.  */
++      rtx only = gen_rtx_REG (word_mode, first);
++
++      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, only));
++    }
++
++  /* No other mode is expected to live in an FPR.  */
++  gcc_unreachable ();
++}
++
++/* Map GCC's internal register number REGNO to its DWARF2 number: FPR
++   numbers are shifted up by four, all other numbers are unchanged.  */
++unsigned int
++nds32_dbx_register_number (unsigned int regno)
++{
++  /* GDB's nds32 port keeps its own table from dwarf register number to
++     displayed register name.  For backward compatibility with earlier
++     toolchains that table still holds four registers (d0.l, d0.h, d1.l
++     and d1.h) between the GPRs and the FPRs, which the compiler does not
++     count in its own numbering.  Skip over those four slots for FPRs.
++     Hopefully this workaround can be dropped in the future.  */
++  const unsigned int gdb_extra_regs = 4;
++  unsigned int dwarf_regno = regno;
++
++  if (NDS32_IS_FPR_REGNUM (regno))
++    dwarf_regno += gdb_extra_regs;
++  return dwarf_regno;
++}
++
+ 
+ /* Defining target-specific uses of __attribute__.  */
+ 
+@@ -2526,6 +4194,27 @@ nds32_merge_decl_attributes (tree olddecl, tree newdecl)
+ static void
+ nds32_insert_attributes (tree decl, tree *attributes)
+ {
++  /* A "indirect_call" function attribute implies "noinline" and "noclone"
++     for elf toolchain to support ROM patch mechanism.  */
++  if (TREE_CODE (decl) == FUNCTION_DECL
++      && lookup_attribute ("indirect_call", *attributes) != NULL)
++    {
++      tree new_attrs = *attributes;
++
++      if (TARGET_LINUX_ABI)
++	error("cannot use indirect_call attribute under linux toolchain");
++
++      if (lookup_attribute ("noinline", new_attrs) == NULL)
++	new_attrs = tree_cons (get_identifier ("noinline"), NULL, new_attrs);
++      if (lookup_attribute ("noclone", new_attrs) == NULL)
++	new_attrs = tree_cons (get_identifier ("noclone"), NULL, new_attrs);
++
++      if (!TREE_PUBLIC (decl))
++	error("indirect_call attribute can't apply for static function");
++
++      *attributes = new_attrs;
++    }
++
+   /* For function declaration, we need to check isr-specific attributes:
+        1. Call nds32_check_isr_attrs_conflict() to check any conflict.
+        2. Check valid integer value for interrupt/exception.
+@@ -2543,14 +4232,46 @@ nds32_insert_attributes (tree decl, tree *attributes)
+       nds32_check_isr_attrs_conflict (decl, func_attrs);
+ 
+       /* Now we are starting to check valid id value
+-         for interrupt/exception/reset.
+-         Note that we ONLY check its validity here.
+-         To construct isr vector information, it is still performed
+-         by nds32_construct_isr_vectors_information().  */
++	 for interrupt/exception/reset.
++	 Note that we ONLY check its validity here.
++	 To construct isr vector information, it is still performed
++	 by nds32_construct_isr_vectors_information().  */
+       intr  = lookup_attribute ("interrupt", func_attrs);
+       excp  = lookup_attribute ("exception", func_attrs);
+       reset = lookup_attribute ("reset", func_attrs);
+ 
++      /* The following code may use attribute arguments.  If there is no
++	 argument from source code, it will cause segmentation fault.
++	 Therefore, return directly and report error message later.  */
++      if ((intr && TREE_VALUE (intr) == NULL)
++	  || (excp && TREE_VALUE (excp) == NULL)
++	  || (reset && TREE_VALUE (reset) == NULL))
++	return;
++
++      /* ------------------------------------------------------------- */
++      /* FIXME:
++	 FOR BACKWARD COMPATIBILITY, we need to support following patterns:
++
++	     __attribute__((interrupt("XXX;YYY;id=ZZZ")))
++	     __attribute__((exception("XXX;YYY;id=ZZZ")))
++	     __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
++
++	 If interrupt/exception/reset appears and its argument is a
++	 STRING_CST, we will use other functions to parse string in the
++	 nds32_construct_isr_vectors_information() and then set necessary
++	 isr information in the nds32_isr_vectors[] array.  Here we can
++	 just return immediately to avoid new-syntax checking.  */
++      if (intr != NULL_TREE
++	  && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST)
++	return;
++      if (excp != NULL_TREE
++	  && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST)
++	return;
++      if (reset != NULL_TREE
++	  && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST)
++	return;
++      /* ------------------------------------------------------------- */
++
+       if (intr || excp)
+ 	{
+ 	  /* Deal with interrupt/exception.  */
+@@ -2576,8 +4297,8 @@ nds32_insert_attributes (tree decl, tree *attributes)
+ 	      id = TREE_VALUE (id_list);
+ 	      /* Issue error if it is not a valid integer value.  */
+ 	      if (TREE_CODE (id) != INTEGER_CST
+-		  || wi::ltu_p (id, lower_bound)
+-		  || wi::gtu_p (id, upper_bound))
++		  || TREE_INT_CST_LOW (id) < lower_bound
++		  || TREE_INT_CST_LOW (id) > upper_bound)
+ 		error ("invalid id value for interrupt/exception attribute");
+ 
+ 	      /* Advance to next id.  */
+@@ -2604,8 +4325,8 @@ nds32_insert_attributes (tree decl, tree *attributes)
+ 
+ 	  /* 3. Check valid integer value for reset.  */
+ 	  if (TREE_CODE (id) != INTEGER_CST
+-	      || wi::ltu_p (id, lower_bound)
+-	      || wi::gtu_p (id, upper_bound))
++	      || TREE_INT_CST_LOW (id) < lower_bound
++	      || TREE_INT_CST_LOW (id) > upper_bound)
+ 	    error ("invalid id value for reset attribute");
+ 
+ 	  /* 4. Check valid function for nmi/warm.  */
+@@ -2667,17 +4388,40 @@ nds32_option_override (void)
+     {
+       /* Under V2 ISA, we need to strictly disable TARGET_V3PUSH.  */
+       target_flags &= ~MASK_V3PUSH;
++      /* Under V2 ISA, we need to strictly disable TARGET_IFC.  */
++      target_flags &= ~MASK_IFC;
++      /* Under V2 ISA, we need to strictly disable TARGET_EX9.  */
++      target_flags &= ~MASK_EX9;
++      /* If this is ARCH_V2J, we need to enable TARGET_REDUCED_REGS.  */
++      if (nds32_arch_option == ARCH_V2J)
++	target_flags |= MASK_REDUCED_REGS;
+     }
+   if (TARGET_ISA_V3)
+     {
+-      /* Under V3 ISA, currently nothing should be strictly set.  */
++      /* If this is ARCH_V3J, we need to enable TARGET_REDUCED_REGS.  */
++      if (nds32_arch_option == ARCH_V3J)
++	target_flags |= MASK_REDUCED_REGS;
+     }
+   if (TARGET_ISA_V3M)
+     {
+       /* Under V3M ISA, we need to strictly enable TARGET_REDUCED_REGS.  */
+       target_flags |= MASK_REDUCED_REGS;
+-      /* Under V3M ISA, we need to strictly disable TARGET_PERF_EXT.  */
+-      target_flags &= ~MASK_PERF_EXT;
++      if (nds32_arch_option != ARCH_V3M_PLUS)
++	{
++	  /* Under V3M ISA, we need to strictly disable TARGET_IFC.  */
++	  target_flags &= ~MASK_IFC;
++	  /* Under V3M ISA, we need to strictly disable TARGET_EX9.  */
++	  target_flags &= ~MASK_EX9;
++	}
++      /* Under V3M ISA, we need to strictly disable TARGET_EXT_PERF.  */
++      target_flags &= ~MASK_EXT_PERF;
++      /* Under V3M ISA, we need to strictly disable TARGET_EXT_PERF2.  */
++      target_flags &= ~MASK_EXT_PERF2;
++      /* Under V3M ISA, we need to strictly disable TARGET_EXT_STRING.  */
++      target_flags &= ~MASK_EXT_STRING;
++
++      if (flag_pic)
++	error ("not support -fpic option for v3m toolchain");
+     }
+ 
+   /* See if we are using reduced-set registers:
+@@ -2688,48 +4432,568 @@ nds32_option_override (void)
+       int r;
+ 
+       /* Prevent register allocator from
+-         choosing it as doing register allocation.  */
++	 choosing it as doing register allocation.  */
+       for (r = 11; r <= 14; r++)
+ 	fixed_regs[r] = call_used_regs[r] = 1;
+       for (r = 16; r <= 27; r++)
+ 	fixed_regs[r] = call_used_regs[r] = 1;
+     }
+ 
++  /* See if user explicitly would like to use fp-as-gp optimization.
++     If so, we must prevent $fp from being allocated
++     during register allocation.  */
++  if (TARGET_FORCE_FP_AS_GP)
++    fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1;
++
+   if (!TARGET_16_BIT)
+     {
+       /* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH.  */
+       target_flags &= ~MASK_V3PUSH;
+     }
+ 
+-  /* Currently, we don't support PIC code generation yet.  */
+-  if (flag_pic)
+-    sorry ("not support -fpic");
++  if (TARGET_HARD_FLOAT && !(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE))
++    {
++      if (nds32_arch_option == ARCH_V3S || nds32_arch_option == ARCH_V3F)
++	error ("Disable FPU ISA, "
++	       "the ABI option must be enable '-mfloat-abi=soft'");
++      else
++	error ("'-mabi=2fp+' option only support when FPU available, "
++	       "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'");
++    }
++
++  nds32_register_passes ();
++
++  nds32_init_rtx_costs ();
+ }
+ 
+ 
+ /* Miscellaneous Parameters.  */
+ 
++static rtx_insn *
++nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
++		     vec<rtx> &inputs ATTRIBUTE_UNUSED,
++		     vec<const char *> &constraints ATTRIBUTE_UNUSED,
++		     vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
++{
++  clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM));
++  SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM); /* Mirror the push above.  */
++  return NULL; /* Only the two clobber records are modified.  */
++}
++/* Place END_LABEL before the last real insn that precedes the hwloop_cfg
++   insn of LOOP_ID.  Return true if the label was emitted, else false.  */
++static bool
++nds32_hwloop_insert_end_label (rtx loop_id, rtx end_label)
++{
++  rtx_insn *insn = NULL;
++  basic_block bb;
++  rtx cfg_id;
++  rtx_insn *last_insn;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++	{
++	  if (NOTE_P (insn))
++	    continue;
++
++	  if (INSN_P (insn) && recog_memoized (insn) == CODE_FOR_hwloop_cfg)
++	    {
++	      cfg_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 5), 0, 0);
++	      if (cfg_id == loop_id)
++		{
++		  for (last_insn = PREV_INSN (insn); last_insn != BB_HEAD (bb);
++		       last_insn = PREV_INSN (last_insn))
++		    {
++		      if (NONDEBUG_INSN_P (last_insn))
++			{
++			  emit_label_before (end_label, last_insn);
++			  if (TARGET_IFC)
++			    {
++			      /* The last_insn don't do ifcall.  */
++			      emit_insn_before (gen_no_ifc_begin (), last_insn);
++			      emit_insn_after (gen_no_ifc_end (), last_insn);
++			    }
++			  if (TARGET_EX9)
++			    {
++			      /* The last_insn don't do ex9.  */
++			      emit_insn_before (gen_no_ex9_begin (), last_insn);
++			      emit_insn_after (gen_no_ex9_end (), last_insn);
++			    }
++			  /* Record last instruction for identify in relax pass.  */
++			  emit_insn_after (gen_hwloop_last_insn (), last_insn);
++			  return true;
++			}
++		    }
++
++		  if (NOTE_INSN_BASIC_BLOCK_P (last_insn))
++		    {
++		      rtx_insn *nop = emit_insn_before (gen_unspec_nop (),
++							last_insn);
++		      emit_label_before (end_label, nop);
++		      if (TARGET_IFC)
++			{
++			  /* The last_insn don't do ifcall.  */
++			  emit_insn_before (gen_no_ifc_begin (), last_insn);
++			  emit_insn_after (gen_no_ifc_end (), last_insn);
++			}
++		      if (TARGET_EX9)
++			{
++			  /* The last_insn don't do ex9.  */
++			  emit_insn_before (gen_no_ex9_begin (), last_insn);
++			  emit_insn_after (gen_no_ex9_end (), last_insn);
++			}
++		      return true;
++		    }
++		}
++	    }
++	}
++    }
++
++  if (insn != NULL)
++    delete_insn (insn);
++  return false;
++}
++
++/* Delete the first init_lc insn whose loop id operand is LOOP_ID.  */
++static void
++nds32_hwloop_remove (rtx loop_id)
++{
++  rtx_insn *insn;
++  rtx le_id;
++  basic_block bb;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++	{
++	  if (NOTE_P (insn))
++	    continue;
++
++	  if (INSN_P (insn) && recog_memoized (insn) == CODE_FOR_init_lc)
++	    {
++	      le_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0);
++	      if (loop_id == le_id)
++		{
++		  delete_insn (insn);
++		  return;
++		}
++	    }
++	}
++    }
++}
++
++/* Emit an isb insn right after the first init_lc insn whose loop id
++   operand is LOOP_ID.  Nothing is emitted when no such insn exists.  */
++static void
++nds32_hwloop_insert_isb (rtx loop_id)
++{
++  rtx_insn *insn;
++  rtx le_id;
++  basic_block bb;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++	{
++	  if (NOTE_P (insn))
++	    continue;
++
++	  if (INSN_P (insn) && recog_memoized (insn) == CODE_FOR_init_lc)
++	    {
++	      le_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0);
++	      if (loop_id == le_id)
++		{
++		  emit_insn_after (gen_unspec_volatile_isb (), insn);
++		  return;
++		}
++	    }
++	}
++    }
++}
++/* Insert mtlei instruction for hwloop.  */
++static void
++nds32_hwloop_insert_init_end ()
++{
++  rtx_insn *insn;
++  basic_block bb;
++  rtx loop_id, end_label;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++	{
++	  /* recog_memoized is only valid on real insns.  */
++	  if (!INSN_P (insn))
++	    continue;
++
++	  if (recog_memoized (insn) == CODE_FOR_mtlbi_hint)
++	    {
++	      end_label = gen_label_rtx ();
++	      loop_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0);
++
++	      if (!nds32_hwloop_insert_end_label (loop_id, end_label))
++		{
++		  /* No end label inserted: drop the hint and init_lc.  */
++		  delete_insn (insn);
++		  nds32_hwloop_remove (loop_id);
++		}
++	      else
++		{
++		  rtx ref = gen_rtx_LABEL_REF (Pmode, end_label);
++		  emit_insn_after (gen_mtlei (ref), insn);
++		  nds32_hwloop_insert_isb (loop_id);
++		}
++	    }
++	}
++    }
++}
++
++/* Reorganize insns issued in the same cycle out of order.  */
++static void
++nds32_reorg_out_of_order ()
++{
++  using namespace nds32;
++
++  // The function is controlled by -mreorg-out-of-order and the issue rate.
++  if (!flag_reorg_out_of_order
++      || nds32_sched_issue_rate () < 2)
++    return;
++
++  // We only move load insns up at this moment.
++  rtx_insn *insn;
++
++  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
++    {
++      if (!insn_executable_p (insn)
++          || GET_MODE (insn) != TImode
++          || get_attr_type (insn) == TYPE_STORE_MULTIPLE
++          || get_attr_type (insn) == TYPE_LOAD_MULTIPLE
++          || get_attr_type (insn) == TYPE_LOAD
++          || get_attr_type (insn) == TYPE_FLOAD
++          || get_attr_type (insn) == TYPE_STORE
++          || get_attr_type (insn) == TYPE_FSTORE)
++	continue;
++
++      rtx_insn *load_insn = insn;
++
++      while ((load_insn = next_executable_insn_local (load_insn)))
++	{
++	  if (GET_MODE (load_insn) == TImode)
++	    {
++	      load_insn = NULL;
++	      break;
++	    }
++
++	  if ((get_attr_type (load_insn) == TYPE_LOAD
++	       || get_attr_type (load_insn) == TYPE_FLOAD)
++	      && get_attr_length (load_insn) < 4)
++	    break;
++	}
++
++      if (load_insn == NULL_RTX)
++	continue;
++
++      exchange_insns (insn, load_insn);
++    }
++}
++
++/* Perform machine-dependent processing.  */
++static void
++nds32_machine_dependent_reorg (void)
++{
++  /* We are freeing block_for_insn in the toplev to keep compatibility
++     with old MDEP_REORGS that are not CFG based.  Recompute it
++     now.  */
++  compute_bb_for_insn ();
++
++  nds32_reorg_out_of_order ();
++
++  if (TARGET_HWLOOP)
++    nds32_hwloop_insert_init_end ();
++
++  if (flag_var_tracking)
++    {
++      df_analyze ();
++      timevar_push (TV_VAR_TRACKING);
++      variable_tracking_main ();
++      timevar_pop (TV_VAR_TRACKING);
++      df_finish_pass (false);
++    }
++
++  /* Use -minnermost-loop to enable,
++     need more testing to verify result.  */
++  if (TARGET_INNERMOST_LOOP)
++    nds32_insert_innermost_loop ();
++
++  nds32_insert_isps ();
++}
++
+ static void
+ nds32_init_builtins (void)
+ {
+   nds32_init_builtins_impl ();
+ }
+ 
++static tree
++nds32_builtin_decl (unsigned code, bool initialize_p)
++{
++  /* Implement in nds32-intrinsic.c.  */
++  return nds32_builtin_decl_impl (code, initialize_p);
++}
++
+ static rtx
+ nds32_expand_builtin (tree exp,
+ 		      rtx target,
+ 		      rtx subtarget,
+-		      machine_mode mode,
++		      enum machine_mode mode,
+ 		      int ignore)
+ {
++  /* Implement in nds32-intrinsic.c.  */
+   return nds32_expand_builtin_impl (exp, target, subtarget, mode, ignore);
+ }
+ 
++static bool
++nds32_have_conditional_execution (void)
++{
++  /* Lie to gcc that we have conditional execution in order to change the
++     optimization flow in the if-conversion, LRA and scheduling phases.
++     Our experiments show that this can reduce code size by about 2% with
++     only a very minor performance degradation on average.  */
++  return optimize_size;
++}
++
++/* Implement TARGET_INIT_LIBFUNCS.  */
++static void
++nds32_init_libfuncs (void)
++{
++  if (TARGET_LINUX_ABI)
++    init_sync_libfuncs (UNITS_PER_WORD);
++}
++
++/* Implement TARGET_CAN_USE_DOLOOP_P.  */
++static bool
++nds32_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
++			unsigned int, bool entered_at_top)
++{
++  /* A hardware loop must be entered from the top.  */
++  if (!entered_at_top)
++    return false;
++
++  if (lookup_attribute ("no_ext_zol", DECL_ATTRIBUTES (current_function_decl)))
++    return false;
++
++  /* Initializing a hardware loop is costly, so avoid generating one
++     when the maximum iteration count is less than 8.  */
++  if (!NDS32_HW_LOOP_P ()
++      || iterations_max.ulow() < 8)
++    return false;
++  return true;
++}
++
++/* NULL if INSN insn is valid within a low-overhead loop.
++   Otherwise return why doloop cannot be applied.  */
++static const char *
++nds32_invalid_within_doloop (const rtx_insn *insn)
++{
++  if (CALL_P (insn))
++    return "Function call in the loop.";
++  else if (INSN_CODE (insn) == CODE_FOR_pop25return
++	   || INSN_CODE (insn) == CODE_FOR_return_internal)
++    return "Simple return in the loop.";
++  else if (INSN_CODE (insn) == CODE_FOR_unspec_no_hwloop)
++    return "no_hwloop hint in the loop";
++
++  return NULL;
++}
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* PART 4: Implemet extern function definitions,
+-           the prototype is in nds32-protos.h.  */
++/* PART 5: Implement extern function definitions,
++	   the prototype is in nds32-protos.h.  */
++
++/* Run-time Target Specification.  */
++
++void
++nds32_cpu_cpp_builtins(struct cpp_reader *pfile)
++{
++#define builtin_define(TXT) cpp_define (pfile, TXT)
++#define builtin_assert(TXT) cpp_assert (pfile, TXT)
++  builtin_define ("__nds32__");
++  builtin_define ("__NDS32__");
++
++  /* We need to provide builtin macro to describe the size of
++     each vector for interrupt handler under elf toolchain.  */
++  if (!TARGET_LINUX_ABI)
++    {
++      if (TARGET_ISR_VECTOR_SIZE_4_BYTE)
++	builtin_define ("__NDS32_ISR_VECTOR_SIZE_4__");
++      else
++	builtin_define ("__NDS32_ISR_VECTOR_SIZE_16__");
++    }
++
++  if (TARGET_HARD_FLOAT)
++    builtin_define ("__NDS32_ABI_2FP_PLUS__");
++  else
++    builtin_define ("__NDS32_ABI_2__");
++
++  if (TARGET_ISA_V2)
++    builtin_define ("__NDS32_ISA_V2__");
++  if (TARGET_ISA_V3)
++    builtin_define ("__NDS32_ISA_V3__");
++  if (TARGET_ISA_V3M)
++    builtin_define ("__NDS32_ISA_V3M__");
++
++  if (TARGET_FPU_SINGLE)
++    builtin_define ("__NDS32_EXT_FPU_SP__");
++  if (TARGET_FPU_DOUBLE)
++    builtin_define ("__NDS32_EXT_FPU_DP__");
++
++  if (TARGET_EXT_FPU_FMA)
++    builtin_define ("__NDS32_EXT_FPU_FMA__");
++  if (NDS32_EXT_FPU_DOT_E)
++    builtin_define ("__NDS32_EXT_FPU_DOT_E__");
++  if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++    {
++      switch (nds32_fp_regnum)
++	{
++	case 0:
++	case 4:
++	  builtin_define ("__NDS32_EXT_FPU_CONFIG_0__");
++	  break;
++	case 1:
++	case 5:
++	  builtin_define ("__NDS32_EXT_FPU_CONFIG_1__");
++	  break;
++	case 2:
++	case 6:
++	  builtin_define ("__NDS32_EXT_FPU_CONFIG_2__");
++	  break;
++	case 3:
++	case 7:
++	  builtin_define ("__NDS32_EXT_FPU_CONFIG_3__");
++	  break;
++	default:
++	  abort ();
++	}
++    }
++
++  if (TARGET_BIG_ENDIAN)
++    builtin_define ("__NDS32_EB__");
++  else
++    builtin_define ("__NDS32_EL__");
++
++  if (TARGET_REDUCED_REGS)
++    builtin_define ("__NDS32_REDUCED_REGS__");
++  if (TARGET_CMOV)
++    builtin_define ("__NDS32_CMOV__");
++  if (TARGET_EXT_PERF)
++    builtin_define ("__NDS32_EXT_PERF__");
++  if (TARGET_EXT_PERF2)
++    builtin_define ("__NDS32_EXT_PERF2__");
++  if (TARGET_EXT_STRING)
++    builtin_define ("__NDS32_EXT_STRING__");
++  if (TARGET_16_BIT)
++    builtin_define ("__NDS32_16_BIT__");
++  if (TARGET_GP_DIRECT)
++    builtin_define ("__NDS32_GP_DIRECT__");
++  if (TARGET_VH)
++    builtin_define ("__NDS32_VH__");
++  if (NDS32_EXT_DSP_P ())
++    builtin_define ("__NDS32_EXT_DSP__");
++  if (NDS32_HW_LOOP_P ())
++    builtin_define ("__NDS32_EXT_ZOL__");
++
++  /* Extra builtin macros.  */
++  if (TARGET_ISA_V3 || TARGET_ISA_V3M_PLUS)
++    builtin_define ("__NDS32_EXT_IFC__");
++  if (TARGET_ISA_V3 || TARGET_ISA_V3M_PLUS)
++    builtin_define ("__NDS32_EXT_EX9__");
++  if (TARGET_BIG_ENDIAN)
++    builtin_define ("__big_endian__");
++
++  builtin_assert ("cpu=nds32");
++  builtin_assert ("machine=nds32");
++
++  /* FOR BACKWARD COMPATIBILITY.  */
++  if (TARGET_ISA_V2)
++    builtin_define ("__NDS32_BASELINE_V2__");
++  if (TARGET_ISA_V3)
++    builtin_define ("__NDS32_BASELINE_V3__");
++  if (TARGET_ISA_V3M)
++    builtin_define ("__NDS32_BASELINE_V3M__");
++  if (TARGET_REDUCED_REGS)
++    builtin_define ("__NDS32_REDUCE_REGS__");
++
++  if (TARGET_ISA_V2)
++    builtin_define ("NDS32_BASELINE_V2");
++  if (TARGET_ISA_V3)
++    builtin_define ("NDS32_BASELINE_V3");
++  if (TARGET_ISA_V3M)
++    builtin_define ("NDS32_BASELINE_V3M");
++  if (TARGET_REDUCED_REGS)
++    builtin_define ("NDS32_REDUCE_REGS");
++  if (TARGET_FPU_SINGLE)
++    builtin_define ("NDS32_EXT_FPU_SP");
++  if (TARGET_FPU_DOUBLE)
++    builtin_define ("NDS32_EXT_FPU_DP");
++  if (TARGET_EXT_PERF)
++    builtin_define ("NDS32_EXT_PERF");
++  if (TARGET_EXT_PERF2)
++    builtin_define ("NDS32_EXT_PERF2");
++  if (TARGET_EXT_STRING)
++    builtin_define ("NDS32_EXT_STRING");
++  if (TARGET_ISA_V3)
++    builtin_define ("NDS32_EXT_IFC");
++  if (TARGET_ISA_V3)
++    builtin_define ("NDS32_EXT_EX9");
++
++  if (TARGET_HARD_FLOAT)
++    builtin_define ("NDS32_ABI_2FP_PLUS");
++  else
++    builtin_define ("NDS32_ABI_2");
++
++  if (TARGET_BIG_ENDIAN)
++    builtin_define ("NDS32_EB");
++  else
++    builtin_define ("NDS32_EL");
++
++  if (TARGET_ISA_V2)
++    builtin_define ("__NDS32_BASELINE_V2");
++  if (TARGET_ISA_V3)
++    builtin_define ("__NDS32_BASELINE_V3");
++  if (TARGET_ISA_V3M)
++    builtin_define ("__NDS32_BASELINE_V3M");
++  if (TARGET_REDUCED_REGS)
++    builtin_define ("__NDS32_REDUCE_REGS");
++  if (TARGET_FPU_SINGLE)
++    builtin_define ("__NDS32_EXT_FPU_SP");
++  if (TARGET_FPU_DOUBLE)
++    builtin_define ("__NDS32_EXT_FPU_DP");
++  if (TARGET_EXT_PERF)
++    builtin_define ("__NDS32_EXT_PERF");
++  if (TARGET_EXT_PERF2)
++    builtin_define ("__NDS32_EXT_PERF2");
++  if (TARGET_EXT_STRING)
++    builtin_define ("__NDS32_EXT_STRING");
++  if (TARGET_ISA_V3)
++    builtin_define ("__NDS32_EXT_IFC");
++
++  if (TARGET_ISA_V3)
++    builtin_define ("__NDS32_EXT_EX9");
++
++  if (TARGET_HARD_FLOAT)
++    builtin_define ("__NDS32_ABI_2FP_PLUS");
++  else
++    builtin_define ("__NDS32_ABI_2");
++
++  if (TARGET_BIG_ENDIAN)
++    builtin_define ("__NDS32_EB");
++  else
++    builtin_define ("__NDS32_EL");
++#undef builtin_define
++#undef builtin_assert
++}
++
+ 
+ /* Defining Data Structures for Per-function Information.  */
+ 
+@@ -2743,26 +5007,80 @@ nds32_init_expanders (void)
+ 
+ /* Register Usage.  */
+ 
++/* -- Order of Allocation of Registers.  */
++
++void
++nds32_adjust_reg_alloc_order (void)
++{
++  const int nds32_reg_alloc_order[] = REG_ALLOC_ORDER;
++
++  /* Copy the default register allocation order, which is designed
++     to optimize for code size.  */
++  memcpy(reg_alloc_order, nds32_reg_alloc_order, sizeof (reg_alloc_order));
++
++  /* Adjust few register allocation order when optimizing for speed.  */
++  if (!optimize_size)
++    {
++      memcpy (reg_alloc_order, nds32_reg_alloc_order_for_speed,
++	      sizeof (nds32_reg_alloc_order_for_speed));
++    }
++}
++
+ /* -- How Values Fit in Registers.  */
+ 
+ int
+ nds32_hard_regno_nregs (int regno ATTRIBUTE_UNUSED,
+-			machine_mode mode)
++			enum machine_mode mode)
+ {
+   return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+ }
+ 
+ int
+-nds32_hard_regno_mode_ok (int regno, machine_mode mode)
++nds32_hard_regno_mode_ok (int regno, enum machine_mode mode)
+ {
++  /* Every pseudo register, including the first one, is acceptable.  */
++  if (regno >= FIRST_PSEUDO_REGISTER)
++    return true;
++  if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) && NDS32_IS_FPR_REGNUM (regno))
++    {
++      if (NDS32_IS_EXT_FPR_REGNUM(regno))
++	return (NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) && (mode == DFmode));
++      else if (mode == SFmode || mode == SImode)
++	return NDS32_FPR_REGNO_OK_FOR_SINGLE (regno);
++      else if (mode == DFmode)
++	return NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno);
++
++      return false;
++    }
++
+   /* Restrict double-word quantities to even register pairs.  */
+-  if (HARD_REGNO_NREGS (regno, mode) == 1
+-      || !((regno) & 1))
+-    return 1;
++  if (regno <= NDS32_LAST_GPR_REGNUM)
++    return (HARD_REGNO_NREGS (regno, mode) == 1
++	    || !((regno) & 1));
+ 
+-  return 0;
++  return false;
+ }
+ 
++int
++nds32_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
++{
++  if ((GET_MODE_CLASS (mode1) == MODE_INT
++       && GET_MODE_CLASS (mode2) == MODE_INT)
++      && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD
++      && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD)
++    return true;
++
++  if (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2))
++    {
++      if ((TARGET_FPU_SINGLE && !TARGET_FPU_DOUBLE)
++	  && (mode1 == DFmode || mode2 == DFmode))
++	return false;
++      else
++	return true;
++    }
++
++  return false;
++}
+ 
+ /* Register Classes.  */
+ 
+@@ -2784,7 +5102,16 @@ nds32_regno_reg_class (int regno)
+   else if (regno >= 20 && regno <= 31)
+     return HIGH_REGS;
+   else if (regno == 32 || regno == 33)
+-    return FRAME_REGS;
++    {
++      /* $SFP and $AP is FRAME_REGS in fact, However prevent IRA don't
++	 know how to allocate register for $SFP and $AP, just tell IRA they
++	 are GENERAL_REGS, and ARM do this hack too.  */
++      return GENERAL_REGS;
++    }
++  else if (regno >= 34 && regno <= 97)
++    return FP_REGS;
++  else if (regno >= 98 && regno <= 100)
++    return LOOP_REGS;
+   else
+     return NO_REGS;
+ }
+@@ -2795,14 +5122,39 @@ nds32_regno_reg_class (int regno)
+ /* -- Basic Stack Layout.  */
+ 
+ rtx
++nds32_dynamic_chain_address (rtx frameaddr)
++{
++  if (TARGET_V3PUSH)
++    {
++      /* If -mv3push is specified, we push $fp, $gp, and $lp into stack.
++         We can access dynamic chain address from stack by [$fp - 12].  */
++      return plus_constant (Pmode, frameaddr, -12);
++    }
++  else
++    {
++      /* For general case we push $fp and $lp into stack at prologue.
++         We can access dynamic chain address from stack by [$fp - 8].  */
++      return plus_constant (Pmode, frameaddr, -8);
++    }
++}
++
++rtx
+ nds32_return_addr_rtx (int count,
+-		       rtx frameaddr ATTRIBUTE_UNUSED)
++		       rtx frameaddr)
+ {
+-  /* There is no way to determine the return address
+-     if frameaddr is the frame that has 'count' steps
+-     up from current frame.  */
++  int offset;
++  rtx addr;
++
+   if (count != 0)
+-    return NULL_RTX;
++    {
++      /* In nds32 ABI design, we can expect that $lp is always available
++         from stack by [$fp - 4] location.  */
++      offset = -4;
++      addr = plus_constant (Pmode, frameaddr, offset);
++      addr = memory_address (Pmode, addr);
++
++      return gen_rtx_MEM (Pmode, addr);
++    }
+ 
+   /* If count == 0, it means we are at current frame,
+      the return address is $r30 ($lp).  */
+@@ -2821,15 +5173,18 @@ nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg)
+   nds32_compute_stack_frame ();
+ 
+   /* Remember to consider
+-     cfun->machine->callee_saved_area_padding_bytes
++     cfun->machine->callee_saved_area_gpr_padding_bytes and
++     cfun->machine->eh_return_data_regs_size
+      when calculating offset.  */
+   if (from_reg == ARG_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM)
+     {
+       offset = (cfun->machine->fp_size
+-	        + cfun->machine->gp_size
++		+ cfun->machine->gp_size
+ 		+ cfun->machine->lp_size
+ 		+ cfun->machine->callee_saved_gpr_regs_size
+ 		+ cfun->machine->callee_saved_area_gpr_padding_bytes
++		+ cfun->machine->callee_saved_fpr_regs_size
++		+ cfun->machine->eh_return_data_regs_size
+ 		+ cfun->machine->local_size
+ 		+ cfun->machine->out_args_size);
+     }
+@@ -2850,7 +5205,9 @@ nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg)
+ 		       + cfun->machine->gp_size
+ 		       + cfun->machine->lp_size
+ 		       + cfun->machine->callee_saved_gpr_regs_size
+-		       + cfun->machine->callee_saved_area_gpr_padding_bytes);
++		       + cfun->machine->callee_saved_area_gpr_padding_bytes
++		       + cfun->machine->callee_saved_fpr_regs_size
++		       + cfun->machine->eh_return_data_regs_size);
+     }
+   else
+     {
+@@ -2869,10 +5226,11 @@ nds32_init_cumulative_args (CUMULATIVE_ARGS *cum,
+ 			    tree fndecl ATTRIBUTE_UNUSED,
+ 			    int n_named_args ATTRIBUTE_UNUSED)
+ {
+-  /* Initial available registers
+-     (in offset, corresponding to NDS32_GPR_ARG_FIRST_REGNUM)
++  /* Initial available registers.  The values are offset against
++     NDS32_GPR_ARG_FIRST_REGNUM and NDS32_FPR_ARG_FIRST_REGNUM
+      for passing arguments.  */
+   cum->gpr_offset = 0;
++  cum->fpr_offset = 0;
+ }
+ 
+ /* -- Function Entry and Exit.  */
+@@ -2883,125 +5241,178 @@ nds32_expand_prologue (void)
+ {
+   int fp_adjust;
+   int sp_adjust;
+-  int en4_const;
+-
+-  rtx Rb, Re;
+-  rtx fp_adjust_insn, sp_adjust_insn;
++  unsigned Rb, Re;
+ 
+   /* Compute and setup stack frame size.
+      The result will be in cfun->machine.  */
+   nds32_compute_stack_frame ();
+ 
++  /* Check frame_pointer_needed again to prevent fp is need after reload.  */
++  if (frame_pointer_needed)
++    cfun->machine->fp_as_gp_p = false;
++
+   /* If this is a variadic function, first we need to push argument
+      registers that hold the unnamed argument value.  */
+   if (cfun->machine->va_args_size != 0)
+     {
+-      Rb = gen_rtx_REG (SImode, cfun->machine->va_args_first_regno);
+-      Re = gen_rtx_REG (SImode, cfun->machine->va_args_last_regno);
+-      /* No need to push $fp, $gp, or $lp, so use GEN_INT(0).  */
+-      nds32_emit_stack_push_multiple (Rb, Re, GEN_INT (0), true);
++      Rb = cfun->machine->va_args_first_regno;
++      Re = cfun->machine->va_args_last_regno;
++      /* No need to push $fp, $gp, or $lp.  */
++      nds32_emit_stack_push_multiple (Rb, Re, false, false, false, true);
+ 
+       /* We may also need to adjust stack pointer for padding bytes
+-         because varargs may cause $sp not 8-byte aligned.  */
++	 because varargs may cause $sp not 8-byte aligned.  */
+       if (cfun->machine->va_args_area_padding_bytes)
+ 	{
+ 	  /* Generate sp adjustment instruction.  */
+ 	  sp_adjust = cfun->machine->va_args_area_padding_bytes;
+-	  sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				       stack_pointer_rtx,
+-				       GEN_INT (-1 * sp_adjust));
+ 
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  sp_adjust_insn = emit_insn (sp_adjust_insn);
+-
+-	  /* The insn rtx 'sp_adjust_insn' will change frame layout.
+-	     We need to use RTX_FRAME_RELATED_P so that GCC is able to
+-	     generate CFI (Call Frame Information) stuff.  */
+-	  RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
++	  			   stack_pointer_rtx,
++				   -1 * sp_adjust);
+ 	}
+     }
+ 
+   /* If the function is 'naked',
+      we do not have to generate prologue code fragment.  */
+-  if (cfun->machine->naked_p)
++  if (cfun->machine->naked_p && !flag_pic)
+     return;
+ 
+   /* Get callee_first_regno and callee_last_regno.  */
+-  Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno);
+-  Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno);
+-
+-  /* nds32_emit_stack_push_multiple(first_regno, last_regno),
+-     the pattern 'stack_push_multiple' is implemented in nds32.md.
+-     For En4 field, we have to calculate its constant value.
+-     Refer to Andes ISA for more information.  */
+-  en4_const = 0;
+-  if (cfun->machine->fp_size)
+-    en4_const += 8;
+-  if (cfun->machine->gp_size)
+-    en4_const += 4;
+-  if (cfun->machine->lp_size)
+-    en4_const += 2;
++  Rb = cfun->machine->callee_saved_first_gpr_regno;
++  Re = cfun->machine->callee_saved_last_gpr_regno;
+ 
+   /* If $fp, $gp, $lp, and all callee-save registers are NOT required
+      to be saved, we don't have to create multiple push instruction.
+      Otherwise, a multiple push instruction is needed.  */
+-  if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0))
++  if (!(Rb == SP_REGNUM && Re == SP_REGNUM
++	&& cfun->machine->fp_size == 0
++	&& cfun->machine->gp_size == 0
++	&& cfun->machine->lp_size == 0))
+     {
+       /* Create multiple push instruction rtx.  */
+-      nds32_emit_stack_push_multiple (Rb, Re, GEN_INT (en4_const), false);
++      nds32_emit_stack_push_multiple (
++	Rb, Re,
++	cfun->machine->fp_size, cfun->machine->gp_size, cfun->machine->lp_size,
++	false);
++    }
++
++  /* Save eh data registers.  */
++  if (cfun->machine->use_eh_return_p)
++    {
++      Rb = cfun->machine->eh_return_data_first_regno;
++      Re = cfun->machine->eh_return_data_last_regno;
++
++      /* No need to push $fp, $gp, or $lp.
++	 Also, this is not variadic arguments push.  */
++      nds32_emit_stack_push_multiple (Rb, Re, false, false, false, false);
+     }
+ 
+-  /* Check frame_pointer_needed to see
+-     if we shall emit fp adjustment instruction.  */
+-  if (frame_pointer_needed)
+-    {
+-      /* adjust $fp = $sp + ($fp size) + ($gp size) + ($lp size)
+-                          + (4 * callee-saved-registers)
+-         Note: No need to adjust
+-               cfun->machine->callee_saved_area_padding_bytes,
+-               because, at this point, stack pointer is just
+-               at the position after push instruction.  */
+-      fp_adjust = cfun->machine->fp_size
+-		  + cfun->machine->gp_size
+-		  + cfun->machine->lp_size
+-		  + cfun->machine->callee_saved_gpr_regs_size;
+-      fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx,
++  /* Check frame_pointer_needed to see
++     if we shall emit fp adjustment instruction.  */
++  if (frame_pointer_needed)
++    {
++      /* adjust $fp = $sp + ($fp size) + ($gp size) + ($lp size)
++			  + (4 * callee-saved-registers)
++			  + (4 * exception-handling-data-registers)
++	 Note: No need to adjust
++	       cfun->machine->callee_saved_area_gpr_padding_bytes,
++	       because, at this point, stack pointer is just
++	       at the position after push instruction.  */
++      fp_adjust = cfun->machine->fp_size
++		  + cfun->machine->gp_size
++		  + cfun->machine->lp_size
++		  + cfun->machine->callee_saved_gpr_regs_size
++		  + cfun->machine->eh_return_data_regs_size;
++
++      nds32_emit_adjust_frame (hard_frame_pointer_rtx,
++			       stack_pointer_rtx,
++			       fp_adjust);
++    }
++
++  /* Save fpu registers.  */
++  if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++    {
++      /* When $sp moved to bottom of stack, we need to check whether
++	 the range of offset in the FPU instruction.  */
++      int fpr_offset = cfun->machine->local_size
++		       + cfun->machine->out_args_size
++		       + cfun->machine->callee_saved_fpr_regs_size;
++
++      /* Check FPU instruction offset imm14s.  */
++      if (!satisfies_constraint_Is14 (GEN_INT (fpr_offset)))
++	{
++	  int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes
++			  + cfun->machine->callee_saved_fpr_regs_size;
++
++	  /* Save fpu registers, need to allocate stack space
++	     for fpu callee registers.  And now $sp position
++	     on callee saved fpr registers.  */
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
++				   stack_pointer_rtx,
++				   -1 * fpr_space);
++
++	  /* Emit fpu store instruction, using [$sp + offset] store
++	     fpu registers.  */
++	  nds32_emit_push_fpr_callee_saved (0);
++
++          /* Adjust $sp = $sp - local_size - out_args_size.  */
++	  sp_adjust = cfun->machine->local_size
++		      + cfun->machine->out_args_size;
++
++	  /* Allocate stack space for local size and out args size.  */
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
++				   stack_pointer_rtx,
++				   -1 * sp_adjust);
++	}
++      else
++	{
++	  /* Offset range in Is14, so $sp moved to bottom of stack.  */
++
++          /* Adjust $sp = $sp - local_size - out_args_size
++			      - callee_saved_area_gpr_padding_bytes
++			      - callee_saved_fpr_regs_size.  */
++	  sp_adjust = cfun->machine->local_size
++		      + cfun->machine->out_args_size
++		      + cfun->machine->callee_saved_area_gpr_padding_bytes
++		      + cfun->machine->callee_saved_fpr_regs_size;
++
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
+ 				   stack_pointer_rtx,
+-				   GEN_INT (fp_adjust));
+-      /* Emit rtx into instructions list and receive INSN rtx form.  */
+-      fp_adjust_insn = emit_insn (fp_adjust_insn);
++				   -1 * sp_adjust);
+ 
+-      /* The insn rtx 'fp_adjust_insn' will change frame layout.  */
+-      RTX_FRAME_RELATED_P (fp_adjust_insn) = 1;
++	  /* Emit fpu store instruction, using [$sp + offset] store
++	     fpu registers.  */
++	  int fpr_position = cfun->machine->out_args_size
++			     + cfun->machine->local_size;
++	  nds32_emit_push_fpr_callee_saved (fpr_position);
++	}
+     }
+-
+-  /* Adjust $sp = $sp - local_size - out_args_size
+-                      - callee_saved_area_padding_bytes.  */
+-  sp_adjust = cfun->machine->local_size
+-	      + cfun->machine->out_args_size
+-	      + cfun->machine->callee_saved_area_gpr_padding_bytes;
+-  /* sp_adjust value may be out of range of the addi instruction,
+-     create alternative add behavior with TA_REGNUM if necessary,
+-     using NEGATIVE value to tell that we are decreasing address.  */
+-  sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust);
+-  if (sp_adjust)
++  else
+     {
+-      /* Generate sp adjustment instruction if and only if sp_adjust != 0.  */
+-      sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				   stack_pointer_rtx,
+-				   GEN_INT (-1 * sp_adjust));
+-      /* Emit rtx into instructions list and receive INSN rtx form.  */
+-      sp_adjust_insn = emit_insn (sp_adjust_insn);
++      /* Adjust $sp = $sp - local_size - out_args_size
++			  - callee_saved_area_gpr_padding_bytes.  */
++      sp_adjust = cfun->machine->local_size
++		  + cfun->machine->out_args_size
++		  + cfun->machine->callee_saved_area_gpr_padding_bytes;
+ 
+-      /* The insn rtx 'sp_adjust_insn' will change frame layout.
+-         We need to use RTX_FRAME_RELATED_P so that GCC is able to
+-         generate CFI (Call Frame Information) stuff.  */
+-      RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++      /* sp_adjust value may be out of range of the addi instruction,
++	 create alternative add behavior with TA_REGNUM if necessary,
++	 using NEGATIVE value to tell that we are decreasing address.  */
++      nds32_emit_adjust_frame (stack_pointer_rtx,
++			       stack_pointer_rtx,
++			       -1 * sp_adjust);
+     }
+ 
+-  /* Prevent the instruction scheduler from
+-     moving instructions across the boundary.  */
+-  emit_insn (gen_blockage ());
++  /* Emit gp setup instructions for -fpic.  */
++  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
++    nds32_emit_load_gp ();
++
++  /* If user applies -mno-sched-prolog-epilog option,
++     we need to prevent instructions of function body from being
++     scheduled with stack adjustment in prologue.  */
++  if (!flag_sched_prolog_epilog)
++    emit_insn (gen_blockage ());
+ }
+ 
+ /* Function for normal multiple pop epilogue.  */
+@@ -3009,18 +5420,17 @@ void
+ nds32_expand_epilogue (bool sibcall_p)
+ {
+   int sp_adjust;
+-  int en4_const;
+-
+-  rtx Rb, Re;
+-  rtx sp_adjust_insn;
++  unsigned Rb, Re;
+ 
+   /* Compute and setup stack frame size.
+      The result will be in cfun->machine.  */
+   nds32_compute_stack_frame ();
+ 
+-  /* Prevent the instruction scheduler from
+-     moving instructions across the boundary.  */
+-  emit_insn (gen_blockage ());
++  /* If user applies -mno-sched-prolog-epilog option,
++     we need to prevent instructions of function body from being
++     scheduled with stack adjustment in epilogue.  */
++  if (!flag_sched_prolog_epilog)
++    emit_insn (gen_blockage ());
+ 
+   /* If the function is 'naked', we do not have to generate
+      epilogue code fragment BUT 'ret' instruction.
+@@ -3029,110 +5439,156 @@ nds32_expand_epilogue (bool sibcall_p)
+   if (cfun->machine->naked_p)
+     {
+       /* If this is a variadic function, we do not have to restore argument
+-         registers but need to adjust stack pointer back to previous stack
+-         frame location before return.  */
++	 registers but need to adjust stack pointer back to previous stack
++	 frame location before return.  */
+       if (cfun->machine->va_args_size != 0)
+ 	{
+ 	  /* Generate sp adjustment instruction.
+ 	     We  need to consider padding bytes here.  */
+ 	  sp_adjust = cfun->machine->va_args_size
+ 		      + cfun->machine->va_args_area_padding_bytes;
+-	  sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				       stack_pointer_rtx,
+-				       GEN_INT (sp_adjust));
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  sp_adjust_insn = emit_insn (sp_adjust_insn);
+ 
+-	  /* The insn rtx 'sp_adjust_insn' will change frame layout.
+-	     We need to use RTX_FRAME_RELATED_P so that GCC is able to
+-	     generate CFI (Call Frame Information) stuff.  */
+-	  RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++  	  nds32_emit_adjust_frame (stack_pointer_rtx,
++				   stack_pointer_rtx,
++				   sp_adjust);
+ 	}
+ 
+       /* Generate return instruction by using 'return_internal' pattern.
+-         Make sure this instruction is after gen_blockage().  */
++	 Make sure this instruction is after gen_blockage().
++	 First we need to check this is a function without sibling call.  */
+       if (!sibcall_p)
+-	emit_jump_insn (gen_return_internal ());
++	{
++	  /* We need to further check attributes to determine whether
++	     there should be return instruction at epilogue.
++	     If the attribute naked exists but -mno-ret-in-naked-func
++	     is issued, there is NO need to generate return instruction.  */
++	  if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func)
++	    return;
++
++	  emit_jump_insn (gen_return_internal ());
++	}
+       return;
+     }
+ 
+   if (frame_pointer_needed)
+     {
+-      /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size)
+-                          - (4 * callee-saved-registers)
+-         Note: No need to adjust
+-               cfun->machine->callee_saved_area_padding_bytes,
+-               because we want to adjust stack pointer
+-               to the position for pop instruction.  */
+-      sp_adjust = cfun->machine->fp_size
+-		  + cfun->machine->gp_size
+-		  + cfun->machine->lp_size
+-		  + cfun->machine->callee_saved_gpr_regs_size;
+-      sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
++      /* Restore fpu registers.  */
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes;
++
++	  /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size)
++			      - (4 * callee-saved-registers)
++			      - (4 * exception-handling-data-registers)
++			      - (4 * callee-saved-gpr-registers padding byte)
++			      - (4 * callee-saved-fpr-registers)
++	     Note:  we want to adjust stack pointer
++		    to the position for callee-saved fpr register,
++		    And restore fpu register use .bi instruction to adjust $sp
++		    from callee-saved fpr register to pop instruction.  */
++	  sp_adjust = cfun->machine->fp_size
++		      + cfun->machine->gp_size
++		      + cfun->machine->lp_size
++		      + cfun->machine->callee_saved_gpr_regs_size
++		      + cfun->machine->eh_return_data_regs_size
++		      + cfun->machine->callee_saved_area_gpr_padding_bytes
++		      + cfun->machine->callee_saved_fpr_regs_size;
++
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
+ 				   hard_frame_pointer_rtx,
+-				   GEN_INT (-1 * sp_adjust));
+-      /* Emit rtx into instructions list and receive INSN rtx form.  */
+-      sp_adjust_insn = emit_insn (sp_adjust_insn);
++				   -1 * sp_adjust);
++
++	  /* Emit fpu load instruction, using .bi instruction
++	     load fpu registers.  */
++	  nds32_emit_pop_fpr_callee_saved (gpr_padding);
++	}
++      else
++	{
++	  /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size)
++			      - (4 * callee-saved-registers)
++			      - (4 * exception-handling-data-registers)
++	     Note: No need to adjust
++		   cfun->machine->callee_saved_area_gpr_padding_bytes,
++		   because we want to adjust stack pointer
++		   to the position for pop instruction.  */
++	  sp_adjust = cfun->machine->fp_size
++		      + cfun->machine->gp_size
++		      + cfun->machine->lp_size
++		      + cfun->machine->callee_saved_gpr_regs_size
++		      + cfun->machine->eh_return_data_regs_size;
+ 
+-      /* The insn rtx 'sp_adjust_insn' will change frame layout.  */
+-      RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
++				   hard_frame_pointer_rtx,
++				   -1 * sp_adjust);
++	}
+     }
+   else
+     {
+-      /* If frame pointer is NOT needed,
+-         we cannot calculate the sp adjustment from frame pointer.
+-         Instead, we calculate the adjustment by local_size,
+-         out_args_size, and callee_saved_area_padding_bytes.
+-         Notice that such sp adjustment value may be out of range,
+-         so we have to deal with it as well.  */
++      /* Restore fpu registers.  */
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes;
+ 
+-      /* Adjust $sp = $sp + local_size + out_args_size
+-                          + callee_saved_area_padding_bytes.  */
+-      sp_adjust = cfun->machine->local_size
+-		  + cfun->machine->out_args_size
+-		  + cfun->machine->callee_saved_area_gpr_padding_bytes;
+-      /* sp_adjust value may be out of range of the addi instruction,
+-         create alternative add behavior with TA_REGNUM if necessary,
+-         using POSITIVE value to tell that we are increasing address.  */
+-      sp_adjust = nds32_force_addi_stack_int (sp_adjust);
+-      if (sp_adjust)
+-	{
+-	  /* Generate sp adjustment instruction
+-	     if and only if sp_adjust != 0.  */
+-	  sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				       stack_pointer_rtx,
+-				       GEN_INT (sp_adjust));
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  sp_adjust_insn = emit_insn (sp_adjust_insn);
++	  /* Adjust $sp = $sp + local_size + out_args_size.  */
++	  sp_adjust = cfun->machine->local_size
++		      + cfun->machine->out_args_size;
+ 
+-	  /* The insn rtx 'sp_adjust_insn' will change frame layout.  */
+-	  RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
++				   stack_pointer_rtx,
++				   sp_adjust);
++
++	  /* Emit fpu load instruction, using .bi instruction
++	     load fpu registers, and adjust $sp from callee-saved fpr register
++	     to callee-saved gpr register.  */
++	  nds32_emit_pop_fpr_callee_saved (gpr_padding);
++	}
++      else
++	{
++	  /* If frame pointer is NOT needed,
++	     we cannot calculate the sp adjustment from frame pointer.
++	     Instead, we calculate the adjustment by local_size,
++	     out_args_size, and callee_saved_area_gpr_padding_bytes.
++	     Notice that such sp adjustment value may be out of range,
++	     so we have to deal with it as well.  */
++
++	  /* Adjust $sp = $sp + local_size + out_args_size
++			      + callee_saved_area_gpr_padding_bytes.  */
++	  sp_adjust = cfun->machine->local_size
++		      + cfun->machine->out_args_size
++		      + cfun->machine->callee_saved_area_gpr_padding_bytes;
++
++	  nds32_emit_adjust_frame (stack_pointer_rtx,
++				   stack_pointer_rtx,
++				   sp_adjust);
+ 	}
+     }
+ 
++  /* Restore eh data registers.  */
++  if (cfun->machine->use_eh_return_p)
++    {
++      Rb = cfun->machine->eh_return_data_first_regno;
++      Re = cfun->machine->eh_return_data_last_regno;
++
++      /* No need to pop $fp, $gp, or $lp.  */
++      nds32_emit_stack_pop_multiple (Rb, Re, false, false, false);
++    }
++
+   /* Get callee_first_regno and callee_last_regno.  */
+-  Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno);
+-  Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno);
+-
+-  /* nds32_emit_stack_pop_multiple(first_regno, last_regno),
+-     the pattern 'stack_pop_multiple' is implementad in nds32.md.
+-     For En4 field, we have to calculate its constant value.
+-     Refer to Andes ISA for more information.  */
+-  en4_const = 0;
+-  if (cfun->machine->fp_size)
+-    en4_const += 8;
+-  if (cfun->machine->gp_size)
+-    en4_const += 4;
+-  if (cfun->machine->lp_size)
+-    en4_const += 2;
++  Rb = cfun->machine->callee_saved_first_gpr_regno;
++  Re = cfun->machine->callee_saved_last_gpr_regno;
+ 
+   /* If $fp, $gp, $lp, and all callee-save registers are NOT required
+      to be saved, we don't have to create multiple pop instruction.
+      Otherwise, a multiple pop instruction is needed.  */
+-  if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0))
++  if (!(Rb == SP_REGNUM && Re == SP_REGNUM
++	&& cfun->machine->fp_size == 0
++	&& cfun->machine->gp_size == 0
++	&& cfun->machine->lp_size == 0))
+     {
+       /* Create multiple pop instruction rtx.  */
+-      nds32_emit_stack_pop_multiple (Rb, Re, GEN_INT (en4_const));
++      nds32_emit_stack_pop_multiple (
++	Rb, Re,
++	cfun->machine->fp_size, cfun->machine->gp_size, cfun->machine->lp_size);
+     }
+ 
+   /* If this is a variadic function, we do not have to restore argument
+@@ -3141,19 +5597,49 @@ nds32_expand_epilogue (bool sibcall_p)
+   if (cfun->machine->va_args_size != 0)
+     {
+       /* Generate sp adjustment instruction.
+-         We  need to consider padding bytes here.  */
++	 We need to consider padding bytes here.  */
+       sp_adjust = cfun->machine->va_args_size
+ 		  + cfun->machine->va_args_area_padding_bytes;
+-      sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				   stack_pointer_rtx,
+-				   GEN_INT (sp_adjust));
+-      /* Emit rtx into instructions list and receive INSN rtx form.  */
+-      sp_adjust_insn = emit_insn (sp_adjust_insn);
+ 
+-      /* The insn rtx 'sp_adjust_insn' will change frame layout.
+-         We need to use RTX_FRAME_RELATED_P so that GCC is able to
+-         generate CFI (Call Frame Information) stuff.  */
+-      RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++      nds32_emit_adjust_frame (stack_pointer_rtx,
++			       stack_pointer_rtx,
++			       sp_adjust);
++    }
++
++  /* If this function uses __builtin_eh_return, make stack adjustment
++     for exception handler.  */
++  if (cfun->machine->use_eh_return_p)
++    {
++      /* We need to unwind the stack by the offset computed by
++	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
++	 based on SP.  Ideally we would update the SP and define the
++	 CFA along the lines of:
++
++	 SP = SP + EH_RETURN_STACKADJ_RTX
++	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
++
++	 However the dwarf emitter only understands a constant
++	 register offset.
++
++	 The solution chosen here is to use $ta ($r15)
++	 as a temporary register to hold the current SP value.  The
++	 CFA is described using $ta, then SP is modified.  */
++
++      rtx ta_reg;
++      rtx insn;
++
++      ta_reg = gen_rtx_REG (SImode, TA_REGNUM);
++
++      insn = emit_move_insn (ta_reg, stack_pointer_rtx);
++      add_reg_note (insn, REG_CFA_DEF_CFA, ta_reg);
++      RTX_FRAME_RELATED_P (insn) = 1;
++
++      emit_insn (gen_addsi3 (stack_pointer_rtx,
++			     stack_pointer_rtx,
++			     EH_RETURN_STACKADJ_RTX));
++
++      /* Ensure the assignment to $ta does not get optimized away.  */
++      emit_use (ta_reg);
+     }
+ 
+   /* Generate return instruction.  */
+@@ -3167,28 +5653,35 @@ nds32_expand_prologue_v3push (void)
+ {
+   int fp_adjust;
+   int sp_adjust;
+-
+-  rtx Rb, Re;
+-  rtx fp_adjust_insn, sp_adjust_insn;
++  int fpr_space = 0;
++  unsigned Rb, Re;
+ 
+   /* Compute and setup stack frame size.
+      The result will be in cfun->machine.  */
+   nds32_compute_stack_frame ();
+ 
++  if (cfun->machine->callee_saved_gpr_regs_size > 0)
++    df_set_regs_ever_live (FP_REGNUM, 1);
++
++  /* Check frame_pointer_needed again, in case $fp is needed after reload.  */
++  if (frame_pointer_needed)
++    cfun->machine->fp_as_gp_p = false;
++
+   /* If the function is 'naked',
+      we do not have to generate prologue code fragment.  */
+-  if (cfun->machine->naked_p)
++  if (cfun->machine->naked_p && !flag_pic)
+     return;
+ 
+   /* Get callee_first_regno and callee_last_regno.  */
+-  Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno);
+-  Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno);
++  Rb = cfun->machine->callee_saved_first_gpr_regno;
++  Re = cfun->machine->callee_saved_last_gpr_regno;
+ 
+   /* Calculate sp_adjust first to test if 'push25 Re,imm8u' is available,
+      where imm8u has to be 8-byte alignment.  */
+   sp_adjust = cfun->machine->local_size
+ 	      + cfun->machine->out_args_size
+-	      + cfun->machine->callee_saved_area_gpr_padding_bytes;
++	      + cfun->machine->callee_saved_area_gpr_padding_bytes
++	      + cfun->machine->callee_saved_fpr_regs_size;
+ 
+   if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
+       && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust))
+@@ -3196,94 +5689,118 @@ nds32_expand_prologue_v3push (void)
+       /* We can use 'push25 Re,imm8u'.  */
+ 
+       /* nds32_emit_stack_v3push(last_regno, sp_adjust),
+-         the pattern 'stack_v3push' is implemented in nds32.md.
+-         The (const_int 14) means v3push always push { $fp $gp $lp }.  */
+-      nds32_emit_stack_v3push (Rb, Re,
+-			       GEN_INT (14), GEN_INT (sp_adjust));
++	 the pattern 'stack_v3push' is implemented in nds32.md.  */
++      nds32_emit_stack_v3push (Rb, Re, sp_adjust);
++
++      /* Save fpu registers.  */
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  /* Calculate fpr position.  */
++	  int fpr_position = cfun->machine->local_size
++			     + cfun->machine->out_args_size;
++	  /* Emit fpu store instruction, using [$sp + offset] store
++	     fpu registers.  */
++	  nds32_emit_push_fpr_callee_saved (fpr_position);
++	}
+ 
+       /* Check frame_pointer_needed to see
+-         if we shall emit fp adjustment instruction.  */
++	 if we shall emit fp adjustment instruction.  */
+       if (frame_pointer_needed)
+ 	{
+ 	  /* adjust $fp = $sp   + 4         ($fp size)
+-	                        + 4         ($gp size)
+-	                        + 4         ($lp size)
+-	                        + (4 * n)   (callee-saved registers)
+-	                        + sp_adjust ('push25 Re,imm8u')
++				+ 4         ($gp size)
++				+ 4         ($lp size)
++				+ (4 * n)   (callee-saved registers)
++				+ sp_adjust ('push25 Re,imm8u')
+ 	     Note: Since we use 'push25 Re,imm8u',
+-	           the position of stack pointer is further
+-	           changed after push instruction.
+-	           Hence, we need to take sp_adjust value
+-	           into consideration.  */
++		the position of stack pointer is further
++		changed after push instruction.
++		Hence, we need to take sp_adjust value
++		into consideration.  */
+ 	  fp_adjust = cfun->machine->fp_size
+ 		      + cfun->machine->gp_size
+ 		      + cfun->machine->lp_size
+ 		      + cfun->machine->callee_saved_gpr_regs_size
+ 		      + sp_adjust;
+-	  fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx,
+-				       stack_pointer_rtx,
+-				       GEN_INT (fp_adjust));
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  fp_adjust_insn = emit_insn (fp_adjust_insn);
++
++	  nds32_emit_adjust_frame (hard_frame_pointer_rtx,
++				   stack_pointer_rtx,
++				   fp_adjust);
+ 	}
+     }
+   else
+     {
+-      /* We have to use 'push25 Re,0' and
+-         expand one more instruction to adjust $sp later.  */
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  /* Calculate fpr space.  */
++	  fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes
++		      + cfun->machine->callee_saved_fpr_regs_size;
++
++	  /* We have to use 'push25 Re, fpr_space', to pre-allocate
++	     callee saved fpr registers space.  */
++	  nds32_emit_stack_v3push (Rb, Re, fpr_space);
++	  nds32_emit_push_fpr_callee_saved (0);
++	}
++      else
++	{
++	  /* We have to use 'push25 Re,0' and
++	     expand one more instruction to adjust $sp later.  */
+ 
+-      /* nds32_emit_stack_v3push(last_regno, sp_adjust),
+-         the pattern 'stack_v3push' is implemented in nds32.md.
+-         The (const_int 14) means v3push always push { $fp $gp $lp }.  */
+-      nds32_emit_stack_v3push (Rb, Re,
+-			       GEN_INT (14), GEN_INT (0));
++	  /* nds32_emit_stack_v3push(last_regno, sp_adjust),
++	     the pattern 'stack_v3push' is implemented in nds32.md.  */
++	  nds32_emit_stack_v3push (Rb, Re, 0);
++	}
+ 
+       /* Check frame_pointer_needed to see
+-         if we shall emit fp adjustment instruction.  */
++	 if we shall emit fp adjustment instruction.  */
+       if (frame_pointer_needed)
+ 	{
+ 	  /* adjust $fp = $sp + 4        ($fp size)
+-	                      + 4        ($gp size)
+-	                      + 4        ($lp size)
+-	                      + (4 * n)  (callee-saved registers)
++			      + 4        ($gp size)
++			      + 4        ($lp size)
++			      + (4 * n)  (callee-saved registers)
+ 	     Note: Since we use 'push25 Re,0',
+-	           the stack pointer is just at the position
+-	           after push instruction.
+-	           No need to take sp_adjust into consideration.  */
++		   the stack pointer is just at the position
++		   after push instruction.
++		   No need to take sp_adjust into consideration.  */
+ 	  fp_adjust = cfun->machine->fp_size
+ 		      + cfun->machine->gp_size
+ 		      + cfun->machine->lp_size
+ 		      + cfun->machine->callee_saved_gpr_regs_size;
+-	  fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx,
+-				       stack_pointer_rtx,
+-				       GEN_INT (fp_adjust));
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  fp_adjust_insn = emit_insn (fp_adjust_insn);
+-	}
+ 
+-      /* Because we use 'push25 Re,0',
+-         we need to expand one more instruction to adjust $sp.
+-         However, sp_adjust value may be out of range of the addi instruction,
+-         create alternative add behavior with TA_REGNUM if necessary,
+-         using NEGATIVE value to tell that we are decreasing address.  */
+-      sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust);
+-      if (sp_adjust)
+-	{
+-	  /* Generate sp adjustment instruction
+-	     if and only if sp_adjust != 0.  */
+-	  sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-				       stack_pointer_rtx,
+-				       GEN_INT (-1 * sp_adjust));
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  sp_adjust_insn = emit_insn (sp_adjust_insn);
++	  if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	    {
++	      /* We use 'push25 Re, fpr_space', the $sp is
++		 on callee saved fpr position, so need to consider
++		 fpr space.  */
++	      fp_adjust = fp_adjust + fpr_space;
++	    }
++
++	  nds32_emit_adjust_frame (hard_frame_pointer_rtx,
++				   stack_pointer_rtx,
++				   fp_adjust);
++	}
+ 
+-	  /* The insn rtx 'sp_adjust_insn' will change frame layout.
+-	     We need to use RTX_FRAME_RELATED_P so that GCC is able to
+-	     generate CFI (Call Frame Information) stuff.  */
+-	  RTX_FRAME_RELATED_P (sp_adjust_insn) = 1;
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  /* We use 'push25 Re, fpr_space',
++	     the $sp is on callee saved fpr position,
++	     no need to consider fpr space.  */
++	  sp_adjust = sp_adjust - fpr_space;
+ 	}
++
++      /* Because we use 'push25 Re,0',
++	 we need to expand one more instruction to adjust $sp.
++	 using NEGATIVE value to tell that we are decreasing address.  */
++      nds32_emit_adjust_frame (stack_pointer_rtx,
++			       stack_pointer_rtx,
++			       -1 * sp_adjust);
+     }
+ 
++  /* Emit gp setup instructions for -fpic.  */
++  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
++    nds32_emit_load_gp ();
++
+   /* Prevent the instruction scheduler from
+      moving instructions across the boundary.  */
+   emit_insn (gen_blockage ());
+@@ -3294,9 +5811,7 @@ void
+ nds32_expand_epilogue_v3pop (bool sibcall_p)
+ {
+   int sp_adjust;
+-
+-  rtx Rb, Re;
+-  rtx sp_adjust_insn;
++  unsigned Rb, Re;
+ 
+   /* Compute and setup stack frame size.
+      The result will be in cfun->machine.  */
+@@ -3311,21 +5826,32 @@ nds32_expand_epilogue_v3pop (bool sibcall_p)
+   if (cfun->machine->naked_p)
+     {
+       /* Generate return instruction by using 'return_internal' pattern.
+-         Make sure this instruction is after gen_blockage().  */
++	 Make sure this instruction is after gen_blockage().
++	 First we need to check this is a function without sibling call.  */
+       if (!sibcall_p)
+-	emit_jump_insn (gen_return_internal ());
++	{
++	  /* We need to further check attributes to determine whether
++	     there should be return instruction at epilogue.
++	     If the attribute naked exists but -mno-ret-in-naked-func
++	     is issued, there is NO need to generate return instruction.  */
++	  if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func)
++	    return;
++
++	  emit_jump_insn (gen_return_internal ());
++	}
+       return;
+     }
+ 
+   /* Get callee_first_regno and callee_last_regno.  */
+-  Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno);
+-  Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno);
++  Rb = cfun->machine->callee_saved_first_gpr_regno;
++  Re = cfun->machine->callee_saved_last_gpr_regno;
+ 
+   /* Calculate sp_adjust first to test if 'pop25 Re,imm8u' is available,
+      where imm8u has to be 8-byte alignment.  */
+   sp_adjust = cfun->machine->local_size
+ 	      + cfun->machine->out_args_size
+-	      + cfun->machine->callee_saved_area_gpr_padding_bytes;
++	      + cfun->machine->callee_saved_area_gpr_padding_bytes
++	      + cfun->machine->callee_saved_fpr_regs_size;
+ 
+   /* We have to consider alloca issue as well.
+      If the function does call alloca(), the stack pointer is not fixed.
+@@ -3338,38 +5864,65 @@ nds32_expand_epilogue_v3pop (bool sibcall_p)
+       && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)
+       && !cfun->calls_alloca)
+     {
++      /* Restore fpu registers.  */
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  int fpr_position = cfun->machine->local_size
++			     + cfun->machine->out_args_size;
++	  /* Emit fpu load instruction, using [$sp + offset] restore
++	     fpu registers.  */
++	  nds32_emit_v3pop_fpr_callee_saved (fpr_position);
++	}
++
+       /* We can use 'pop25 Re,imm8u'.  */
+ 
+       /* nds32_emit_stack_v3pop(last_regno, sp_adjust),
+-         the pattern 'stack_v3pop' is implementad in nds32.md.
+-         The (const_int 14) means v3pop always pop { $fp $gp $lp }.  */
+-      nds32_emit_stack_v3pop (Rb, Re,
+-			      GEN_INT (14), GEN_INT (sp_adjust));
++	 the pattern 'stack_v3pop' is implemented in nds32.md.  */
++      nds32_emit_stack_v3pop (Rb, Re, sp_adjust);
+     }
+   else
+     {
+       /* We have to use 'pop25 Re,0', and prior to it,
+-         we must expand one more instruction to adjust $sp.  */
++	 we must expand one more instruction to adjust $sp.  */
+ 
+       if (frame_pointer_needed)
+ 	{
+ 	  /* adjust $sp = $fp - 4        ($fp size)
+-	                      - 4        ($gp size)
+-	                      - 4        ($lp size)
+-	                      - (4 * n)  (callee-saved registers)
++			      - 4        ($gp size)
++			      - 4        ($lp size)
++			      - (4 * n)  (callee-saved registers)
+ 	     Note: No need to adjust
+-	           cfun->machine->callee_saved_area_padding_bytes,
+-	           because we want to adjust stack pointer
+-	           to the position for pop instruction.  */
++		   cfun->machine->callee_saved_area_gpr_padding_bytes,
++		   because we want to adjust stack pointer
++		   to the position for pop instruction.  */
+ 	  sp_adjust = cfun->machine->fp_size
+ 		      + cfun->machine->gp_size
+ 		      + cfun->machine->lp_size
+ 		      + cfun->machine->callee_saved_gpr_regs_size;
+-	  sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
++
++	  /* Restore fpu registers.  */
++	  if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	    {
++	      /* Set $sp to callee saved fpr position, we need to restore
++		 fpr registers.  */
++	      sp_adjust = sp_adjust
++			  + cfun->machine->callee_saved_area_gpr_padding_bytes
++			  + cfun->machine->callee_saved_fpr_regs_size;
++
++	      nds32_emit_adjust_frame (stack_pointer_rtx,
++				       hard_frame_pointer_rtx,
++				       -1 * sp_adjust);
++
++	      /* Emit fpu load instruction, using [$sp + offset] restore
++		 fpu registers.  */
++	      nds32_emit_v3pop_fpr_callee_saved (0);
++	    }
++	  else
++	    {
++	      nds32_emit_adjust_frame (stack_pointer_rtx,
+ 				       hard_frame_pointer_rtx,
+-				       GEN_INT (-1 * sp_adjust));
+-	  /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	  sp_adjust_insn = emit_insn (sp_adjust_insn);
++				       -1 * sp_adjust);
++	    }
+ 	}
+       else
+ 	{
+@@ -3381,33 +5934,57 @@ nds32_expand_epilogue_v3pop (bool sibcall_p)
+ 	     so we have to deal with it as well.  */
+ 
+ 	  /* Adjust $sp = $sp + local_size + out_args_size
+-			      + callee_saved_area_padding_bytes.  */
++			      + callee_saved_area_gpr_padding_bytes
++			      + callee_saved_fpr_regs_size.  */
+ 	  sp_adjust = cfun->machine->local_size
+ 		      + cfun->machine->out_args_size
+-		      + cfun->machine->callee_saved_area_gpr_padding_bytes;
+-	  /* sp_adjust value may be out of range of the addi instruction,
+-	     create alternative add behavior with TA_REGNUM if necessary,
+-	     using POSITIVE value to tell that we are increasing address.  */
+-	  sp_adjust = nds32_force_addi_stack_int (sp_adjust);
+-	  if (sp_adjust)
++		      + cfun->machine->callee_saved_area_gpr_padding_bytes
++		      + cfun->machine->callee_saved_fpr_regs_size;
++
++	  /* Restore fpu registers.  */
++	  if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	    {
++	      /* Set $sp to callee saved fpr position, we need to restore
++		 fpr registers.  */
++	      sp_adjust = sp_adjust
++			  - cfun->machine->callee_saved_area_gpr_padding_bytes
++			  - cfun->machine->callee_saved_fpr_regs_size;
++
++	      nds32_emit_adjust_frame (stack_pointer_rtx,
++				       stack_pointer_rtx,
++				       sp_adjust);
++
++	      /* Emit fpu load instruction, using [$sp + offset] restore
++		 fpu registers.  */
++	      nds32_emit_v3pop_fpr_callee_saved (0);
++	    }
++	  else
+ 	    {
+-	      /* Generate sp adjustment instruction
+-	         if and only if sp_adjust != 0.  */
+-	      sp_adjust_insn = gen_addsi3 (stack_pointer_rtx,
+-					   stack_pointer_rtx,
+-					   GEN_INT (sp_adjust));
+-	      /* Emit rtx into instructions list and receive INSN rtx form.  */
+-	      sp_adjust_insn = emit_insn (sp_adjust_insn);
++	       /* sp_adjust value may be out of range of the addi instruction,
++		  create alternative add behavior with TA_REGNUM if necessary,
++		  using POSITIVE value to tell that we are increasing
++		  address.  */
++	      nds32_emit_adjust_frame (stack_pointer_rtx,
++				       stack_pointer_rtx,
++				       sp_adjust);
+ 	    }
+ 	}
+ 
+-      /* nds32_emit_stack_v3pop(last_regno, sp_adjust),
+-         the pattern 'stack_v3pop' is implementad in nds32.md.  */
+-      /* The (const_int 14) means v3pop always pop { $fp $gp $lp }.  */
+-      nds32_emit_stack_v3pop (Rb, Re,
+-			      GEN_INT (14), GEN_INT (0));
++      if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)
++	{
++	  /* We have fpr need to restore, so $sp is set on callee saved fpr
++	     position.  And we use 'pop25 Re, fpr_space' to adjust $sp.  */
++	  int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes
++			  + cfun->machine->callee_saved_fpr_regs_size;
++	  nds32_emit_stack_v3pop (Rb, Re, fpr_space);
++	}
++      else
++	{
++	  /* nds32_emit_stack_v3pop(last_regno, sp_adjust),
++	     the pattern 'stack_v3pop' is implemented in nds32.md.  */
++	  nds32_emit_stack_v3pop (Rb, Re, 0);
++	}
+     }
+-
+   /* Generate return instruction.  */
+   emit_jump_insn (gen_pop25return ());
+ }
+@@ -3418,97 +5995,179 @@ nds32_expand_epilogue_v3pop (bool sibcall_p)
+ int
+ nds32_can_use_return_insn (void)
+ {
++  int sp_adjust;
++
+   /* Prior to reloading, we can't tell how many registers must be saved.
+      Thus we can not determine whether this function has null epilogue.  */
+   if (!reload_completed)
+     return 0;
+ 
++  /* If attribute 'naked' appears but -mno-ret-in-naked-func is used,
++     we cannot use return instruction.  */
++  if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func)
++    return 0;
++
++  sp_adjust = cfun->machine->local_size
++	      + cfun->machine->out_args_size
++	      + cfun->machine->callee_saved_area_gpr_padding_bytes
++	      + cfun->machine->callee_saved_fpr_regs_size;
++  if (!cfun->machine->fp_as_gp_p
++      && satisfies_constraint_Iu08 (GEN_INT (sp_adjust))
++      && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)
++      && !cfun->calls_alloca
++      && NDS32_V3PUSH_AVAILABLE_P
++      && !(TARGET_HARD_FLOAT
++	   && (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM)))
++    return 1;
++
+   /* If no stack was created, two conditions must be satisfied:
+      1. This is a naked function.
+-        So there is no callee-saved, local size, or outgoing size.
++	So there is no callee-saved, local size, or outgoing size.
+      2. This is NOT a variadic function.
+-        So there is no pushing arguement registers into the stack.  */
+-  return (cfun->machine->naked_p && (cfun->machine->va_args_size == 0));
++	So there is no pushing argument registers into the stack.  */
++  return ((cfun->machine->naked_p && (cfun->machine->va_args_size == 0)));
+ }
+ 
+-/* ------------------------------------------------------------------------ */
+-
+-/* Function to test 333-form for load/store instructions.
+-   This is auxiliary extern function for auxiliary macro in nds32.h.
+-   Because it is a little complicated, we use function instead of macro.  */
+-bool
+-nds32_ls_333_p (rtx rt, rtx ra, rtx imm, machine_mode mode)
++enum machine_mode
++nds32_case_vector_shorten_mode (int min_offset, int max_offset,
++				rtx body ATTRIBUTE_UNUSED)
+ {
+-  if (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS
+-      && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS)
++  if (min_offset < 0 || max_offset >= 0x2000)
++    return SImode;
++  else
+     {
+-      if (GET_MODE_SIZE (mode) == 4)
+-	return satisfies_constraint_Iu05 (imm);
+-
+-      if (GET_MODE_SIZE (mode) == 2)
+-	return satisfies_constraint_Iu04 (imm);
+-
+-      if (GET_MODE_SIZE (mode) == 1)
+-	return satisfies_constraint_Iu03 (imm);
++      /* The jump table may need 2-byte alignment,
++	 so reserve 1 byte when checking max_offset.  */
++      if (max_offset >= 0xff)
++	return HImode;
++      else
++	return QImode;
+     }
++}
++
++static bool
++nds32_cannot_copy_insn_p (rtx_insn *insn)
++{
++  /* The hwloop_cfg insn cannot be copied.  */
++  if (recog_memoized (insn) == CODE_FOR_hwloop_cfg)
++    return true;
+ 
+   return false;
+ }
+ 
+-
+-/* Computing the Length of an Insn.
+-   Modifies the length assigned to instruction INSN.
+-   LEN is the initially computed length of the insn.  */
++/* Return alignment for the label.  */
+ int
+-nds32_adjust_insn_length (rtx_insn *insn, int length)
++nds32_target_alignment (rtx label)
+ {
+-  rtx src, dst;
++  rtx_insn *insn;
+ 
+-  switch (recog_memoized (insn))
++  if (!NDS32_ALIGN_P ())
++    return 0;
++
++  insn = next_active_insn (label);
++
++  /* Always align to 4 bytes when the first instruction after the label
++     is a jump or call, since its length might change later; aligning it
++     unconditionally makes sure we don't lose any performance here.  */
++  if (insn == 0
++      || (get_attr_length (insn) == 2
++	  && !JUMP_P (insn) && !CALL_P (insn)))
++    return 0;
++  else
++    return 2;
++}
++
++/* Return alignment for data.  */
++unsigned int
++nds32_data_alignment (tree data,
++		      unsigned int basic_align)
++{
++  if ((basic_align < BITS_PER_WORD)
++      && (TREE_CODE (data) == ARRAY_TYPE
++	 || TREE_CODE (data) == UNION_TYPE
++	 || TREE_CODE (data) == RECORD_TYPE))
++    return BITS_PER_WORD;
++  else
++    return basic_align;
++}
++
++/* Return alignment for constant value.  */
++unsigned int
++nds32_constant_alignment (tree constant,
++			  unsigned int basic_align)
++{
++  /* Make string literal and constant for constructor to word align.  */
++  if (((TREE_CODE (constant) == STRING_CST
++	|| TREE_CODE (constant) == CONSTRUCTOR
++	|| TREE_CODE (constant) == UNION_TYPE
++	|| TREE_CODE (constant) == RECORD_TYPE
++	|| TREE_CODE (constant) == ARRAY_TYPE)
++       && basic_align < BITS_PER_WORD))
++    return BITS_PER_WORD;
++  else
++    return basic_align;
++}
++
++/* Return alignment for local variable.  */
++unsigned int
++nds32_local_alignment (tree local ATTRIBUTE_UNUSED,
++		       unsigned int basic_align)
++{
++  bool at_least_align_to_word = false;
++  /* Align local arrays, structs and unions to at least a word so that
++     memcpy can be unrolled when they are initialized from a constant.  */
++  switch (TREE_CODE (local))
+     {
+-    case CODE_FOR_move_df:
+-    case CODE_FOR_move_di:
+-      /* Adjust length of movd44 to 2.  */
+-      src = XEXP (PATTERN (insn), 1);
+-      dst = XEXP (PATTERN (insn), 0);
+-
+-      if (REG_P (src)
+-	  && REG_P (dst)
+-	  && (REGNO (src) % 2) == 0
+-	  && (REGNO (dst) % 2) == 0)
+-	length = 2;
++    case ARRAY_TYPE:
++    case RECORD_TYPE:
++    case UNION_TYPE:
++      at_least_align_to_word = true;
+       break;
+-
+     default:
++      at_least_align_to_word = false;
+       break;
+     }
+-
+-  return length;
++  if (at_least_align_to_word
++      && (basic_align < BITS_PER_WORD))
++    return BITS_PER_WORD;
++  else
++    return basic_align;
+ }
+ 
+-
+-/* Return align 2 (log base 2) if the next instruction of LABEL is 4 byte.  */
+-int
+-nds32_target_alignment (rtx label)
++bool
++nds32_split_double_word_load_store_p(rtx *operands, bool load_p)
+ {
+-  rtx_insn *insn;
++  rtx mem = load_p ? operands[1] : operands[0];
++  /* Do split at split2 if -O0 or schedule 2 not enable.  */
++  if (optimize == 0 || !flag_schedule_insns_after_reload)
++    return !satisfies_constraint_Da (mem) || MEM_VOLATILE_P (mem);
+ 
+-  if (optimize_size)
+-    return 0;
++  /* Split double word load store after copy propagation.  */
++  if (current_pass == NULL)
++    return false;
+ 
+-  insn = next_active_insn (label);
++  const char *pass_name = current_pass->name;
++  if (pass_name && ((strcmp (pass_name, "split4") == 0)
++		     || (strcmp (pass_name, "split5") == 0)))
++    return !satisfies_constraint_Da (mem) || MEM_VOLATILE_P (mem);
+ 
+-  if (insn == 0)
+-    return 0;
+-  else if ((get_attr_length (insn) % 4) == 0)
+-    return 2;
++  return false;
++}
++
++static bool
++nds32_use_blocks_for_constant_p (enum machine_mode mode,
++				 const_rtx x ATTRIBUTE_UNUSED)
++{
++  if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++      && (mode == DFmode || mode == SFmode))
++    return true;
+   else
+-    return 0;
++    return false;
+ }
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* PART 5: Initialize target hook structure and definitions.  */
++/* PART 6: Initialize target hook structure and definitions.  */
+ 
+ /* Controlling the Compilation Driver.  */
+ 
+@@ -3525,6 +6184,9 @@ nds32_target_alignment (rtx label)
+ #define TARGET_PROMOTE_FUNCTION_MODE \
+   default_promote_function_mode_always_promote
+ 
++#undef TARGET_EXPAND_TO_RTL_HOOK
++#define TARGET_EXPAND_TO_RTL_HOOK nds32_expand_to_rtl_hook
++
+ 
+ /* Layout of Source Language Data Types.  */
+ 
+@@ -3533,6 +6195,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* -- Basic Characteristics of Registers.  */
+ 
++#undef TARGET_CONDITIONAL_REGISTER_USAGE
++#define TARGET_CONDITIONAL_REGISTER_USAGE nds32_conditional_register_usage
++
+ /* -- Order of Allocation of Registers.  */
+ 
+ /* -- How Values Fit in Registers.  */
+@@ -3544,6 +6209,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* Register Classes.  */
+ 
++#undef TARGET_PREFERRED_RENAME_CLASS
++#define TARGET_PREFERRED_RENAME_CLASS nds32_preferred_rename_class
++
+ #undef TARGET_CLASS_MAX_NREGS
+ #define TARGET_CLASS_MAX_NREGS nds32_class_max_nregs
+ 
+@@ -3591,6 +6259,9 @@ nds32_target_alignment (rtx label)
+ #undef TARGET_FUNCTION_ARG_BOUNDARY
+ #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary
+ 
++#undef TARGET_VECTOR_MODE_SUPPORTED_P
++#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p
++
+ /* -- How Scalar Function Values Are Returned.  */
+ 
+ #undef TARGET_FUNCTION_VALUE
+@@ -3604,6 +6275,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* -- How Large Values Are Returned.  */
+ 
++#undef TARGET_RETURN_IN_MEMORY
++#define TARGET_RETURN_IN_MEMORY nds32_return_in_memory
++
+ /* -- Caller-Saves Register Allocation.  */
+ 
+ /* -- Function Entry and Exit.  */
+@@ -3630,6 +6304,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* -- Permitting tail calls.  */
+ 
++#undef TARGET_FUNCTION_OK_FOR_SIBCALL
++#define TARGET_FUNCTION_OK_FOR_SIBCALL nds32_function_ok_for_sibcall
++
+ #undef TARGET_WARN_FUNC_RETURN
+ #define TARGET_WARN_FUNC_RETURN nds32_warn_func_return
+ 
+@@ -3662,6 +6339,21 @@ nds32_target_alignment (rtx label)
+ #undef TARGET_LEGITIMATE_ADDRESS_P
+ #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p
+ 
++#undef TARGET_LEGITIMIZE_ADDRESS
++#define TARGET_LEGITIMIZE_ADDRESS nds32_legitimize_address
++
++#undef TARGET_LEGITIMATE_CONSTANT_P
++#define TARGET_LEGITIMATE_CONSTANT_P nds32_legitimate_constant_p
++
++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode
++
++#undef TARGET_CANNOT_FORCE_CONST_MEM
++#define TARGET_CANNOT_FORCE_CONST_MEM nds32_cannot_force_const_mem
++
++#undef TARGET_DELEGITIMIZE_ADDRESS
++#define TARGET_DELEGITIMIZE_ADDRESS nds32_delegitimize_address
++
+ 
+ /* Anchored Addresses.  */
+ 
+@@ -3672,6 +6364,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* -- Representation of condition codes using registers.  */
+ 
++#undef TARGET_CANONICALIZE_COMPARISON
++#define TARGET_CANONICALIZE_COMPARISON nds32_canonicalize_comparison
++
+ /* -- Macros to control conditional execution.  */
+ 
+ 
+@@ -3692,6 +6387,15 @@ nds32_target_alignment (rtx label)
+ 
+ /* Adjusting the Instruction Scheduler.  */
+ 
++#undef TARGET_SCHED_ISSUE_RATE
++#define TARGET_SCHED_ISSUE_RATE nds32_sched_issue_rate
++
++#undef  TARGET_SCHED_ADJUST_COST
++#define TARGET_SCHED_ADJUST_COST nds32_sched_adjust_cost
++
++#undef TARGET_SCHED_SET_SCHED_FLAGS
++#define TARGET_SCHED_SET_SCHED_FLAGS nds32_set_sched_flags
++
+ 
+ /* Dividing the Output into Sections (Texts, Data, . . . ).  */
+ 
+@@ -3719,6 +6423,9 @@ nds32_target_alignment (rtx label)
+ #undef TARGET_ASM_ALIGNED_SI_OP
+ #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+ 
++#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
++#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nds32_asm_output_addr_const_extra
++
+ /* -- Output of Uninitialized Variables.  */
+ 
+ /* -- Output and Generation of Labels.  */
+@@ -3741,6 +6448,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* -- Assembler Commands for Exception Regions.  */
+ 
++#undef TARGET_DWARF_REGISTER_SPAN
++#define TARGET_DWARF_REGISTER_SPAN nds32_dwarf_register_span
++
+ /* -- Assembler Commands for Alignment.  */
+ 
+ 
+@@ -3756,6 +6466,11 @@ nds32_target_alignment (rtx label)
+ 
+ /* -- Macros for SDB and DWARF Output.  */
+ 
++/* Variable tracking should be run after all optimizations which
++   change order of insns.  It also needs a valid CFG.  */
++#undef TARGET_DELAY_VARTRACK
++#define TARGET_DELAY_VARTRACK true
++
+ /* -- Macros for VMS Debug Format.  */
+ 
+ 
+@@ -3785,6 +6500,9 @@ nds32_target_alignment (rtx label)
+ 
+ /* Emulating TLS.  */
+ 
++#undef TARGET_HAVE_TLS
++#define TARGET_HAVE_TLS TARGET_LINUX_ABI
++
+ 
+ /* Defining coprocessor specifics for MIPS targets.  */
+ 
+@@ -3800,12 +6518,43 @@ nds32_target_alignment (rtx label)
+ 
+ /* Miscellaneous Parameters.  */
+ 
++#undef TARGET_MD_ASM_ADJUST
++#define TARGET_MD_ASM_ADJUST nds32_md_asm_adjust
++
++#undef TARGET_MACHINE_DEPENDENT_REORG
++#define TARGET_MACHINE_DEPENDENT_REORG nds32_machine_dependent_reorg
++
+ #undef TARGET_INIT_BUILTINS
+ #define TARGET_INIT_BUILTINS nds32_init_builtins
+ 
++#undef  TARGET_BUILTIN_DECL
++#define TARGET_BUILTIN_DECL nds32_builtin_decl
++
+ #undef TARGET_EXPAND_BUILTIN
+ #define TARGET_EXPAND_BUILTIN nds32_expand_builtin
+ 
++#undef TARGET_HAVE_CONDITIONAL_EXECUTION
++#define TARGET_HAVE_CONDITIONAL_EXECUTION nds32_have_conditional_execution
++
++#undef TARGET_INIT_LIBFUNCS
++#define TARGET_INIT_LIBFUNCS nds32_init_libfuncs
++
++#undef TARGET_CAN_USE_DOLOOP_P
++#define TARGET_CAN_USE_DOLOOP_P nds32_can_use_doloop_p
++
++#undef TARGET_INVALID_WITHIN_DOLOOP
++#define TARGET_INVALID_WITHIN_DOLOOP nds32_invalid_within_doloop
++
++#undef  TARGET_CANNOT_COPY_INSN_P
++#define TARGET_CANNOT_COPY_INSN_P nds32_cannot_copy_insn_p
++
++#undef TARGET_MIN_ANCHOR_OFFSET
++#define TARGET_MIN_ANCHOR_OFFSET -((long long int) 1 << 14)
++#undef TARGET_MAX_ANCHOR_OFFSET
++#define TARGET_MAX_ANCHOR_OFFSET (((long long int) 1 << 14) - 1)
++#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
++#define TARGET_USE_BLOCKS_FOR_CONSTANT_P nds32_use_blocks_for_constant_p
++
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h
+index eb4558c..a3e07cd 100644
+--- a/gcc/config/nds32/nds32.h
++++ b/gcc/config/nds32/nds32.h
+@@ -24,6 +24,9 @@
+ /* The following are auxiliary macros or structure declarations
+    that are used all over the nds32.c and nds32.h.  */
+ 
++#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
++  (LENGTH = nds32_adjust_insn_length (INSN, LENGTH))
++
+ /* Use SYMBOL_FLAG_MACH_DEP to define our own symbol_ref flag.
+    It is used in nds32_encode_section_info() to store flag in symbol_ref
+    in case the symbol should be placed in .rodata section.
+@@ -33,68 +36,23 @@
+ #define NDS32_SYMBOL_REF_RODATA_P(x) \
+   ((SYMBOL_REF_FLAGS (x) & NDS32_SYMBOL_FLAG_RODATA) != 0)
+ 
+-/* Computing the Length of an Insn.  */
+-#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
+-  (LENGTH = nds32_adjust_insn_length (INSN, LENGTH))
++enum nds32_relax_insn_type
++{
++  RELAX_ORI,
++  RELAX_PLT_ADD,
++  RELAX_TLS_ADD_or_LW,
++  RELAX_TLS_ADD_LW,
++  RELAX_TLS_LW_JRAL,
++  RELAX_DONE
++};
+ 
+-/* Check instruction LS-37-FP-implied form.
+-   Note: actually its immediate range is imm9u
+-         since it is used for lwi37/swi37 instructions.  */
+-#define NDS32_LS_37_FP_P(rt, ra, imm)       \
+-  (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \
+-   && REGNO (ra) == FP_REGNUM               \
+-   && satisfies_constraint_Iu09 (imm))
+-
+-/* Check instruction LS-37-SP-implied form.
+-   Note: actually its immediate range is imm9u
+-         since it is used for lwi37/swi37 instructions.  */
+-#define NDS32_LS_37_SP_P(rt, ra, imm)       \
+-  (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \
+-   && REGNO (ra) == SP_REGNUM               \
+-   && satisfies_constraint_Iu09 (imm))
+-
+-
+-/* Check load/store instruction form : Rt3, Ra3, imm3u.  */
+-#define NDS32_LS_333_P(rt, ra, imm, mode) nds32_ls_333_p (rt, ra, imm, mode)
+-
+-/* Check load/store instruction form : Rt4, Ra5, const_int_0.
+-   Note: no need to check ra because Ra5 means it covers all registers.  */
+-#define NDS32_LS_450_P(rt, ra, imm)                     \
+-  ((imm == const0_rtx)                                  \
+-   && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS         \
+-       || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS))
+-
+-/* Check instruction RRI-333-form.  */
+-#define NDS32_RRI_333_P(rt, ra, imm)           \
+-  (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS    \
+-   && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \
+-   && satisfies_constraint_Iu03 (imm))
+-
+-/* Check instruction RI-45-form.  */
+-#define NDS32_RI_45_P(rt, ra, imm)                     \
+-  (REGNO (rt) == REGNO (ra)                            \
+-   && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS        \
+-       || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS) \
+-   && satisfies_constraint_Iu05 (imm))
+-
+-
+-/* Check instruction RR-33-form.  */
+-#define NDS32_RR_33_P(rt, ra)                   \
+-  (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS     \
+-   && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS)
+-
+-/* Check instruction RRR-333-form.  */
+-#define NDS32_RRR_333_P(rt, ra, rb)             \
+-  (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS     \
+-   && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS  \
+-   && REGNO_REG_CLASS (REGNO (rb)) == LOW_REGS)
+-
+-/* Check instruction RR-45-form.
+-   Note: no need to check rb because Rb5 means it covers all registers.  */
+-#define NDS32_RR_45_P(rt, ra, rb)               \
+-  (REGNO (rt) == REGNO (ra)                     \
+-   && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \
+-       || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS))
++/* Classifies expand result for expand helper function.  */
++enum nds32_expand_result_type
++{
++  EXPAND_DONE,
++  EXPAND_FAIL,
++  EXPAND_CREATE_TEMPLATE
++};
+ 
+ /* Classifies address type to distinguish 16-bit/32-bit format.  */
+ enum nds32_16bit_address_type
+@@ -105,6 +63,10 @@ enum nds32_16bit_address_type
+   ADDRESS_LO_REG_IMM3U,
+   /* post_inc [lo_reg + imm3u]: 333 format address.  */
+   ADDRESS_POST_INC_LO_REG_IMM3U,
++  /* post_modify [lo_reg + imm3u]: 333 format address.  */
++  ADDRESS_POST_MODIFY_LO_REG_IMM3U,
++  /* [$r8 + imm7u]: r8 imply address.  */
++  ADDRESS_R8_IMM7U,
+   /* [$fp + imm7u]: fp imply address.  */
+   ADDRESS_FP_IMM7U,
+   /* [$sp + imm7u]: sp imply address.  */
+@@ -113,23 +75,67 @@ enum nds32_16bit_address_type
+   ADDRESS_NOT_16BIT_FORMAT
+ };
+ 
+-
+ /* ------------------------------------------------------------------------ */
+ 
+ /* Define maximum numbers of registers for passing arguments.  */
+ #define NDS32_MAX_GPR_REGS_FOR_ARGS 6
++#define NDS32_MAX_FPR_REGS_FOR_ARGS 6
+ 
+ /* Define the register number for first argument.  */
+ #define NDS32_GPR_ARG_FIRST_REGNUM 0
++#define NDS32_FPR_ARG_FIRST_REGNUM 34
+ 
+ /* Define the register number for return value.  */
+ #define NDS32_GPR_RET_FIRST_REGNUM 0
++#define NDS32_FPR_RET_FIRST_REGNUM 34
+ 
+ /* Define the first integer register number.  */
+ #define NDS32_FIRST_GPR_REGNUM 0
+ /* Define the last integer register number.  */
+ #define NDS32_LAST_GPR_REGNUM 31
+ 
++#define NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM 6
++#define NDS32_LAST_CALLEE_SAVE_GPR_REGNUM \
++  (TARGET_REDUCED_REGS ? 10 : 14)
++
++/* Define the number of floating-point registers.  */
++#define NDS32_FLOAT_REGISTER_NUMBER                           \
++ (((nds32_fp_regnum == NDS32_CONFIG_FPU_0)              \
++   || (nds32_fp_regnum == NDS32_CONFIG_FPU_4)) ? 8      \
++  : ((nds32_fp_regnum == NDS32_CONFIG_FPU_1)            \
++    || (nds32_fp_regnum == NDS32_CONFIG_FPU_5)) ? 16    \
++  : ((nds32_fp_regnum == NDS32_CONFIG_FPU_2)            \
++    || (nds32_fp_regnum == NDS32_CONFIG_FPU_6)) ? 32    \
++  : ((nds32_fp_regnum == NDS32_CONFIG_FPU_3)            \
++    || (nds32_fp_regnum == NDS32_CONFIG_FPU_7)) ? 64    \
++  : 32)
++
++#define NDS32_EXT_FPU_DOT_E (nds32_fp_regnum >= 4)
++
++/* Define the first floating-point register number.  */
++#define NDS32_FIRST_FPR_REGNUM 34
++/* Define the last floating-point register number.  */
++#define NDS32_LAST_FPR_REGNUM \
++  (NDS32_FIRST_FPR_REGNUM + NDS32_FLOAT_REGISTER_NUMBER - 1)
++
++
++#define NDS32_IS_EXT_FPR_REGNUM(regno) \
++  (((regno) >= NDS32_FIRST_FPR_REGNUM + 32) \
++   && ((regno) < NDS32_FIRST_FPR_REGNUM + 64))
++
++#define NDS32_IS_FPR_REGNUM(regno) \
++  (((regno) >= NDS32_FIRST_FPR_REGNUM) \
++   && ((regno) <= NDS32_LAST_FPR_REGNUM))
++
++#define NDS32_FPR_REGNO_OK_FOR_SINGLE(regno) \
++  ((regno) <= NDS32_LAST_FPR_REGNUM)
++
++#define NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) \
++  ((((regno) - NDS32_FIRST_FPR_REGNUM) & 1) == 0)
++
++#define NDS32_IS_GPR_REGNUM(regno) \
++  (((regno) <= NDS32_LAST_GPR_REGNUM))
++
+ /* Define double word alignment bits.  */
+ #define NDS32_DOUBLE_WORD_ALIGNMENT 64
+ 
+@@ -138,6 +144,16 @@ enum nds32_16bit_address_type
+ #define NDS32_SINGLE_WORD_ALIGN_P(value) (((value) & 0x03) == 0)
+ #define NDS32_DOUBLE_WORD_ALIGN_P(value) (((value) & 0x07) == 0)
+ 
++/* Determine whether we would like to have code generation strictly aligned.
++   We set it strictly aligned when -malways-align is enabled.
++   Check gcc/common/config/nds32/nds32-common.c for the optimizations that
++   apply -malways-align.  */
++#define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN)
++
++#define NDS32_HW_LOOP_P() (TARGET_HWLOOP && !TARGET_FORCE_NO_HWLOOP)
++
++#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP)
++
+ /* Get alignment according to mode or type information.
+    When 'type' is nonnull, there is no need to look at 'mode'.  */
+ #define NDS32_MODE_TYPE_ALIGN(mode, type) \
+@@ -159,21 +175,28 @@ enum nds32_16bit_address_type
+ /* This macro is used to return the register number for passing argument.
+    We need to obey the following rules:
+      1. If it is required MORE THAN one register,
+-        we need to further check if it really needs to be
+-        aligned on double words.
+-          a) If double word alignment is necessary,
+-             the register number must be even value.
+-          b) Otherwise, the register number can be odd or even value.
++	we need to further check if it really needs to be
++	aligned on double words.
++	  a) If double word alignment is necessary,
++	     the register number must be even value.
++	  b) Otherwise, the register number can be odd or even value.
+      2. If it is required ONLY one register,
+-        the register number can be odd or even value.  */
+-#define NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG(reg_offset, mode, type)  \
+-  ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1)                     \
+-   ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY)          \
+-      ? (((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1)      \
+-      : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM))                \
++	the register number can be odd or even value.  */
++#define NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG(reg_offset, mode, type) \
++  ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1)                    \
++   ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY)         \
++      ? (((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1)     \
++      : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM))               \
+    : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM))
+ 
+-/* This macro is to check if there are still available registers
++#define NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG(reg_offset, mode, type) \
++  ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1)                    \
++   ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY)         \
++      ? (((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM + 1) & ~1)     \
++      : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM))               \
++   : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM))
++
++/* These two macros are to check if there are still available registers
+    for passing argument, which must be entirely in registers.  */
+ #define NDS32_ARG_ENTIRE_IN_GPR_REG_P(reg_offset, mode, type)   \
+   ((NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \
+@@ -181,13 +204,23 @@ enum nds32_16bit_address_type
+    <= (NDS32_GPR_ARG_FIRST_REGNUM                               \
+        + NDS32_MAX_GPR_REGS_FOR_ARGS))
+ 
+-/* This macro is to check if there are still available registers
++#define NDS32_ARG_ENTIRE_IN_FPR_REG_P(reg_offset, mode, type)   \
++  ((NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \
++    + NDS32_NEED_N_REGS_FOR_ARG (mode, type))                   \
++   <= (NDS32_FPR_ARG_FIRST_REGNUM                               \
++       + NDS32_MAX_FPR_REGS_FOR_ARGS))
++
++/* These two macros are to check if there are still available registers
+    for passing argument, either entirely in registers or partially
+    in registers.  */
+ #define NDS32_ARG_PARTIAL_IN_GPR_REG_P(reg_offset, mode, type) \
+   (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \
+    < NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS)
+ 
++#define NDS32_ARG_PARTIAL_IN_FPR_REG_P(reg_offset, mode, type) \
++  (NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \
++   < NDS32_FPR_ARG_FIRST_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS)
++
+ /* This macro is to check if the register is required to be saved on stack.
+    If call_used_regs[regno] == 0, regno is the callee-saved register.
+    If df_regs_ever_live_p(regno) == true, it is used in the current function.
+@@ -196,6 +229,19 @@ enum nds32_16bit_address_type
+ #define NDS32_REQUIRED_CALLEE_SAVED_P(regno)                  \
+   ((!call_used_regs[regno]) && (df_regs_ever_live_p (regno)))
+ 
++/* This macro is to check if the push25/pop25 are available to be used
++   for code generation.  Because pop25 also performs return behavior,
++   the instructions may not be available for some cases.
++   If we want to use push25/pop25, all the following conditions must
++   be satisfied:
++     1. TARGET_V3PUSH is set.
++     2. Current function is not an ISR function.
++     3. Current function is not a variadic function.  */
++#define NDS32_V3PUSH_AVAILABLE_P  \
++  (TARGET_V3PUSH \
++   && !nds32_isr_function_p (current_function_decl) \
++   && (cfun->machine->va_args_size == 0))
++
+ /* ------------------------------------------------------------------------ */
+ 
+ /* A C structure for machine-specific, per-function data.
+@@ -222,6 +268,10 @@ struct GTY(()) machine_function
+      callee-saved registers.  */
+   int callee_saved_gpr_regs_size;
+ 
++  /* Number of bytes on the stack for saving floating-point
++     callee-saved registers.  */
++  int callee_saved_fpr_regs_size;
++
+   /* The padding bytes in callee-saved area may be required.  */
+   int callee_saved_area_gpr_padding_bytes;
+ 
+@@ -230,26 +280,57 @@ struct GTY(()) machine_function
+   /* The last required general purpose callee-saved register.  */
+   int callee_saved_last_gpr_regno;
+ 
++  /* The first required floating-point callee-saved register.  */
++  int callee_saved_first_fpr_regno;
++  /* The last required floating-point callee-saved register.  */
++  int callee_saved_last_fpr_regno;
++
+   /* The padding bytes in varargs area may be required.  */
+   int va_args_area_padding_bytes;
+-
+   /* The first required register that should be saved on stack for va_args.  */
+   int va_args_first_regno;
+   /* The last required register that should be saved on stack for va_args.  */
+   int va_args_last_regno;
+ 
++  /* Number of bytes on the stack for saving exception handling registers.  */
++  int eh_return_data_regs_size;
++  /* The first register of passing exception handling information.  */
++  int eh_return_data_first_regno;
++  /* The last register of passing exception handling information.  */
++  int eh_return_data_last_regno;
++
++  /* Indicate that whether this function
++     calls __builtin_eh_return.  */
++  int use_eh_return_p;
++
+   /* Indicate that whether this function needs
+      prologue/epilogue code generation.  */
+   int naked_p;
+   /* Indicate that whether this function
+      uses fp_as_gp optimization.  */
+   int fp_as_gp_p;
++  /* Indicate that whether this function is under strictly aligned
++     situation for legitimate address checking.  This flag informs
++     nds32_legitimate_address_p() how to treat offset alignment:
++       1. The IVOPT phase needs to detect available range for memory access,
++	  such as checking [base + 32767] ~ [base + (-32768)].
++	  For this case we do not want address to be strictly aligned.
++       2. The rtl lowering and optimization are close to target code.
++	  For this case we need address to be strictly aligned.  */
++  int strict_aligned_p;
++
++  /* Record the status of two similar attributes.  */
++  int attr_naked_p;
++  int attr_no_prologue_p;
++  /* Record hwloop group, used in reorg pass.  */
++  int hwloop_group_id;
+ };
+ 
+ /* A C structure that contains the arguments information.  */
+ typedef struct
+ {
+   unsigned int gpr_offset;
++  unsigned int fpr_offset;
+ } nds32_cumulative_args;
+ 
+ /* ------------------------------------------------------------------------ */
+@@ -288,7 +369,8 @@ enum nds32_isr_nested_type
+ {
+   NDS32_NESTED,
+   NDS32_NOT_NESTED,
+-  NDS32_NESTED_READY
++  NDS32_NESTED_READY,
++  NDS32_CRITICAL
+ };
+ 
+ /* Define structure to record isr information.
+@@ -316,6 +398,13 @@ struct nds32_isr_info
+      unless user specifies attribute to change it.  */
+   enum nds32_isr_nested_type nested_type;
+ 
++  /* Secure isr level.
++     Currently we have security levels 0-3.
++     It should be set to 0 by default.
++     For security processors, this is determined by the secure
++     attribute or compiler options.  */
++  unsigned int security_level;
++
+   /* Total vectors.
+      The total vectors = interrupt + exception numbers + reset.
+      It should be set to 0 by default.
+@@ -340,19 +429,477 @@ enum nds32_builtins
+ {
+   NDS32_BUILTIN_ISYNC,
+   NDS32_BUILTIN_ISB,
++  NDS32_BUILTIN_DSB,
++  NDS32_BUILTIN_MSYNC_ALL,
++  NDS32_BUILTIN_MSYNC_STORE,
+   NDS32_BUILTIN_MFSR,
+   NDS32_BUILTIN_MFUSR,
+   NDS32_BUILTIN_MTSR,
++  NDS32_BUILTIN_MTSR_ISB,
++  NDS32_BUILTIN_MTSR_DSB,
+   NDS32_BUILTIN_MTUSR,
+   NDS32_BUILTIN_SETGIE_EN,
+-  NDS32_BUILTIN_SETGIE_DIS
++  NDS32_BUILTIN_SETGIE_DIS,
++  NDS32_BUILTIN_FMFCFG,
++  NDS32_BUILTIN_FMFCSR,
++  NDS32_BUILTIN_FMTCSR,
++  NDS32_BUILTIN_FCPYNSS,
++  NDS32_BUILTIN_FCPYSS,
++  NDS32_BUILTIN_FCPYNSD,
++  NDS32_BUILTIN_FCPYSD,
++  NDS32_BUILTIN_FABSS,
++  NDS32_BUILTIN_FABSD,
++  NDS32_BUILTIN_FSQRTS,
++  NDS32_BUILTIN_FSQRTD,
++  NDS32_BUILTIN_ABS,
++  NDS32_BUILTIN_AVE,
++  NDS32_BUILTIN_BCLR,
++  NDS32_BUILTIN_BSET,
++  NDS32_BUILTIN_BTGL,
++  NDS32_BUILTIN_BTST,
++  NDS32_BUILTIN_CLIP,
++  NDS32_BUILTIN_CLIPS,
++  NDS32_BUILTIN_CLZ,
++  NDS32_BUILTIN_CLO,
++  NDS32_BUILTIN_MAX,
++  NDS32_BUILTIN_MIN,
++  NDS32_BUILTIN_PBSAD,
++  NDS32_BUILTIN_PBSADA,
++  NDS32_BUILTIN_BSE,
++  NDS32_BUILTIN_BSP,
++  NDS32_BUILTIN_FFB,
++  NDS32_BUILTIN_FFMISM,
++  NDS32_BUILTIN_FLMISM,
++  NDS32_BUILTIN_KADDW,
++  NDS32_BUILTIN_KSUBW,
++  NDS32_BUILTIN_KADDH,
++  NDS32_BUILTIN_KSUBH,
++  NDS32_BUILTIN_KDMBB,
++  NDS32_BUILTIN_V_KDMBB,
++  NDS32_BUILTIN_KDMBT,
++  NDS32_BUILTIN_V_KDMBT,
++  NDS32_BUILTIN_KDMTB,
++  NDS32_BUILTIN_V_KDMTB,
++  NDS32_BUILTIN_KDMTT,
++  NDS32_BUILTIN_V_KDMTT,
++  NDS32_BUILTIN_KHMBB,
++  NDS32_BUILTIN_V_KHMBB,
++  NDS32_BUILTIN_KHMBT,
++  NDS32_BUILTIN_V_KHMBT,
++  NDS32_BUILTIN_KHMTB,
++  NDS32_BUILTIN_V_KHMTB,
++  NDS32_BUILTIN_KHMTT,
++  NDS32_BUILTIN_V_KHMTT,
++  NDS32_BUILTIN_KSLRAW,
++  NDS32_BUILTIN_KSLRAW_U,
++  NDS32_BUILTIN_RDOV,
++  NDS32_BUILTIN_CLROV,
++  NDS32_BUILTIN_ROTR,
++  NDS32_BUILTIN_SVA,
++  NDS32_BUILTIN_SVS,
++  NDS32_BUILTIN_WSBH,
++  NDS32_BUILTIN_JR_ITOFF,
++  NDS32_BUILTIN_JR_TOFF,
++  NDS32_BUILTIN_JRAL_ITON,
++  NDS32_BUILTIN_JRAL_TON,
++  NDS32_BUILTIN_RET_ITOFF,
++  NDS32_BUILTIN_RET_TOFF,
++  NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT,
++  NDS32_BUILTIN_STANDBY_WAKE_GRANT,
++  NDS32_BUILTIN_STANDBY_WAKE_DONE,
++  NDS32_BUILTIN_TEQZ,
++  NDS32_BUILTIN_TNEZ,
++  NDS32_BUILTIN_TRAP,
++  NDS32_BUILTIN_SETEND_BIG,
++  NDS32_BUILTIN_SETEND_LITTLE,
++  NDS32_BUILTIN_SYSCALL,
++  NDS32_BUILTIN_BREAK,
++  NDS32_BUILTIN_NOP,
++  NDS32_BUILTIN_SCHE_BARRIER,
++  NDS32_BUILTIN_GET_CURRENT_SP,
++  NDS32_BUILTIN_SET_CURRENT_SP,
++  NDS32_BUILTIN_RETURN_ADDRESS,
++  NDS32_BUILTIN_LLW,
++  NDS32_BUILTIN_LWUP,
++  NDS32_BUILTIN_LBUP,
++  NDS32_BUILTIN_SCW,
++  NDS32_BUILTIN_SWUP,
++  NDS32_BUILTIN_SBUP,
++  NDS32_BUILTIN_CCTL_VA_LCK,
++  NDS32_BUILTIN_CCTL_IDX_WBINVAL,
++  NDS32_BUILTIN_CCTL_VA_WBINVAL_L1,
++  NDS32_BUILTIN_CCTL_VA_WBINVAL_LA,
++  NDS32_BUILTIN_CCTL_IDX_READ,
++  NDS32_BUILTIN_CCTL_IDX_WRITE,
++  NDS32_BUILTIN_CCTL_L1D_INVALALL,
++  NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL,
++  NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL,
++  NDS32_BUILTIN_DPREF_QW,
++  NDS32_BUILTIN_DPREF_HW,
++  NDS32_BUILTIN_DPREF_W,
++  NDS32_BUILTIN_DPREF_DW,
++  NDS32_BUILTIN_TLBOP_TRD,
++  NDS32_BUILTIN_TLBOP_TWR,
++  NDS32_BUILTIN_TLBOP_RWR,
++  NDS32_BUILTIN_TLBOP_RWLK,
++  NDS32_BUILTIN_TLBOP_UNLK,
++  NDS32_BUILTIN_TLBOP_PB,
++  NDS32_BUILTIN_TLBOP_INV,
++  NDS32_BUILTIN_TLBOP_FLUA,
++  NDS32_BUILTIN_UALOAD_HW,
++  NDS32_BUILTIN_UALOAD_W,
++  NDS32_BUILTIN_UALOAD_DW,
++  NDS32_BUILTIN_UASTORE_HW,
++  NDS32_BUILTIN_UASTORE_W,
++  NDS32_BUILTIN_UASTORE_DW,
++  NDS32_BUILTIN_GIE_DIS,
++  NDS32_BUILTIN_GIE_EN,
++  NDS32_BUILTIN_ENABLE_INT,
++  NDS32_BUILTIN_DISABLE_INT,
++  NDS32_BUILTIN_SET_PENDING_SWINT,
++  NDS32_BUILTIN_CLR_PENDING_SWINT,
++  NDS32_BUILTIN_CLR_PENDING_HWINT,
++  NDS32_BUILTIN_GET_ALL_PENDING_INT,
++  NDS32_BUILTIN_GET_PENDING_INT,
++  NDS32_BUILTIN_SET_INT_PRIORITY,
++  NDS32_BUILTIN_GET_INT_PRIORITY,
++  NDS32_BUILTIN_SET_TRIG_LEVEL,
++  NDS32_BUILTIN_SET_TRIG_EDGE,
++  NDS32_BUILTIN_GET_TRIG_TYPE,
++  NDS32_BUILTIN_SIGNATURE_BEGIN,
++  NDS32_BUILTIN_SIGNATURE_END,
++  NDS32_BUILTIN_DSP_BEGIN,
++  NDS32_BUILTIN_ADD16,
++  NDS32_BUILTIN_V_UADD16,
++  NDS32_BUILTIN_V_SADD16,
++  NDS32_BUILTIN_RADD16,
++  NDS32_BUILTIN_V_RADD16,
++  NDS32_BUILTIN_URADD16,
++  NDS32_BUILTIN_V_URADD16,
++  NDS32_BUILTIN_KADD16,
++  NDS32_BUILTIN_V_KADD16,
++  NDS32_BUILTIN_UKADD16,
++  NDS32_BUILTIN_V_UKADD16,
++  NDS32_BUILTIN_SUB16,
++  NDS32_BUILTIN_V_USUB16,
++  NDS32_BUILTIN_V_SSUB16,
++  NDS32_BUILTIN_RSUB16,
++  NDS32_BUILTIN_V_RSUB16,
++  NDS32_BUILTIN_URSUB16,
++  NDS32_BUILTIN_V_URSUB16,
++  NDS32_BUILTIN_KSUB16,
++  NDS32_BUILTIN_V_KSUB16,
++  NDS32_BUILTIN_UKSUB16,
++  NDS32_BUILTIN_V_UKSUB16,
++  NDS32_BUILTIN_CRAS16,
++  NDS32_BUILTIN_V_UCRAS16,
++  NDS32_BUILTIN_V_SCRAS16,
++  NDS32_BUILTIN_RCRAS16,
++  NDS32_BUILTIN_V_RCRAS16,
++  NDS32_BUILTIN_URCRAS16,
++  NDS32_BUILTIN_V_URCRAS16,
++  NDS32_BUILTIN_KCRAS16,
++  NDS32_BUILTIN_V_KCRAS16,
++  NDS32_BUILTIN_UKCRAS16,
++  NDS32_BUILTIN_V_UKCRAS16,
++  NDS32_BUILTIN_CRSA16,
++  NDS32_BUILTIN_V_UCRSA16,
++  NDS32_BUILTIN_V_SCRSA16,
++  NDS32_BUILTIN_RCRSA16,
++  NDS32_BUILTIN_V_RCRSA16,
++  NDS32_BUILTIN_URCRSA16,
++  NDS32_BUILTIN_V_URCRSA16,
++  NDS32_BUILTIN_KCRSA16,
++  NDS32_BUILTIN_V_KCRSA16,
++  NDS32_BUILTIN_UKCRSA16,
++  NDS32_BUILTIN_V_UKCRSA16,
++  NDS32_BUILTIN_ADD8,
++  NDS32_BUILTIN_V_UADD8,
++  NDS32_BUILTIN_V_SADD8,
++  NDS32_BUILTIN_RADD8,
++  NDS32_BUILTIN_V_RADD8,
++  NDS32_BUILTIN_URADD8,
++  NDS32_BUILTIN_V_URADD8,
++  NDS32_BUILTIN_KADD8,
++  NDS32_BUILTIN_V_KADD8,
++  NDS32_BUILTIN_UKADD8,
++  NDS32_BUILTIN_V_UKADD8,
++  NDS32_BUILTIN_SUB8,
++  NDS32_BUILTIN_V_USUB8,
++  NDS32_BUILTIN_V_SSUB8,
++  NDS32_BUILTIN_RSUB8,
++  NDS32_BUILTIN_V_RSUB8,
++  NDS32_BUILTIN_URSUB8,
++  NDS32_BUILTIN_V_URSUB8,
++  NDS32_BUILTIN_KSUB8,
++  NDS32_BUILTIN_V_KSUB8,
++  NDS32_BUILTIN_UKSUB8,
++  NDS32_BUILTIN_V_UKSUB8,
++  NDS32_BUILTIN_SRA16,
++  NDS32_BUILTIN_V_SRA16,
++  NDS32_BUILTIN_SRA16_U,
++  NDS32_BUILTIN_V_SRA16_U,
++  NDS32_BUILTIN_SRL16,
++  NDS32_BUILTIN_V_SRL16,
++  NDS32_BUILTIN_SRL16_U,
++  NDS32_BUILTIN_V_SRL16_U,
++  NDS32_BUILTIN_SLL16,
++  NDS32_BUILTIN_V_SLL16,
++  NDS32_BUILTIN_KSLL16,
++  NDS32_BUILTIN_V_KSLL16,
++  NDS32_BUILTIN_KSLRA16,
++  NDS32_BUILTIN_V_KSLRA16,
++  NDS32_BUILTIN_KSLRA16_U,
++  NDS32_BUILTIN_V_KSLRA16_U,
++  NDS32_BUILTIN_CMPEQ16,
++  NDS32_BUILTIN_V_SCMPEQ16,
++  NDS32_BUILTIN_V_UCMPEQ16,
++  NDS32_BUILTIN_SCMPLT16,
++  NDS32_BUILTIN_V_SCMPLT16,
++  NDS32_BUILTIN_SCMPLE16,
++  NDS32_BUILTIN_V_SCMPLE16,
++  NDS32_BUILTIN_UCMPLT16,
++  NDS32_BUILTIN_V_UCMPLT16,
++  NDS32_BUILTIN_UCMPLE16,
++  NDS32_BUILTIN_V_UCMPLE16,
++  NDS32_BUILTIN_CMPEQ8,
++  NDS32_BUILTIN_V_SCMPEQ8,
++  NDS32_BUILTIN_V_UCMPEQ8,
++  NDS32_BUILTIN_SCMPLT8,
++  NDS32_BUILTIN_V_SCMPLT8,
++  NDS32_BUILTIN_SCMPLE8,
++  NDS32_BUILTIN_V_SCMPLE8,
++  NDS32_BUILTIN_UCMPLT8,
++  NDS32_BUILTIN_V_UCMPLT8,
++  NDS32_BUILTIN_UCMPLE8,
++  NDS32_BUILTIN_V_UCMPLE8,
++  NDS32_BUILTIN_SMIN16,
++  NDS32_BUILTIN_V_SMIN16,
++  NDS32_BUILTIN_UMIN16,
++  NDS32_BUILTIN_V_UMIN16,
++  NDS32_BUILTIN_SMAX16,
++  NDS32_BUILTIN_V_SMAX16,
++  NDS32_BUILTIN_UMAX16,
++  NDS32_BUILTIN_V_UMAX16,
++  NDS32_BUILTIN_SCLIP16,
++  NDS32_BUILTIN_V_SCLIP16,
++  NDS32_BUILTIN_UCLIP16,
++  NDS32_BUILTIN_V_UCLIP16,
++  NDS32_BUILTIN_KHM16,
++  NDS32_BUILTIN_V_KHM16,
++  NDS32_BUILTIN_KHMX16,
++  NDS32_BUILTIN_V_KHMX16,
++  NDS32_BUILTIN_KABS16,
++  NDS32_BUILTIN_V_KABS16,
++  NDS32_BUILTIN_SMIN8,
++  NDS32_BUILTIN_V_SMIN8,
++  NDS32_BUILTIN_UMIN8,
++  NDS32_BUILTIN_V_UMIN8,
++  NDS32_BUILTIN_SMAX8,
++  NDS32_BUILTIN_V_SMAX8,
++  NDS32_BUILTIN_UMAX8,
++  NDS32_BUILTIN_V_UMAX8,
++  NDS32_BUILTIN_KABS8,
++  NDS32_BUILTIN_V_KABS8,
++  NDS32_BUILTIN_SUNPKD810,
++  NDS32_BUILTIN_V_SUNPKD810,
++  NDS32_BUILTIN_SUNPKD820,
++  NDS32_BUILTIN_V_SUNPKD820,
++  NDS32_BUILTIN_SUNPKD830,
++  NDS32_BUILTIN_V_SUNPKD830,
++  NDS32_BUILTIN_SUNPKD831,
++  NDS32_BUILTIN_V_SUNPKD831,
++  NDS32_BUILTIN_ZUNPKD810,
++  NDS32_BUILTIN_V_ZUNPKD810,
++  NDS32_BUILTIN_ZUNPKD820,
++  NDS32_BUILTIN_V_ZUNPKD820,
++  NDS32_BUILTIN_ZUNPKD830,
++  NDS32_BUILTIN_V_ZUNPKD830,
++  NDS32_BUILTIN_ZUNPKD831,
++  NDS32_BUILTIN_V_ZUNPKD831,
++  NDS32_BUILTIN_RADDW,
++  NDS32_BUILTIN_URADDW,
++  NDS32_BUILTIN_RSUBW,
++  NDS32_BUILTIN_URSUBW,
++  NDS32_BUILTIN_SRA_U,
++  NDS32_BUILTIN_KSLL,
++  NDS32_BUILTIN_PKBB16,
++  NDS32_BUILTIN_V_PKBB16,
++  NDS32_BUILTIN_PKBT16,
++  NDS32_BUILTIN_V_PKBT16,
++  NDS32_BUILTIN_PKTB16,
++  NDS32_BUILTIN_V_PKTB16,
++  NDS32_BUILTIN_PKTT16,
++  NDS32_BUILTIN_V_PKTT16,
++  NDS32_BUILTIN_SMMUL,
++  NDS32_BUILTIN_SMMUL_U,
++  NDS32_BUILTIN_KMMAC,
++  NDS32_BUILTIN_KMMAC_U,
++  NDS32_BUILTIN_KMMSB,
++  NDS32_BUILTIN_KMMSB_U,
++  NDS32_BUILTIN_KWMMUL,
++  NDS32_BUILTIN_KWMMUL_U,
++  NDS32_BUILTIN_SMMWB,
++  NDS32_BUILTIN_V_SMMWB,
++  NDS32_BUILTIN_SMMWB_U,
++  NDS32_BUILTIN_V_SMMWB_U,
++  NDS32_BUILTIN_SMMWT,
++  NDS32_BUILTIN_V_SMMWT,
++  NDS32_BUILTIN_SMMWT_U,
++  NDS32_BUILTIN_V_SMMWT_U,
++  NDS32_BUILTIN_KMMAWB,
++  NDS32_BUILTIN_V_KMMAWB,
++  NDS32_BUILTIN_KMMAWB_U,
++  NDS32_BUILTIN_V_KMMAWB_U,
++  NDS32_BUILTIN_KMMAWT,
++  NDS32_BUILTIN_V_KMMAWT,
++  NDS32_BUILTIN_KMMAWT_U,
++  NDS32_BUILTIN_V_KMMAWT_U,
++  NDS32_BUILTIN_SMBB,
++  NDS32_BUILTIN_V_SMBB,
++  NDS32_BUILTIN_SMBT,
++  NDS32_BUILTIN_V_SMBT,
++  NDS32_BUILTIN_SMTT,
++  NDS32_BUILTIN_V_SMTT,
++  NDS32_BUILTIN_KMDA,
++  NDS32_BUILTIN_V_KMDA,
++  NDS32_BUILTIN_KMXDA,
++  NDS32_BUILTIN_V_KMXDA,
++  NDS32_BUILTIN_SMDS,
++  NDS32_BUILTIN_V_SMDS,
++  NDS32_BUILTIN_SMDRS,
++  NDS32_BUILTIN_V_SMDRS,
++  NDS32_BUILTIN_SMXDS,
++  NDS32_BUILTIN_V_SMXDS,
++  NDS32_BUILTIN_KMABB,
++  NDS32_BUILTIN_V_KMABB,
++  NDS32_BUILTIN_KMABT,
++  NDS32_BUILTIN_V_KMABT,
++  NDS32_BUILTIN_KMATT,
++  NDS32_BUILTIN_V_KMATT,
++  NDS32_BUILTIN_KMADA,
++  NDS32_BUILTIN_V_KMADA,
++  NDS32_BUILTIN_KMAXDA,
++  NDS32_BUILTIN_V_KMAXDA,
++  NDS32_BUILTIN_KMADS,
++  NDS32_BUILTIN_V_KMADS,
++  NDS32_BUILTIN_KMADRS,
++  NDS32_BUILTIN_V_KMADRS,
++  NDS32_BUILTIN_KMAXDS,
++  NDS32_BUILTIN_V_KMAXDS,
++  NDS32_BUILTIN_KMSDA,
++  NDS32_BUILTIN_V_KMSDA,
++  NDS32_BUILTIN_KMSXDA,
++  NDS32_BUILTIN_V_KMSXDA,
++  NDS32_BUILTIN_SMAL,
++  NDS32_BUILTIN_V_SMAL,
++  NDS32_BUILTIN_BITREV,
++  NDS32_BUILTIN_WEXT,
++  NDS32_BUILTIN_BPICK,
++  NDS32_BUILTIN_INSB,
++  NDS32_BUILTIN_SADD64,
++  NDS32_BUILTIN_UADD64,
++  NDS32_BUILTIN_RADD64,
++  NDS32_BUILTIN_URADD64,
++  NDS32_BUILTIN_KADD64,
++  NDS32_BUILTIN_UKADD64,
++  NDS32_BUILTIN_SSUB64,
++  NDS32_BUILTIN_USUB64,
++  NDS32_BUILTIN_RSUB64,
++  NDS32_BUILTIN_URSUB64,
++  NDS32_BUILTIN_KSUB64,
++  NDS32_BUILTIN_UKSUB64,
++  NDS32_BUILTIN_SMAR64,
++  NDS32_BUILTIN_SMSR64,
++  NDS32_BUILTIN_UMAR64,
++  NDS32_BUILTIN_UMSR64,
++  NDS32_BUILTIN_KMAR64,
++  NDS32_BUILTIN_KMSR64,
++  NDS32_BUILTIN_UKMAR64,
++  NDS32_BUILTIN_UKMSR64,
++  NDS32_BUILTIN_SMALBB,
++  NDS32_BUILTIN_V_SMALBB,
++  NDS32_BUILTIN_SMALBT,
++  NDS32_BUILTIN_V_SMALBT,
++  NDS32_BUILTIN_SMALTT,
++  NDS32_BUILTIN_V_SMALTT,
++  NDS32_BUILTIN_SMALDA,
++  NDS32_BUILTIN_V_SMALDA,
++  NDS32_BUILTIN_SMALXDA,
++  NDS32_BUILTIN_V_SMALXDA,
++  NDS32_BUILTIN_SMALDS,
++  NDS32_BUILTIN_V_SMALDS,
++  NDS32_BUILTIN_SMALDRS,
++  NDS32_BUILTIN_V_SMALDRS,
++  NDS32_BUILTIN_SMALXDS,
++  NDS32_BUILTIN_V_SMALXDS,
++  NDS32_BUILTIN_SMUL16,
++  NDS32_BUILTIN_V_SMUL16,
++  NDS32_BUILTIN_SMULX16,
++  NDS32_BUILTIN_V_SMULX16,
++  NDS32_BUILTIN_UMUL16,
++  NDS32_BUILTIN_V_UMUL16,
++  NDS32_BUILTIN_UMULX16,
++  NDS32_BUILTIN_V_UMULX16,
++  NDS32_BUILTIN_SMSLDA,
++  NDS32_BUILTIN_V_SMSLDA,
++  NDS32_BUILTIN_SMSLXDA,
++  NDS32_BUILTIN_V_SMSLXDA,
++  NDS32_BUILTIN_UCLIP32,
++  NDS32_BUILTIN_SCLIP32,
++  NDS32_BUILTIN_KABS,
++  NDS32_BUILTIN_UALOAD_U16,
++  NDS32_BUILTIN_UALOAD_S16,
++  NDS32_BUILTIN_UALOAD_U8,
++  NDS32_BUILTIN_UALOAD_S8,
++  NDS32_BUILTIN_UASTORE_U16,
++  NDS32_BUILTIN_UASTORE_S16,
++  NDS32_BUILTIN_UASTORE_U8,
++  NDS32_BUILTIN_UASTORE_S8,
++  NDS32_BUILTIN_DSP_END,
++  NDS32_BUILTIN_NO_HWLOOP,
++  NDS32_BUILTIN_UNALIGNED_FEATURE,
++  NDS32_BUILTIN_ENABLE_UNALIGNED,
++  NDS32_BUILTIN_DISABLE_UNALIGNED,
++  NDS32_BUILTIN_COUNT
+ };
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-#define TARGET_ISA_V2   (nds32_arch_option == ARCH_V2)
+-#define TARGET_ISA_V3   (nds32_arch_option == ARCH_V3)
+-#define TARGET_ISA_V3M  (nds32_arch_option == ARCH_V3M)
++#define TARGET_ISR_VECTOR_SIZE_4_BYTE \
++  (nds32_isr_vector_size == 4)
++
++#define TARGET_ISA_V2 \
++  (nds32_arch_option == ARCH_V2 || nds32_arch_option == ARCH_V2J)
++#define TARGET_ISA_V3 \
++  (nds32_arch_option == ARCH_V3 \
++   || nds32_arch_option == ARCH_V3J \
++   || nds32_arch_option == ARCH_V3F \
++   || nds32_arch_option == ARCH_V3S)
++#define TARGET_ISA_V3M \
++  (nds32_arch_option == ARCH_V3M \
++   || nds32_arch_option == ARCH_V3M_PLUS)
++
++#define TARGET_ISA_V3M_PLUS \
++  (nds32_arch_option == ARCH_V3M_PLUS)
++
++#define TARGET_PIPELINE_N7 \
++  (nds32_cpu_option == CPU_N7)
++#define TARGET_PIPELINE_N8 \
++  (nds32_cpu_option == CPU_N6 \
++   || nds32_cpu_option == CPU_N8)
++#define TARGET_PIPELINE_N9 \
++  (nds32_cpu_option == CPU_N9)
++#define TARGET_PIPELINE_N10 \
++  (nds32_cpu_option == CPU_N10)
++#define TARGET_PIPELINE_N13 \
++  (nds32_cpu_option == CPU_N12 || nds32_cpu_option == CPU_N13)
++#define TARGET_PIPELINE_GRAYWOLF \
++  (nds32_cpu_option == CPU_GRAYWOLF)
++#define TARGET_PIPELINE_PANTHER \
++  (nds32_cpu_option == CPU_PANTHER)
++#define TARGET_PIPELINE_SIMPLE \
++  (nds32_cpu_option == CPU_SIMPLE)
+ 
+ #define TARGET_CMODEL_SMALL \
+    (nds32_cmodel_option == CMODEL_SMALL)
+@@ -361,55 +908,153 @@ enum nds32_builtins
+ #define TARGET_CMODEL_LARGE \
+    (nds32_cmodel_option == CMODEL_LARGE)
+ 
++#define TARGET_ICT_MODEL_SMALL \
++   (nds32_ict_model == ICT_MODEL_SMALL)
++
++#define TARGET_ICT_MODEL_LARGE \
++   (nds32_ict_model == ICT_MODEL_LARGE)
++
+ /* When -mcmodel=small or -mcmodel=medium,
+    compiler may generate gp-base instruction directly.  */
+ #define TARGET_GP_DIRECT \
+    (nds32_cmodel_option == CMODEL_SMALL\
+     || nds32_cmodel_option == CMODEL_MEDIUM)
+ 
+-#define TARGET_SOFT_FLOAT 1
+-#define TARGET_HARD_FLOAT 0
++/* There are three kinds of mul configurations:
++   1-cycle fast mul, 2-cycle fast mul, and slow mul operation.  */
++#define TARGET_MUL_FAST_1 \
++  (nds32_mul_config == MUL_TYPE_FAST_1)
++#define TARGET_MUL_FAST_2 \
++  (nds32_mul_config == MUL_TYPE_FAST_2)
++#define TARGET_MUL_SLOW \
++  (nds32_mul_config == MUL_TYPE_SLOW)
++
++/* Run-time Target Specification.  */
++#define TARGET_SOFT_FLOAT (nds32_abi == NDS32_ABI_V2)
++/* Use hardware floating point calling convention.  */
++#define TARGET_HARD_FLOAT (nds32_abi == NDS32_ABI_V2_FP_PLUS)
++
++/* Record arch version in TARGET_ARCH_DEFAULT.  0 means soft ABI,
++   1 means hard ABI using the full floating-point instruction set,
++   2 means hard ABI using only single-precision floating-point
++   instructions.  */
++#if TARGET_ARCH_DEFAULT == 1
++#  define TARGET_DEFAULT_ABI NDS32_ABI_V2_FP_PLUS
++#  define TARGET_DEFAULT_FPU_ISA MASK_FPU_DOUBLE | MASK_FPU_SINGLE
++#  define TARGET_DEFAULT_FPU_FMA 0
++#else
++#  if TARGET_ARCH_DEFAULT == 2
++#    define TARGET_DEFAULT_ABI NDS32_ABI_V2_FP_PLUS
++#    define TARGET_DEFAULT_FPU_ISA MASK_FPU_SINGLE
++#    define TARGET_DEFAULT_FPU_FMA 0
++#  else
++#    define TARGET_DEFAULT_ABI NDS32_ABI_V2
++#    define TARGET_DEFAULT_FPU_ISA 0
++#    define TARGET_DEFAULT_FPU_FMA 0
++#  endif
++#endif
++
++#define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2
++
++#define TARGET_LMWSMW_OPT_AUTO \
++   (flag_lmwsmw_cost == LMWSMW_OPT_AUTO)
++
++#define TARGET_LMWSMW_OPT_SIZE \
++   (flag_lmwsmw_cost == LMWSMW_OPT_SIZE)
++
++#define TARGET_LMWSMW_OPT_SPEED \
++   (flag_lmwsmw_cost == LMWSMW_OPT_SPEED)
++
++#define TARGET_LMWSMW_OPT_ALL \
++   (flag_lmwsmw_cost == LMWSMW_OPT_ALL)
++
++/* ------------------------------------------------------------------------ */
++
++#ifdef TARGET_DEFAULT_RELAX
++#  define NDS32_RELAX_SPEC " %{!mno-relax:--relax}"
++#else
++#  define NDS32_RELAX_SPEC " %{mrelax:--relax}"
++#endif
++
++#ifdef TARGET_OS_DEFAULT_IFC
++#  define NDS32_IFC_SPEC " %{Os3|Os|mifc:%{!mno-ifc:--mifc}}"
++#else
++#  define NDS32_IFC_SPEC " %{mifc:--mifc}"
++#endif
++#define NDS32_IFC_V3M_PLUS_SPEC " %{march=v3m+:%{Os3|Os|mifc:%{!mno-ifc:-mifc}}}"
++
++#ifdef TARGET_OS_DEFAULT_EX9
++#  define NDS32_EX9_SPEC " %{Os3|Os|mex9:%{!mno-ex9:--mex9}}"
++#else
++#  define NDS32_EX9_SPEC " %{mex9:--mex9}"
++#endif
++#define NDS32_EX9_V3M_PLUS_SPEC " %{march=v3m+:%{Os3|Os|mex9:%{!mno-ex9:-mex9}}}"
++
++#ifdef TARGET_DEFAULT_EXT_DSP
++#  define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}"
++#else
++#  define NDS32_EXT_DSP_SPEC ""
++#endif
++
++#ifdef TARGET_DEFAULT_HWLOOP
++#  define NDS32_HWLOOP_SPEC " %{!mno-ext-zol:-mext-zol}"
++#else
++#  define NDS32_HWLOOP_SPEC ""
++#endif
++
++#ifdef TARGET_DEFAULT_16BIT
++#  define NDS32_16BIT_SPEC " %{!mno-16-bit:%{!mno-16bit:-m16bit}}"
++#else
++#  define NDS32_16BIT_SPEC " %{!m16-bit:%{!m16bit:-mno-16bit}}"
++#endif
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+ /* Controlling the Compilation Driver.  */
+ 
++#define DRIVER_SELF_SPECS \
++  " %{mno-16bit|mno-16-bit:-mno-ifc -mno-ex9}" \
++  NDS32_IFC_V3M_PLUS_SPEC \
++  NDS32_EX9_V3M_PLUS_SPEC \
++  NDS32_16BIT_SPEC
++
+ #define OPTION_DEFAULT_SPECS \
+-  {"arch", "%{!march=*:-march=%(VALUE)}" }
++  {"arch", " %{!march=*:-march=%(VALUE)}" \
++	   " %{march=v3f:%{!mfloat-abi=*:-mfloat-abi=hard}" \
++	   " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}" \
++	   " %{!mno-ext-fpu-dp:%{!mext-fpu-dp:-mext-fpu-dp}}}" \
++	   " %{march=v3s:%{!mfloat-abi=*:-mfloat-abi=hard}" \
++	   " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}}" }, \
++  {"cpu",  "%{!mcpu=*:-mcpu=%(VALUE)}" },   \
++  {"memory_model", "%{!mmemory-model=*:-mmemory-model=%(VALUE)}"}, \
++  {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }
+ 
+ #define CC1_SPEC \
+-  ""
++  " %{Os1:-Os -mno-ifc -mno-ex9;" \
++     "Os2:-Os -minnermost-loop;" \
++     "Os3:-Os}" \
++  " %{ffast-math:%{!mno-soft-fp-arith-comm:-msoft-fp-arith-comm}}" \
++  NDS32_EXT_DSP_SPEC \
++  NDS32_HWLOOP_SPEC
+ 
+ #define ASM_SPEC \
+-  " %{mbig-endian:-EB} %{mlittle-endian:-EL}"
+-
+-/* If user issues -mrelax, we need to pass '--relax' to linker.  */
+-#define LINK_SPEC \
+   " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+-  " %{mrelax:--relax}"
+-
+-#define LIB_SPEC \
+-  " -lc -lgloss"
+-
+-/* The option -mno-ctor-dtor can disable constructor/destructor feature
+-   by applying different crt stuff.  In the convention, crt0.o is the
+-   startup file without constructor/destructor;
+-   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
+-   startup files with constructor/destructor.
+-   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
+-   by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are
+-   currently provided by GCC for nds32 target.
+-
+-   For nds32 target so far:
+-   If -mno-ctor-dtor, we are going to link
+-   "crt0.o [user objects]".
+-   If general cases, we are going to link
+-   "crt1.o crtbegin1.o [user objects] crtend1.o".  */
+-#define STARTFILE_SPEC \
+-  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+-  " %{!mno-ctor-dtor:crtbegin1.o%s}"
+-#define ENDFILE_SPEC \
+-  " %{!mno-ctor-dtor:crtend1.o%s}"
++  " %{march=*:-march=%*}" \
++  " %{mno-16-bit|mno-16bit:-mno-16bit-ext}" \
++  " %{march=v3m:%{!mfull-regs:%{!mreduced-regs:-mreduced-regs}}}" \
++  " %{mfull-regs:-mno-reduced-regs}" \
++  " %{mreduced-regs:-mreduced-regs}" \
++  " %{mabi=*:-mabi=v%*}" \
++  " %{mconfig-fpu=*:-mfpu-freg=%*}" \
++  " %{mext-fpu-mac:-mmac}" \
++  " %{mno-ext-fpu-mac:-mno-mac}" \
++  " %{mext-fpu-sp:-mfpu-sp-ext}" \
++  " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \
++  " %{mext-fpu-dp:-mfpu-dp-ext}" \
++  " %{mno-ext-fpu-dp:-mno-fpu-dp-ext}" \
++  " %{mext-dsp:-mdsp-ext}" \
++  " %{mext-zol:-mzol-ext}" \
++  " %{O|O1|O2|O3|Ofast:-O1;:-Os}"
+ 
+ /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we
+    configure gcc with --target=nds32be-* setting.
+@@ -422,7 +1067,11 @@ enum nds32_builtins
+ 
+ /* Currently we only have elf toolchain,
+    where -mcmodel=medium is always the default.  */
+-#define NDS32_CMODEL_DEFAULT "mcmodel=medium"
++#if TARGET_ELF
++#  define NDS32_CMODEL_DEFAULT "mcmodel=medium"
++#else
++#  define NDS32_CMODEL_DEFAULT "mcmodel=medium"
++#endif
+ 
+ #define MULTILIB_DEFAULTS \
+   { NDS32_ENDIAN_DEFAULT, NDS32_CMODEL_DEFAULT }
+@@ -430,34 +1079,8 @@ enum nds32_builtins
+ 
+ /* Run-time Target Specification.  */
+ 
+-#define TARGET_CPU_CPP_BUILTINS()                     \
+-  do                                                  \
+-    {                                                 \
+-      builtin_define ("__nds32__");                   \
+-                                                      \
+-      if (TARGET_ISA_V2)                              \
+-        builtin_define ("__NDS32_ISA_V2__");          \
+-      if (TARGET_ISA_V3)                              \
+-        builtin_define ("__NDS32_ISA_V3__");          \
+-      if (TARGET_ISA_V3M)                             \
+-        builtin_define ("__NDS32_ISA_V3M__");         \
+-                                                      \
+-      if (TARGET_BIG_ENDIAN)                          \
+-        builtin_define ("__big_endian__");            \
+-      if (TARGET_REDUCED_REGS)                        \
+-        builtin_define ("__NDS32_REDUCED_REGS__");    \
+-      if (TARGET_CMOV)                                \
+-        builtin_define ("__NDS32_CMOV__");            \
+-      if (TARGET_PERF_EXT)                            \
+-        builtin_define ("__NDS32_PERF_EXT__");        \
+-      if (TARGET_16_BIT)                              \
+-        builtin_define ("__NDS32_16_BIT__");          \
+-      if (TARGET_GP_DIRECT)                           \
+-        builtin_define ("__NDS32_GP_DIRECT__");       \
+-                                                      \
+-      builtin_assert ("cpu=nds32");                   \
+-      builtin_assert ("machine=nds32");               \
+-    } while (0)
++#define TARGET_CPU_CPP_BUILTINS() \
++  nds32_cpu_cpp_builtins (pfile)
+ 
+ 
+ /* Defining Data Structures for Per-function Information.  */
+@@ -487,10 +1110,20 @@ enum nds32_builtins
+ 
+ #define STACK_BOUNDARY 64
+ 
+-#define FUNCTION_BOUNDARY 32
++#define FUNCTION_BOUNDARY \
++  ((NDS32_ALIGN_P () || TARGET_ALIGN_FUNCTION) ? (TARGET_PIPELINE_PANTHER ? 64 : 32) : 16)
+ 
+ #define BIGGEST_ALIGNMENT 64
+ 
++#define DATA_ALIGNMENT(constant, basic_align) \
++  nds32_data_alignment (constant, basic_align)
++
++#define CONSTANT_ALIGNMENT(constant, basic_align) \
++  nds32_constant_alignment (constant, basic_align)
++
++#define LOCAL_ALIGNMENT(type, basic_align) \
++  nds32_local_alignment (type, basic_align)
++
+ #define EMPTY_FIELD_BOUNDARY 32
+ 
+ #define STRUCTURE_SIZE_BOUNDARY 8
+@@ -515,8 +1148,8 @@ enum nds32_builtins
+ 
+ #define SIZE_TYPE "long unsigned int"
+ #define PTRDIFF_TYPE "long int"
+-#define WCHAR_TYPE "short unsigned int"
+-#define WCHAR_TYPE_SIZE 16
++#define WCHAR_TYPE "unsigned int"
++#define WCHAR_TYPE_SIZE 32
+ 
+ 
+ /* Register Usage.  */
+@@ -526,7 +1159,7 @@ enum nds32_builtins
+    from 0 to just below FIRST_PSEUDO_REGISTER.
+    All registers that the compiler knows about must be given numbers,
+    even those that are not normally considered general registers.  */
+-#define FIRST_PSEUDO_REGISTER 34
++#define FIRST_PSEUDO_REGISTER 101
+ 
+ /* An initializer that says which registers are used for fixed
+    purposes all throughout the compiled code and are therefore
+@@ -537,24 +1170,38 @@ enum nds32_builtins
+    $r30 : $lp
+    $r31 : $sp
+ 
+-   caller-save registers: $r0 ~ $r5, $r16 ~ $r23
+-   callee-save registers: $r6 ~ $r10, $r11 ~ $r14
++   caller-save registers: $r0 ~ $r5, $r16 ~ $r23, $fs0 ~ $fs5, $fs22 ~ $fs47
++   callee-save registers: $r6 ~ $r10, $r11 ~ $r14, $fs6 ~ $fs21, $fs48 ~ $fs63
+ 
+    reserved for assembler : $r15
+    reserved for other use : $r24, $r25, $r26, $r27 */
+-#define FIXED_REGISTERS                 \
+-{ /* r0  r1  r2  r3  r4  r5  r6  r7  */ \
+-      0,  0,  0,  0,  0,  0,  0,  0,    \
+-  /* r8  r9  r10 r11 r12 r13 r14 r15 */ \
+-      0,  0,  0,  0,  0,  0,  0,  1,    \
+-  /* r16 r17 r18 r19 r20 r21 r22 r23 */ \
+-      0,  0,  0,  0,  0,  0,  0,  0,    \
+-  /* r24 r25 r26 r27 r28 r29 r30 r31 */ \
+-      1,  1,  1,  1,  0,  1,  0,  1,    \
+-  /* ARG_POINTER:32 */                  \
+-      1,                                \
+-  /* FRAME_POINTER:33 */                \
+-      1                                 \
++#define FIXED_REGISTERS \
++{ /* r0   r1   r2   r3   r4   r5   r6   r7   */ \
++      0,   0,   0,   0,   0,   0,   0,   0,     \
++  /* r8   r9   r10  r11  r12  r13  r14  r15  */ \
++      0,   0,   0,   0,   0,   0,   0,   0,     \
++  /* r16  r17  r18  r19  r20  r21  r22  r23  */ \
++      0,   0,   0,   0,   0,   0,   0,   0,     \
++  /* r24  r25  r26  r27  r28  r29  r30  r31  */ \
++      0,   0,   1,   1,   0,   1,   0,   1,     \
++  /* AP   FP   fs0  fs1  fs2  fs3  fs4  fs5  */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs6  fs7  fs8  fs9  fs10 fs11 fs12 fs13 */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs30 fs31 fd16      fd17      fd18      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd19      fd20      fd21      fd22      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd23      fd24      fd25      fd26      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd27      fd28      fd29      fd30      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd31      LB   LE   LC                  */ \
++      1,   1,   1,   1,   1                     \
+ }
+ 
+ /* Identifies the registers that are not available for
+@@ -563,35 +1210,59 @@ enum nds32_builtins
+ 
+    0 : callee-save registers
+    1 : caller-save registers */
+-#define CALL_USED_REGISTERS             \
+-{ /* r0  r1  r2  r3  r4  r5  r6  r7  */ \
+-      1,  1,  1,  1,  1,  1,  0,  0,    \
+-  /* r8  r9  r10 r11 r12 r13 r14 r15 */ \
+-      0,  0,  0,  0,  0,  0,  0,  1,    \
+-  /* r16 r17 r18 r19 r20 r21 r22 r23 */ \
+-      1,  1,  1,  1,  1,  1,  1,  1,    \
+-  /* r24 r25 r26 r27 r28 r29 r30 r31 */ \
+-      1,  1,  1,  1,  0,  1,  0,  1,    \
+-  /* ARG_POINTER:32 */                  \
+-      1,                                \
+-  /* FRAME_POINTER:33 */                \
+-      1                                 \
++#define CALL_USED_REGISTERS \
++{ /* r0   r1   r2   r3   r4   r5   r6   r7   */ \
++      1,   1,   1,   1,   1,   1,   0,   0,     \
++  /* r8   r9   r10  r11  r12  r13  r14  r15  */ \
++      0,   0,   0,   0,   0,   0,   0,   1,     \
++  /* r16  r17  r18  r19  r20  r21  r22  r23  */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* r24  r25  r26  r27  r28  r29  r30  r31  */ \
++      1,   1,   1,   1,   0,   1,   0,   1,     \
++  /* AP   FP   fs0  fs1  fs2  fs3  fs4  fs5  */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs6  fs7  fs8  fs9  fs10 fs11 fs12 fs13 */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fs30 fs31 fd16      fd17      fd18      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd19      fd20      fd21      fd22      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd23      fd24      fd25      fd26      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd27      fd28      fd29      fd30      */ \
++      1,   1,   1,   1,   1,   1,   1,   1,     \
++  /* fd31      LB   LE   LC                  */ \
++      1,   1,   1,   1,   1                     \
+ }
+ 
+ /* In nds32 target, we have three levels of registers:
+      LOW_COST_REGS    : $r0 ~ $r7
+      MIDDLE_COST_REGS : $r8 ~ $r11, $r16 ~ $r19
+      HIGH_COST_REGS   : $r12 ~ $r14, $r20 ~ $r31 */
+-#define REG_ALLOC_ORDER           \
+-{                                 \
+-   0,  1,  2,  3,  4,  5,  6,  7, \
+-   8,  9, 10, 11, 16, 17, 18, 19, \
+-  12, 13, 14, 15, 20, 21, 22, 23, \
+-  24, 25, 26, 27, 28, 29, 30, 31, \
+-  32,                             \
+-  33                              \
++#define REG_ALLOC_ORDER \
++{   0,   1,   2,   3,   4,   5,   6,   7, \
++   16,  17,  18,  19,   9,  10,  11,  12, \
++   13,  14,  8,   15,  20,  21,  22,  23, \
++   24,  25,  26,  27,  28,  29,  30,  31, \
++   32,  33,  34,  35,  36,  37,  38,  39, \
++   40,  41,  42,  43,  44,  45,  46,  47, \
++   48,  49,  50,  51,  52,  53,  54,  55, \
++   56,  57,  58,  59,  60,  61,  62,  63, \
++   64,  65,  66,  67,  68,  69,  70,  71, \
++   72,  73,  74,  75,  76,  77,  78,  79, \
++   80,  81,  82,  83,  84,  85,  86,  87, \
++   88,  89,  90,  91,  92,  93,  94,  95, \
++   96,  97,  98,  99, 100,                \
+ }
+ 
++/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
++   to be rearranged based on optimizing for speed or size.  */
++#define ADJUST_REG_ALLOC_ORDER nds32_adjust_reg_alloc_order ()
++
+ /* Tell IRA to use the order we define rather than messing it up with its
+    own cost calculations.  */
+ #define HONOR_REG_ALLOC_ORDER optimize_size
+@@ -609,11 +1280,7 @@ enum nds32_builtins
+    Define this macro to return nonzero in as many cases as possible
+    since doing so will allow GCC to perform better register allocation.
+    We can use general registers to tie QI/HI/SI modes together.  */
+-#define MODES_TIEABLE_P(mode1, mode2)          \
+-  (GET_MODE_CLASS (mode1) == MODE_INT          \
+-   && GET_MODE_CLASS (mode2) == MODE_INT       \
+-   && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD  \
+-   && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD)
++#define MODES_TIEABLE_P(mode1, mode2) nds32_modes_tieable_p (mode1, mode2)
+ 
+ 
+ /* Register Classes.  */
+@@ -628,13 +1295,18 @@ enum nds32_builtins
+ enum reg_class
+ {
+   NO_REGS,
++  R5_REG,
++  R8_REG,
+   R15_TA_REG,
+   STACK_REG,
++  FRAME_POINTER_REG,
+   LOW_REGS,
+   MIDDLE_REGS,
+   HIGH_REGS,
+   GENERAL_REGS,
+   FRAME_REGS,
++  FP_REGS,
++  LOOP_REGS,
+   ALL_REGS,
+   LIM_REG_CLASSES
+ };
+@@ -644,27 +1316,50 @@ enum reg_class
+ #define REG_CLASS_NAMES \
+ {                       \
+   "NO_REGS",            \
++  "R5_REG",             \
++  "R8_REG",             \
+   "R15_TA_REG",         \
+   "STACK_REG",          \
++  "FRAME_POINTER_REG",  \
+   "LOW_REGS",           \
+   "MIDDLE_REGS",        \
+   "HIGH_REGS",          \
+   "GENERAL_REGS",       \
+   "FRAME_REGS",         \
++  "FP_REGS",            \
++  "LOOP_REGS",          \
+   "ALL_REGS"            \
+ }
+ 
+ #define REG_CLASS_CONTENTS \
+-{                                                            \
+-  {0x00000000, 0x00000000}, /* NO_REGS     :              */ \
+-  {0x00008000, 0x00000000}, /* R15_TA_REG  : 15           */ \
+-  {0x80000000, 0x00000000}, /* STACK_REG   : 31           */ \
+-  {0x000000ff, 0x00000000}, /* LOW_REGS    : 0-7          */ \
+-  {0x000f0fff, 0x00000000}, /* MIDDLE_REGS : 0-11, 16-19  */ \
+-  {0xfff07000, 0x00000000}, /* HIGH_REGS   : 12-14, 20-31 */ \
+-  {0xffffffff, 0x00000000}, /* GENERAL_REGS: 0-31         */ \
+-  {0x00000000, 0x00000003}, /* FRAME_REGS  : 32, 33       */ \
+-  {0xffffffff, 0x00000003}  /* ALL_REGS    : 0-31, 32, 33 */ \
++{ /* NO_REGS                                    */  \
++  {0x00000000, 0x00000000, 0x00000000, 0x00000000}, \
++  /* R5_REG              : 5                    */  \
++  {0x00000020, 0x00000000, 0x00000000, 0x00000000}, \
++  /* R8_REG              : 8                    */  \
++  {0x00000100, 0x00000000, 0x00000000, 0x00000000}, \
++  /* R15_TA_REG          : 15                   */  \
++  {0x00008000, 0x00000000, 0x00000000, 0x00000000}, \
++  /* STACK_REG           : 31                   */  \
++  {0x80000000, 0x00000000, 0x00000000, 0x00000000}, \
++  /* FRAME_POINTER_REG   : 28                   */  \
++  {0x10000000, 0x00000000, 0x00000000, 0x00000000}, \
++  /* LOW_REGS            : 0-7                  */  \
++  {0x000000ff, 0x00000000, 0x00000000, 0x00000000}, \
++  /* MIDDLE_REGS         : 0-11, 16-19          */  \
++  {0x000f0fff, 0x00000000, 0x00000000, 0x00000000}, \
++  /* HIGH_REGS           : 12-14, 20-31         */  \
++  {0xfff07000, 0x00000000, 0x00000000, 0x00000000}, \
++  /* GENERAL_REGS        : 0-31                 */  \
++  {0xffffffff, 0x00000000, 0x00000000, 0x00000000}, \
++  /* FRAME_REGS          : 32, 33               */  \
++  {0x00000000, 0x00000003, 0x00000000, 0x00000000}, \
++  /* FP_REGS             : 34-97                */  \
++  {0x00000000, 0xfffffffc, 0xffffffff, 0x00000003}, \
++  /* LOOP_REGS           : 98-100               */  \
++  {0x00000000, 0x00000000, 0x00000000, 0x0000001c}, \
++  /* ALL_REGS            : 0-100                */  \
++  {0xffffffff, 0xffffffff, 0xffffffff, 0x0000001f}  \
+ }
+ 
+ #define REGNO_REG_CLASS(regno) nds32_regno_reg_class (regno)
+@@ -672,13 +1367,18 @@ enum reg_class
+ #define BASE_REG_CLASS GENERAL_REGS
+ #define INDEX_REG_CLASS GENERAL_REGS
+ 
++#define TEST_REGNO(R, TEST, VALUE) \
++  (((R) TEST (VALUE)) || ((unsigned) reg_renumber[(R)] TEST (VALUE)))
++
+ /* Return nonzero if it is suitable for use as a
+    base register in operand addresses.
+    So far, we return nonzero only if "num" is a hard reg
+    of the suitable class or a pseudo register which is
+    allocated to a suitable hard reg.  */
+ #define REGNO_OK_FOR_BASE_P(num) \
+-  ((num) < 32 || (unsigned) reg_renumber[num] < 32)
++  (TEST_REGNO (num, <, 32) \
++   || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \
++   || TEST_REGNO (num, ==, ARG_POINTER_REGNUM))
+ 
+ /* Return nonzero if it is suitable for use as a
+    index register in operand addresses.
+@@ -688,7 +1388,15 @@ enum reg_class
+    The difference between an index register and a base register is that
+    the index register may be scaled.  */
+ #define REGNO_OK_FOR_INDEX_P(num) \
+-  ((num) < 32 || (unsigned) reg_renumber[num] < 32)
++  (TEST_REGNO (num, <, 32) \
++   || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \
++   || TEST_REGNO (num, ==, ARG_POINTER_REGNUM))
++
++/* Don't spill double-precision register to two single-precision registers.  */
++#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
++ ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)        \
++  && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)   \
++  ? reg_classes_intersect_p (CLASS, FP_REGS) : 0)
+ 
+ 
+ /* Obsolete Macros for Defining Constraints.  */
+@@ -707,6 +1415,11 @@ enum reg_class
+ #define FIRST_PARM_OFFSET(fundecl) \
+   (NDS32_DOUBLE_WORD_ALIGN_P (crtl->args.pretend_args_size) ? 0 : 4)
+ 
++/* A C expression whose value is RTL representing the address in a stack frame
++   where the pointer to the caller's frame is stored.  */
++#define DYNAMIC_CHAIN_ADDRESS(frameaddr) \
++  nds32_dynamic_chain_address (frameaddr)
++
+ #define RETURN_ADDR_RTX(count, frameaddr) \
+   nds32_return_addr_rtx (count, frameaddr)
+ 
+@@ -718,6 +1431,15 @@ enum reg_class
+ #define INCOMING_RETURN_ADDR_RTX    gen_rtx_REG (Pmode, LP_REGNUM)
+ #define DWARF_FRAME_RETURN_COLUMN   DWARF_FRAME_REGNUM (LP_REGNUM)
+ 
++/* Use $r0 $r1 to pass exception handling information.  */
++#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N) : INVALID_REGNUM)
++/* The register $r2 that represents a location in which to store a stack
++   adjustment to be applied before function return.
++   This is used to unwind the stack to an exception handler's call frame.  */
++#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 2)
++
++#define DBX_REGISTER_NUMBER(REGNO) nds32_dbx_register_number (REGNO)
++
+ #define STACK_POINTER_REGNUM SP_REGNUM
+ 
+ #define FRAME_POINTER_REGNUM 33
+@@ -746,12 +1468,11 @@ enum reg_class
+ #define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \
+   nds32_init_cumulative_args (&cum, fntype, libname, fndecl, n_named_args)
+ 
+-/* The REGNO is an unsigned integer but NDS32_GPR_ARG_FIRST_REGNUM may be 0.
+-   We better cast REGNO into signed integer so that we can avoid
+-   'comparison of unsigned expression >= 0 is always true' warning.  */
+-#define FUNCTION_ARG_REGNO_P(regno)                                        \
+-  (((int) regno - NDS32_GPR_ARG_FIRST_REGNUM >= 0)                         \
+-   && ((int) regno - NDS32_GPR_ARG_FIRST_REGNUM < NDS32_MAX_GPR_REGS_FOR_ARGS))
++#define FUNCTION_ARG_REGNO_P(regno)                                           \
++ (IN_RANGE ((regno), NDS32_FIRST_GPR_REGNUM, NDS32_MAX_GPR_REGS_FOR_ARGS - 1) \
++  || ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)                                \
++      && IN_RANGE ((regno), NDS32_FPR_ARG_FIRST_REGNUM,                       \
++		   NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS - 1)))
+ 
+ #define DEFAULT_PCC_STRUCT_RETURN 0
+ 
+@@ -763,7 +1484,15 @@ enum reg_class
+ #define EXIT_IGNORE_STACK 1
+ 
+ #define FUNCTION_PROFILER(file, labelno) \
+-  fprintf (file, "/* profiler %d */", (labelno))
++  fprintf (file, "/* profiler %d */\n", (labelno))
++
++#define PROFILE_HOOK(LABEL)                                             \
++  {                                                                     \
++    rtx fun, lp;                                                        \
++    lp = get_hard_reg_initial_val (Pmode, LP_REGNUM);                   \
++    fun = gen_rtx_SYMBOL_REF (Pmode, "_mcount");                        \
++    emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lp, Pmode);        \
++  }
+ 
+ 
+ /* Implementing the Varargs Macros.  */
+@@ -780,13 +1509,13 @@ enum reg_class
+    The trampoline code for nds32 target must contains following parts:
+ 
+      1. instructions (4 * 4 = 16 bytes):
+-          get $pc first
+-          load chain_value to static chain register via $pc
+-          load nested function address to $r15 via $pc
+-          jump to desired nested function via $r15
++	  get $pc first
++	  load chain_value to static chain register via $pc
++	  load nested function address to $r15 via $pc
++	  jump to desired nested function via $r15
+      2. data (4 * 2 = 8 bytes):
+-          chain_value
+-          nested function address
++	  chain_value
++	  nested function address
+ 
+    Please check nds32.c implementation for more information.  */
+ #define TRAMPOLINE_SIZE 24
+@@ -811,9 +1540,22 @@ enum reg_class
+ /* We have "LW.bi   Rt, [Ra], Rb" instruction form.  */
+ #define HAVE_POST_MODIFY_REG  1
+ 
+-#define CONSTANT_ADDRESS_P(x) (CONSTANT_P (x) && GET_CODE (x) != CONST_DOUBLE)
++#define USE_LOAD_POST_INCREMENT(mode) \
++  (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
++#define USE_LOAD_POST_DECREMENT(mode) \
++  (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
++#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT (mode)
++#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT (mode)
++
++#define CONSTANT_ADDRESS_P(x) \
++  (CONSTANT_P (x) && memory_address_p (GET_MODE (x), x))
+ 
+-#define MAX_REGS_PER_ADDRESS 2
++/* CONST_DOUBLE is legal without TARGET_FPU in legitimate_constant_p.
++   Therefore, let it be a legal PIC operand and split it later.  */
++#define LEGITIMATE_PIC_OPERAND_P(x) \
++  (GET_CODE (x) != CONST_DOUBLE || !(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE))
++
++#define MAX_REGS_PER_ADDRESS 3
+ 
+ 
+ /* Anchored Addresses.  */
+@@ -827,7 +1569,11 @@ enum reg_class
+ /* A C expression for the cost of a branch instruction.
+    A value of 1 is the default;
+    other values are interpreted relative to that.  */
+-#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 2 : 0)
++#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 2 : 1)
++
++/* Override BRANCH_COST heuristic which empirically produces worse
++   performance for removing short circuiting from the logical ops.  */
++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+ 
+ #define SLOW_BYTE_ACCESS 1
+ 
+@@ -857,12 +1603,17 @@ enum reg_class
+ 
+ #define PIC_OFFSET_TABLE_REGNUM GP_REGNUM
+ 
++#define SYMBOLIC_CONST_P(X)	\
++(GET_CODE (X) == SYMBOL_REF						\
++ || GET_CODE (X) == LABEL_REF						\
++ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
++
+ 
+ /* Defining the Output Assembler Language.  */
+ 
+ #define ASM_COMMENT_START "!"
+ 
+-#define ASM_APP_ON "! #APP"
++#define ASM_APP_ON "! #APP\n"
+ 
+ #define ASM_APP_OFF "! #NO_APP\n"
+ 
+@@ -877,14 +1628,77 @@ enum reg_class
+ 
+ #define LOCAL_LABEL_PREFIX "."
+ 
+-#define REGISTER_NAMES                                            \
+-{                                                                 \
+-  "$r0",  "$r1",  "$r2",  "$r3",  "$r4",  "$r5",  "$r6",  "$r7",  \
++#define REGISTER_NAMES \
++{ "$r0",  "$r1",  "$r2",  "$r3",  "$r4",  "$r5",  "$r6",  "$r7",  \
+   "$r8",  "$r9",  "$r10", "$r11", "$r12", "$r13", "$r14", "$ta",  \
+   "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \
+   "$r24", "$r25", "$r26", "$r27", "$fp",  "$gp",  "$lp",  "$sp",  \
+-  "$AP",                                                          \
+-  "$SFP"                                                          \
++  "$AP",  "$SFP", "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", \
++  "$fs6", "$fs7", "$fs8", "$fs9", "$fs10","$fs11","$fs12","$fs13",\
++  "$fs14","$fs15","$fs16","$fs17","$fs18","$fs19","$fs20","$fs21",\
++  "$fs22","$fs23","$fs24","$fs25","$fs26","$fs27","$fs28","$fs29",\
++  "$fs30","$fs31","$fs32","$fs33","$fs34","$fs35","$fs36","$fs37",\
++  "$fs38","$fs39","$fs40","$fs41","$fs42","$fs43","$fs44","$fs45",\
++  "$fs46","$fs47","$fs48","$fs49","$fs50","$fs51","$fs52","$fs53",\
++  "$fs54","$fs55","$fs56","$fs57","$fs58","$fs59","$fs60","$fs61",\
++  "$fs62","$fs63",   "LB",   "LE",   "LC"                         \
++}
++
++#define ADDITIONAL_REGISTER_NAMES				\
++{								\
++  {"$r15", 15},							\
++  {"$r28", 28},	{"$r29", 29},	{"$r30", 30},	{"$r31", 31},	\
++  {"$a0", 0},	{"$a1", 1},	{"$a2", 2},			\
++  {"$a3", 3},	{"$a4", 4},	{"$a5", 5},			\
++  {"$s0", 6},	{"$s1", 7},	{"$s2", 8},	{"$s3", 9},	\
++  {"$s4", 10},	{"$s5", 11},	{"$s6", 12},	{"$s7", 13},	\
++  {"$s8", 14},							\
++  {"$t0", 16},	{"$t1", 17},	{"$t2", 18},	{"$t3", 19},	\
++  {"$t4", 20},	{"$t5", 21},	{"$t6", 22},	{"$t7", 23},	\
++  {"$t8", 24},	{"$t9", 25},					\
++  {"$p0", 26},	{"$p1", 27},					\
++  {"$h0", 0},	{"$h1", 1},	{"$h2", 2},	{"$h3", 3},	\
++  {"$h4", 4},	{"$h5", 5},	{"$h6", 6},	{"$h7", 7},	\
++  {"$h8", 8},	{"$h9", 9},	{"$h10", 10},	{"$h11", 11},	\
++  {"$h12", 16},	{"$h13", 17},	{"$h14", 18},	{"$h15", 19},	\
++  {"$o0", 0},	{"$o1", 1},	{"$o2", 2},	{"$o3", 3},	\
++  {"$o4", 4},	{"$o5", 5},	{"$o6", 6},	{"$o7", 7},	\
++}
++
++#define OVERLAPPING_REGISTER_NAMES		\
++{						\
++  {"$fd0",  NDS32_FIRST_FPR_REGNUM + 0,  2},	\
++  {"$fd1",  NDS32_FIRST_FPR_REGNUM + 2,  2},	\
++  {"$fd2",  NDS32_FIRST_FPR_REGNUM + 4,  2},	\
++  {"$fd3",  NDS32_FIRST_FPR_REGNUM + 6,  2},	\
++  {"$fd4",  NDS32_FIRST_FPR_REGNUM + 8,  2},	\
++  {"$fd5",  NDS32_FIRST_FPR_REGNUM + 10, 2},	\
++  {"$fd6",  NDS32_FIRST_FPR_REGNUM + 12, 2},	\
++  {"$fd7",  NDS32_FIRST_FPR_REGNUM + 14, 2},	\
++  {"$fd8",  NDS32_FIRST_FPR_REGNUM + 16, 2},	\
++  {"$fd9",  NDS32_FIRST_FPR_REGNUM + 18, 2},	\
++  {"$fd10", NDS32_FIRST_FPR_REGNUM + 20, 2},	\
++  {"$fd11", NDS32_FIRST_FPR_REGNUM + 22, 2},	\
++  {"$fd12", NDS32_FIRST_FPR_REGNUM + 24, 2},	\
++  {"$fd13", NDS32_FIRST_FPR_REGNUM + 26, 2},	\
++  {"$fd14", NDS32_FIRST_FPR_REGNUM + 28, 2},	\
++  {"$fd15", NDS32_FIRST_FPR_REGNUM + 30, 2},	\
++  {"$fd16", NDS32_FIRST_FPR_REGNUM + 32, 2},	\
++  {"$fd17", NDS32_FIRST_FPR_REGNUM + 34, 2},	\
++  {"$fd18", NDS32_FIRST_FPR_REGNUM + 36, 2},	\
++  {"$fd19", NDS32_FIRST_FPR_REGNUM + 38, 2},	\
++  {"$fd20", NDS32_FIRST_FPR_REGNUM + 40, 2},	\
++  {"$fd21", NDS32_FIRST_FPR_REGNUM + 42, 2},	\
++  {"$fd22", NDS32_FIRST_FPR_REGNUM + 44, 2},	\
++  {"$fd23", NDS32_FIRST_FPR_REGNUM + 46, 2},	\
++  {"$fd24", NDS32_FIRST_FPR_REGNUM + 48, 2},	\
++  {"$fd25", NDS32_FIRST_FPR_REGNUM + 50, 2},	\
++  {"$fd26", NDS32_FIRST_FPR_REGNUM + 52, 2},	\
++  {"$fd27", NDS32_FIRST_FPR_REGNUM + 54, 2},	\
++  {"$fd28", NDS32_FIRST_FPR_REGNUM + 56, 2},	\
++  {"$fd29", NDS32_FIRST_FPR_REGNUM + 58, 2},	\
++  {"$fd30", NDS32_FIRST_FPR_REGNUM + 60, 2},	\
++  {"$fd31", NDS32_FIRST_FPR_REGNUM + 62, 2},	\
+ }
+ 
+ /* Output normal jump table entry.  */
+@@ -896,19 +1710,19 @@ enum reg_class
+   do                                                                    \
+     {                                                                   \
+       switch (GET_MODE (body))                                          \
+-        {                                                               \
+-        case QImode:                                                    \
+-          asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel);     \
+-          break;                                                        \
+-        case HImode:                                                    \
+-          asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel);    \
+-          break;                                                        \
+-        case SImode:                                                    \
+-          asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel);     \
+-          break;                                                        \
+-        default:                                                        \
+-          gcc_unreachable();                                            \
+-        }                                                               \
++	{                                                               \
++	case QImode:                                                    \
++	  asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel);     \
++	  break;                                                        \
++	case HImode:                                                    \
++	  asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel);    \
++	  break;                                                        \
++	case SImode:                                                    \
++	  asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel);     \
++	  break;                                                        \
++	default:                                                        \
++	  gcc_unreachable();                                            \
++	}                                                               \
+     } while (0)
+ 
+ /* We have to undef it first because elfos.h formerly define it
+@@ -925,10 +1739,10 @@ enum reg_class
+   do                                                   \
+     {                                                  \
+       /* Because our jump table is in text section,    \
+-         we need to make sure 2-byte alignment after   \
+-         the jump table for instructions fetch.  */    \
++	 we need to make sure 2-byte alignment after   \
++	 the jump table for instructions fetch.  */    \
+       if (GET_MODE (PATTERN (table)) == QImode)        \
+-        ASM_OUTPUT_ALIGN (stream, 1);                  \
++	ASM_OUTPUT_ALIGN (stream, 1);                  \
+       asm_fprintf (stream, "\t! Jump Table End\n");    \
+     }  while (0)
+ 
+@@ -992,9 +1806,7 @@ enum reg_class
+ /* Return the preferred mode for and addr_diff_vec when the mininum
+    and maximum offset are known.  */
+ #define CASE_VECTOR_SHORTEN_MODE(min_offset, max_offset, body)  \
+-   ((min_offset < 0 || max_offset >= 0x2000 ) ? SImode          \
+-   : (max_offset >= 100) ? HImode                               \
+-   : QImode)
++  nds32_case_vector_shorten_mode (min_offset, max_offset, body)
+ 
+ /* Generate pc relative jump table when -fpic or -Os.  */
+ #define CASE_VECTOR_PC_RELATIVE (flag_pic || optimize_size)
+@@ -1027,6 +1839,11 @@ enum reg_class
+    when the condition is true.  */
+ #define STORE_FLAG_VALUE 1
+ 
++/* A C expression that indicates whether the architecture defines a value for
++   clz or ctz with a zero operand.  On nds32, the ISA spec defines the result
++   of clz for a zero operand as 32.  */
++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  ((VALUE) = 32, 1)
++
+ /* An alias for the machine mode for pointers.  */
+ #define Pmode SImode
+ 
+diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md
+index 5cdd8b2..557c466 100644
+--- a/gcc/config/nds32/nds32.md
++++ b/gcc/config/nds32/nds32.md
+@@ -46,58 +46,144 @@
+ ;; Include DImode/DFmode operations.
+ (include "nds32-doubleword.md")
+ 
++;; Include floating-point patterns.
++(include "nds32-fpu.md")
++
+ ;; Include peephole patterns.
+ (include "nds32-peephole2.md")
+ 
+ 
++;; ------------------------------------------------------------------------
++
++;; CPU pipeline model.
++(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,panther,simple"
++  (const
++    (cond [(match_test "nds32_cpu_option == CPU_N7")  (const_string "n7")
++	   (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8")  (const_string "n8")
++	   (match_test "nds32_cpu_option == CPU_E8")  (const_string "e8")
++	   (match_test "nds32_cpu_option == CPU_N9")  (const_string "n9")
++	   (match_test "nds32_cpu_option == CPU_N10") (const_string "n10")
++	   (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf")
++	   (match_test "nds32_cpu_option == CPU_N12") (const_string "n13")
++	   (match_test "nds32_cpu_option == CPU_N13") (const_string "n13")
++	   (match_test "nds32_cpu_option == CPU_PANTHER") (const_string "panther")
++	   (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")]
++	  (const_string "n9"))))
++
+ ;; Insn type, it is used to default other attribute values.
+ (define_attr "type"
+-  "unknown,move,load,store,alu,compare,branch,call,misc"
++  "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\
++   falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\
++   dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext"
+   (const_string "unknown"))
+ 
++;; Insn sub-type
++(define_attr "subtype"
++  "simple,shift,saturation"
++  (const_string "simple"))
+ 
+ ;; Length, in bytes, default is 4-bytes.
+ (define_attr "length" "" (const_int 4))
+ 
++;; Indicates the number of micro-instructions.
++(define_attr "combo"
++  "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25"
++  (const_string "1"))
++
++;; Feature set that an insn belongs to; used to enable/disable insn alternatives.
++;; v1  : Baseline Instructions
++;; v2  : Baseline Version 2 Instructions
++;; v3m : Baseline Version 3m Instructions
++;; v3  : Baseline Version 3 Instructions
++;; pe1 : Performance Extension Instructions
++;; pe2 : Performance Extension Version 2 Instructions
++;; se  : String Extension instructions
++(define_attr "feature"
++  "v1,v2,v3m,v3,pe1,pe2,se,fpu"
++  (const_string "v1"))
+ 
+ ;; Enabled, which is used to enable/disable insn alternatives.
+ ;; Note that we use length and TARGET_16_BIT here as criteria.
+-;; If the instruction pattern already check TARGET_16_BIT to
+-;; determine the length by itself, its enabled attribute should be
+-;; always 1 to avoid the conflict with the settings here.
+-(define_attr "enabled" ""
+-  (cond [(and (eq_attr "length" "2")
+-	      (match_test "!TARGET_16_BIT"))
+-	 (const_int 0)]
+-	(const_int 1)))
++;; If the instruction pattern already checks TARGET_16_BIT to determine
++;; the length by itself, its enabled attribute should be customized to
++;; avoid the conflict between length attribute and this default setting.
++(define_attr "enabled" "no,yes"
++  (if_then_else
++    (and (eq_attr "length" "2")
++	 (match_test "!TARGET_16_BIT"))
++    (const_string "no")
++    (cond [(eq_attr "feature" "v1")   (const_string "yes")
++	   (eq_attr "feature" "v2")   (if_then_else (match_test "TARGET_ISA_V2 || TARGET_ISA_V3 || TARGET_ISA_V3M")
++						    (const_string "yes")
++						    (const_string "no"))
++	   (eq_attr "feature" "v3")   (if_then_else (match_test "TARGET_ISA_V3")
++						    (const_string "yes")
++						    (const_string "no"))
++	   (eq_attr "feature" "v3m")  (if_then_else (match_test "TARGET_ISA_V3 || TARGET_ISA_V3M")
++						    (const_string "yes")
++						    (const_string "no"))
++	   (eq_attr "feature" "pe1")  (if_then_else (match_test "TARGET_EXT_PERF")
++						    (const_string "yes")
++						    (const_string "no"))
++	   (eq_attr "feature" "pe2")  (if_then_else (match_test "TARGET_EXT_PERF2")
++						    (const_string "yes")
++						    (const_string "no"))
++	   (eq_attr "feature" "se")   (if_then_else (match_test "TARGET_EXT_STRING")
++						    (const_string "yes")
++						    (const_string "no"))
++	   (eq_attr "feature" "fpu")  (if_then_else (match_test "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE")
++						    (const_string "yes")
++						    (const_string "no"))]
++	   (const_string "yes"))))
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+ 
++(include "nds32-dspext.md")
+ 
+ ;; Move instructions.
+ 
+ ;; For QImode and HImode, the immediate value can be fit in imm20s.
+ ;; So there is no need to split rtx for QI and HI patterns.
+ 
+-(define_expand "movqi"
+-  [(set (match_operand:QI 0 "general_operand" "")
+-	(match_operand:QI 1 "general_operand" ""))]
++(define_expand "mov<mode>"
++  [(set (match_operand:QIHI 0 "general_operand" "")
++	(match_operand:QIHI 1 "general_operand" ""))]
+   ""
+ {
+   /* Need to force register if mem <- !reg.  */
+   if (MEM_P (operands[0]) && !REG_P (operands[1]))
+-    operands[1] = force_reg (QImode, operands[1]);
++    operands[1] = force_reg (<MODE>mode, operands[1]);
++
++  if (MEM_P (operands[1]) && optimize > 0)
++    {
++      rtx reg = gen_reg_rtx (SImode);
++
++      emit_insn (gen_zero_extend<mode>si2 (reg, operands[1]));
++      operands[1] = gen_lowpart (<MODE>mode, reg);
++    }
+ })
+ 
+-(define_expand "movhi"
+-  [(set (match_operand:HI 0 "general_operand" "")
+-	(match_operand:HI 1 "general_operand" ""))]
++(define_expand "movmisalign<mode>"
++  [(set (match_operand:SIDI 0 "general_operand" "")
++	(match_operand:SIDI 1 "general_operand" ""))]
+   ""
+ {
+-  /* Need to force register if mem <- !reg.  */
++  rtx addr;
+   if (MEM_P (operands[0]) && !REG_P (operands[1]))
+-    operands[1] = force_reg (HImode, operands[1]);
++    operands[1] = force_reg (<MODE>mode, operands[1]);
++
++  if (MEM_P (operands[0]))
++    {
++      addr = force_reg (Pmode, XEXP (operands[0], 0));
++      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
++    }
++  else
++    {
++      addr = force_reg (Pmode, XEXP (operands[1], 0));
++      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
++    }
++  DONE;
+ })
+ 
+ (define_expand "movsi"
+@@ -130,12 +216,33 @@
+ 						  low12_int));
+       DONE;
+     }
++
++  if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1]))
++    {
++      if (TARGET_ICT_MODEL_LARGE
++	  && nds32_indirect_call_referenced_p (operands[1]))
++	{
++	  nds32_expand_ict_move (operands);
++	  DONE;
++	}
++      else if (nds32_tls_referenced_p (operands [1]))
++	{
++	  nds32_expand_tls_move (operands);
++	  DONE;
++	}
++      else if (flag_pic)
++	{
++	  nds32_expand_pic_move (operands);
++	  DONE;
++	}
++    }
+ })
+ 
+ (define_insn "*mov<mode>"
+-  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r,    d,    r,    r,    r")
+-	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))]
+-  ""
++  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r,U45,U33,U37,U45, m,  l,  l,  l,  d,  d, r,   d,    r,    r,    r, *f, *f,  r, *f,  Q, A")
++	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r,  l,  l,  l,  d, r,U45,U33,U37,U45,Ufe, m,Ip05, Is05, Is20, Ihig, *f,  r, *f,  Q, *f, r"))]
++  "register_operand(operands[0], <MODE>mode)
++   || register_operand(operands[1], <MODE>mode)"
+ {
+   switch (which_alternative)
+     {
+@@ -154,37 +261,54 @@
+     case 8:
+     case 9:
+     case 10:
+-      return nds32_output_16bit_load (operands, <byte>);
+     case 11:
+-      return nds32_output_32bit_load (operands, <byte>);
++      return nds32_output_16bit_load (operands, <byte>);
+     case 12:
+-      return "movpi45\t%0, %1";
++      return nds32_output_32bit_load (operands, <byte>);
+     case 13:
+-      return "movi55\t%0, %1";
++      return "movpi45\t%0, %1";
+     case 14:
+-      return "movi\t%0, %1";
++      return "movi55\t%0, %1";
+     case 15:
++      return "movi\t%0, %1";
++    case 16:
+       return "sethi\t%0, hi20(%1)";
++    case 17:
++      if (TARGET_FPU_SINGLE)
++	return "fcpyss\t%0, %1, %1";
++      else
++	return "#";
++    case 18:
++      return "fmtsr\t%1, %0";
++    case 19:
++      return "fmfsr\t%0, %1";
++    case 20:
++      return nds32_output_float_load (operands);
++    case 21:
++      return nds32_output_float_store (operands);
++    case 22:
++      return "mtusr\t%1, %0";
+     default:
+       gcc_unreachable ();
+     }
+ }
+-  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu")
+-   (set_attr "length" "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,  2,  2,  4,  4")])
++  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore,alu")
++   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   2,   4,  2,  2,  4,  4,   4,    4,    4,    4,     4,  4")
++   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1, v3m,  v1, v1, v1, v1, v1, fpu,  fpu,  fpu,  fpu,   fpu, v1")])
+ 
+ 
+ ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+ ;; are able to match such instruction template.
+-(define_insn "*move_addr"
+-  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+-	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
++(define_insn "move_addr"
++  [(set (match_operand:SI 0 "nds32_general_register_operand"   "=l, r")
++	(match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))]
+   ""
+   "la\t%0, %1"
+-  [(set_attr "type" "move")
++  [(set_attr "type"  "alu")
+    (set_attr "length"  "8")])
+ 
+ 
+-(define_insn "*sethi"
++(define_insn "sethi"
+   [(set (match_operand:SI 0 "register_operand"                "=r")
+ 	(high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
+   ""
+@@ -193,7 +317,7 @@
+    (set_attr "length" "4")])
+ 
+ 
+-(define_insn "*lo_sum"
++(define_insn "lo_sum"
+   [(set (match_operand:SI 0 "register_operand"                  "=r")
+ 	(lo_sum:SI (match_operand:SI 1 "register_operand"       " r")
+ 		   (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
+@@ -208,8 +332,8 @@
+ ;; Zero extension instructions.
+ 
+ (define_insn "zero_extend<mode>si2"
+-  [(set (match_operand:SI 0 "register_operand"                       "=l, r,   l, *r")
+-	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33,  m")))]
++  [(set (match_operand:SI 0 "register_operand"                       "=l, r,  l, *r")
++	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r,U33,  m")))]
+   ""
+ {
+   switch (which_alternative)
+@@ -245,7 +369,7 @@
+     case 1:
+       return "se<size>\t%0, %1";
+     case 2:
+-      return nds32_output_32bit_load_s (operands, <byte>);
++      return nds32_output_32bit_load_se (operands, <byte>);
+ 
+     default:
+       gcc_unreachable ();
+@@ -256,25 +380,70 @@
+ 
+ 
+ ;; ----------------------------------------------------------------------------
++(define_expand "extv"
++  [(set (match_operand 0 "register_operand" "")
++        (sign_extract (match_operand 1 "nonimmediate_operand" "")
++                      (match_operand 2 "const_int_operand" "")
++                      (match_operand 3 "const_int_operand" "")))]
++  ""
++{
++  enum nds32_expand_result_type result = nds32_expand_extv (operands);
++  switch (result)
++    {
++    case EXPAND_DONE:
++      DONE;
++      break;
++    case EXPAND_FAIL:
++      FAIL;
++      break;
++    case EXPAND_CREATE_TEMPLATE:
++      break;
++    default:
++      gcc_unreachable ();
++    }
++})
++
++(define_expand "insv"
++  [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
++                      (match_operand 1 "const_int_operand" "")
++                      (match_operand 2 "const_int_operand" ""))
++        (match_operand 3 "register_operand" ""))]
++  ""
++{
++  enum nds32_expand_result_type result = nds32_expand_insv (operands);
++  switch (result)
++    {
++    case EXPAND_DONE:
++      DONE;
++      break;
++    case EXPAND_FAIL:
++      FAIL;
++      break;
++    case EXPAND_CREATE_TEMPLATE:
++      break;
++    default:
++      gcc_unreachable ();
++    }
++})
+ 
+ ;; Arithmetic instructions.
+ 
+-(define_insn "add<mode>3"
+-  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l,    d,    l,  d, l,    k,    l,    r, r")
+-	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "%   0,    l,    0,    l,  0, l,    0,    k,    r, r")
+-		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
++(define_insn "addsi3"
++  [(set (match_operand:SI 0 "register_operand"               "=   d,   l,   d,   l, d,l,   k,   l,    r, r")
++	(plus:SI (match_operand:SI 1 "register_operand"      "%   0,   l,   0,   l, 0,l,   0,   k,    r, r")
++		 (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r,l,Is10,IU06, Is15, r")))]
+   ""
+ {
+   switch (which_alternative)
+     {
+     case 0:
+       /* addi Rt4,Rt4,-x  ==>  subi45 Rt4,x
+-         where 0 <= x <= 31 */
++	 where 0 <= x <= 31 */
+       operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+       return "subi45\t%0, %2";
+     case 1:
+       /* addi Rt3,Ra3,-x  ==>  subi333 Rt3,Ra3,x
+-         where 0 <= x <= 7 */
++	 where 0 <= x <= 7 */
+       operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
+       return "subi333\t%0, %1, %2";
+     case 2:
+@@ -298,19 +467,20 @@
+       gcc_unreachable ();
+     }
+ }
+-  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+-   (set_attr "length" "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")])
+-
+-(define_insn "sub<mode>3"
+-  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
+-	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
+-		      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
++  [(set_attr "type"    "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
++   (set_attr "length"  "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")
++   (set_attr "feature" " v1, v1, v1, v1, v1, v1, v2, v1, v1, v1")])
++
++(define_insn "subsi3"
++  [(set (match_operand:SI 0 "register_operand"                "=d, l,    r, r")
++	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
++		  (match_operand:SI 2 "register_operand"      " r, l,    r, r")))]
+   ""
+   "@
+-  sub45\t%0, %2
+-  sub333\t%0, %1, %2
+-  subri\t%0, %2, %1
+-  sub\t%0, %1, %2"
++   sub45\t%0, %2
++   sub333\t%0, %1, %2
++   subri\t%0, %2, %1
++   sub\t%0, %1, %2"
+   [(set_attr "type"   "alu,alu,alu,alu")
+    (set_attr "length" "  2,  2,  4,  4")])
+ 
+@@ -320,10 +490,10 @@
+ ;; and needs to ensure it is exact_log2 value.
+ (define_insn "*add_slli"
+   [(set (match_operand:SI 0 "register_operand"                    "=r")
+-        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
++	(plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+ 			  (match_operand:SI 2 "immediate_operand" " i"))
+ 		 (match_operand:SI 3 "register_operand"           " r")))]
+-  "TARGET_ISA_V3
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)
+    && (exact_log2 (INTVAL (operands[2])) != -1)
+    && (exact_log2 (INTVAL (operands[2])) <= 31)"
+ {
+@@ -333,18 +503,20 @@
+ 
+   return "add_slli\t%0, %3, %1, %2";
+ }
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "combo"        "2")
++   (set_attr "length"       "4")])
+ 
+ (define_insn "*add_srli"
+-  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+-	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+-			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+-		 (match_operand:SI 3 "register_operand"               "    r")))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                          "=   r")
++	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
++			      (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
++		 (match_operand:SI 3 "register_operand"                 "    r")))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "add_srli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "combo"        "2")
++   (set_attr "length"       "4")])
+ 
+ 
+ ;; GCC intends to simplify (minus (reg) (ashift ...))
+@@ -355,7 +527,7 @@
+ 	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+ 		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+ 			   (match_operand:SI 3 "immediate_operand" " i"))))]
+-  "TARGET_ISA_V3
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)
+    && (exact_log2 (INTVAL (operands[3])) != -1)
+    && (exact_log2 (INTVAL (operands[3])) <= 31)"
+ {
+@@ -365,32 +537,35 @@
+ 
+   return "sub_slli\t%0, %1, %2, %3";
+ }
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "combo"        "2")
++   (set_attr "length"       "4")])
+ 
+ (define_insn "*sub_srli"
+-  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+-	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+-		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+-			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                           "=   r")
++	(minus:SI (match_operand:SI 1 "register_operand"                 "    r")
++		  (lshiftrt:SI (match_operand:SI 2 "register_operand"    "    r")
++			       (match_operand:SI 3 "nds32_imm5u_operand" " Iu05"))))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "sub_srli\t%0, %1, %2, %3"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "combo"        "2")
++   (set_attr "length"       "4")])
+ 
+ 
+ ;; Multiplication instructions.
+ 
+ (define_insn "mulsi3"
+-  [(set (match_operand:SI 0 "register_operand"          "=w, r")
++  [(set (match_operand:SI 0 "register_operand"          "=l, r")
+ 	(mult:SI (match_operand:SI 1 "register_operand" "%0, r")
+-		 (match_operand:SI 2 "register_operand" " w, r")))]
++		 (match_operand:SI 2 "register_operand" " l, r")))]
+   ""
+   "@
+-  mul33\t%0, %2
+-  mul\t%0, %1, %2"
+-  [(set_attr "type"   "alu,alu")
+-   (set_attr "length" "  2,  4")])
++   mul33\t%0, %2
++   mul\t%0, %1, %2"
++  [(set_attr "type"    "mul,mul")
++   (set_attr "length"  "  2,  4")
++   (set_attr "feature" "v3m, v1")])
+ 
+ (define_insn "mulsidi3"
+   [(set (match_operand:DI 0 "register_operand"                          "=r")
+@@ -398,7 +573,7 @@
+ 		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+   "TARGET_ISA_V2 || TARGET_ISA_V3"
+   "mulsr64\t%0, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "mul")
+    (set_attr "length"   "4")])
+ 
+ (define_insn "umulsidi3"
+@@ -407,7 +582,7 @@
+ 		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+   "TARGET_ISA_V2 || TARGET_ISA_V3"
+   "mulr64\t%0, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "mul")
+    (set_attr "length"   "4")])
+ 
+ 
+@@ -415,32 +590,32 @@
+ 
+ (define_insn "*maddr32_0"
+   [(set (match_operand:SI 0 "register_operand"                   "=r")
+-        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+-                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+-                          (match_operand:SI 2 "register_operand" " r"))))]
++	(plus:SI (match_operand:SI 3 "register_operand"          " 0")
++		 (mult:SI (match_operand:SI 1 "register_operand" " r")
++			  (match_operand:SI 2 "register_operand" " r"))))]
+   ""
+   "maddr32\t%0, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "mac")
+    (set_attr "length"   "4")])
+ 
+ (define_insn "*maddr32_1"
+   [(set (match_operand:SI 0 "register_operand"                   "=r")
+-        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+-                          (match_operand:SI 2 "register_operand" " r"))
+-                 (match_operand:SI 3 "register_operand"          " 0")))]
++	(plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
++			  (match_operand:SI 2 "register_operand" " r"))
++		 (match_operand:SI 3 "register_operand"          " 0")))]
+   ""
+   "maddr32\t%0, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "mac")
+    (set_attr "length"   "4")])
+ 
+ (define_insn "*msubr32"
+   [(set (match_operand:SI 0 "register_operand"                    "=r")
+-        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+-                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+-                           (match_operand:SI 2 "register_operand" " r"))))]
++	(minus:SI (match_operand:SI 3 "register_operand"          " 0")
++		  (mult:SI (match_operand:SI 1 "register_operand" " r")
++			   (match_operand:SI 2 "register_operand" " r"))))]
+   ""
+   "msubr32\t%0, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "mac")
+    (set_attr "length"   "4")])
+ 
+ 
+@@ -448,26 +623,46 @@
+ 
+ (define_insn "divmodsi4"
+   [(set (match_operand:SI 0 "register_operand"         "=r")
+-        (div:SI (match_operand:SI 1 "register_operand" " r")
+-                (match_operand:SI 2 "register_operand" " r")))
++	(div:SI (match_operand:SI 1 "register_operand" " r")
++		(match_operand:SI 2 "register_operand" " r")))
+    (set (match_operand:SI 3 "register_operand"         "=r")
+-        (mod:SI (match_dup 1) (match_dup 2)))]
++	(mod:SI (match_dup 1) (match_dup 2)))]
+   ""
+   "divsr\t%0, %3, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "div")
+    (set_attr "length"   "4")])
+ 
+ (define_insn "udivmodsi4"
+   [(set (match_operand:SI 0 "register_operand"          "=r")
+-        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+-                (match_operand:SI 2 "register_operand"  " r")))
++	(udiv:SI (match_operand:SI 1 "register_operand" " r")
++		 (match_operand:SI 2 "register_operand"  " r")))
+    (set (match_operand:SI 3 "register_operand"          "=r")
+-        (umod:SI (match_dup 1) (match_dup 2)))]
++	(umod:SI (match_dup 1) (match_dup 2)))]
+   ""
+   "divr\t%0, %3, %1, %2"
+-  [(set_attr "type"   "alu")
++  [(set_attr "type"   "div")
++   (set_attr "length"   "4")])
++
++;; Per our ISA spec, divsr/divr keep only the quotient when the quotient and
++;; remainder are assigned the same register, which reduces register pressure
++;; by one register when the remainder is not needed.
++(define_insn "divsi4"
++  [(set (match_operand:SI 0 "register_operand"         "=r")
++	(div:SI (match_operand:SI 1 "register_operand" " r")
++		(match_operand:SI 2 "register_operand" " r")))]
++  ""
++  "divsr\t%0, %0, %1, %2"
++  [(set_attr "type"   "div")
+    (set_attr "length"   "4")])
+ 
++(define_insn "udivsi4"
++  [(set (match_operand:SI 0 "register_operand"          "=r")
++	(udiv:SI (match_operand:SI 1 "register_operand" " r")
++		 (match_operand:SI 2 "register_operand"  " r")))]
++  ""
++  "divr\t%0, %0, %1, %2"
++  [(set_attr "type"   "div")
++   (set_attr "length"   "4")])
+ 
+ ;; ----------------------------------------------------------------------------
+ 
+@@ -488,14 +683,28 @@
+    (set_attr "length" "4")]
+ )
+ 
+-(define_insn "andsi3"
+-  [(set (match_operand:SI 0 "register_operand"         "=w, r,    l,    l,    l,    l,    l,    l,    r,   r,     r,    r,    r")
+-	(and:SI (match_operand:SI 1 "register_operand" "%0, r,    l,    l,    l,    l,    0,    0,    r,   r,     r,    r,    r")
+-		(match_operand:SI 2 "general_operand"  " w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
++(define_expand "andsi3"
++  [(set (match_operand:SI 0 "register_operand" "")
++	(and:SI (match_operand:SI 1 "register_operand" "")
++		(match_operand:SI 2 "nds32_reg_constant_operand" "")))]
++  ""
++{
++  if (CONST_INT_P (operands[2])
++      && !nds32_and_operand (operands[2], SImode))
++    {
++      nds32_expand_constant (SImode, INTVAL (operands[2]),
++			     operands[0], operands[1]);
++      DONE;
++    }
++})
++
++(define_insn "*andsi3"
++  [(set (match_operand:SI 0 "register_operand"          "=l, r,   l,   l,   l,   l,   l,   l,    r,   r,     r,    r,    r")
++	(and:SI (match_operand:SI 1 "register_operand"  "%0, r,   l,   l,   l,   l,   0,   0,    r,   r,     r,    r,    r")
++		(match_operand:SI 2 "nds32_and_operand" " l, r,Izeb,Izeh,Ixls,Ix11,Ibms,Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
+   ""
+ {
+   HOST_WIDE_INT mask = INTVAL (operands[2]);
+-  int zero_position;
+ 
+   /* 16-bit andi instructions:
+      andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
+@@ -520,8 +729,7 @@
+     case 5:
+       return "x11b33\t%0, %1";
+     case 6:
+-      operands[2] = GEN_INT (floor_log2 (mask));
+-      return "bmski33\t%0, %2";
++      return "bmski33\t%0, %B2";
+     case 7:
+       operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+       return "fexti33\t%0, %2";
+@@ -535,47 +743,35 @@
+       operands[2] = GEN_INT (~mask);
+       return "bitci\t%0, %1, %2";
+     case 12:
+-      /* If we reach this alternative,
+-         it must pass the nds32_can_use_bclr_p() test,
+-         so that we can guarantee there is only one 0-bit
+-         within the immediate value.  */
+-      for (zero_position = 31; zero_position >= 0; zero_position--)
+-	{
+-	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
+-	    {
+-	      /* Found the 0-bit position.  */
+-	      operands[2] = GEN_INT (zero_position);
+-	      break;
+-	    }
+-	}
+-      return "bclr\t%0, %1, %2";
++      return "bclr\t%0, %1, %b2";
+ 
+     default:
+       gcc_unreachable ();
+     }
+ }
+-  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+-   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")])
++  [(set_attr "type"    "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
++   (set_attr "length"  "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")
++   (set_attr "feature" "v3m, v1, v1, v1, v1, v1,v3m,v3m, v1, v1, v1, v3,pe1")])
+ 
+ (define_insn "*and_slli"
+-  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+-	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+-			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+-		(match_operand:SI 3 "register_operand"              "    r")))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                        "=   r")
++	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"     "    r")
++			    (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
++		(match_operand:SI 3 "register_operand"                "    r")))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "and_slli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "length"       "4")])
+ 
+ (define_insn "*and_srli"
+-  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+-	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+-			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+-		(match_operand:SI 3 "register_operand"               "    r")))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                         "=   r")
++	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
++			     (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
++		(match_operand:SI 3 "register_operand"                 "    r")))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "and_srli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "length"       "4")])
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+@@ -584,58 +780,50 @@
+ 
+ ;; For V3/V3M ISA, we have 'or33' instruction.
+ ;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
+-(define_insn "iorsi3"
+-  [(set (match_operand:SI 0 "register_operand"         "=w, r,    r,    r")
+-	(ior:SI (match_operand:SI 1 "register_operand" "%0, r,    r,    r")
+-		(match_operand:SI 2 "general_operand"  " w, r, Iu15, Ie15")))]
++
++(define_expand "iorsi3"
++  [(set (match_operand:SI 0 "register_operand"         "")
++	(ior:SI (match_operand:SI 1 "register_operand" "")
++		(match_operand:SI 2 "general_operand"  "")))]
+   ""
+ {
+-  int one_position;
+-
+-  switch (which_alternative)
+-    {
+-    case 0:
+-      return "or33\t%0, %2";
+-    case 1:
+-      return "or\t%0, %1, %2";
+-    case 2:
+-      return "ori\t%0, %1, %2";
+-    case 3:
+-      /* If we reach this alternative,
+-         it must pass the nds32_can_use_bset_p() test,
+-         so that we can guarantee there is only one 1-bit
+-         within the immediate value.  */
+-      /* Use exact_log2() to search the 1-bit position.  */
+-      one_position = exact_log2 (INTVAL (operands[2]));
+-      operands[2] = GEN_INT (one_position);
+-      return "bset\t%0, %1, %2";
++  if (!nds32_ior_operand (operands[2], SImode))
++    operands[2] = force_reg (SImode, operands[2]);
++})
+ 
+-    default:
+-      gcc_unreachable ();
+-    }
+-}
+-  [(set_attr "type"   "alu,alu,alu,alu")
+-   (set_attr "length" "  2,  4,  4,  4")])
++(define_insn "*iorsi3"
++  [(set (match_operand:SI 0 "register_operand"          "=l, r,    r,    r")
++	(ior:SI (match_operand:SI 1 "register_operand"  "%0, r,    r,    r")
++		(match_operand:SI 2 "nds32_ior_operand" " l, r, Iu15, Ie15")))]
++  ""
++  "@
++   or33\t%0, %2
++   or\t%0, %1, %2
++   ori\t%0, %1, %2
++   bset\t%0, %1, %B2"
++  [(set_attr "type"    "alu,alu,alu,alu")
++   (set_attr "length"  "  2,  4,  4,  4")
++   (set_attr "feature" "v3m, v1, v1,pe1")])
+ 
+ (define_insn "*or_slli"
+-  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+-	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+-			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+-		(match_operand:SI 3 "register_operand"             "    r")))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                       "=   r")
++	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"    "    r")
++			   (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
++		(match_operand:SI 3 "register_operand"               "    r")))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "or_slli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "length"       "4")])
+ 
+ (define_insn "*or_srli"
+-  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+-	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+-			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+-		(match_operand:SI 3 "register_operand"               "    r")))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                         "=   r")
++	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
++			     (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
++		(match_operand:SI 3 "register_operand"                 "    r")))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "or_srli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "length"       "4")])
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+@@ -644,71 +832,64 @@
+ 
+ ;; For V3/V3M ISA, we have 'xor33' instruction.
+ ;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
+-(define_insn "xorsi3"
+-  [(set (match_operand:SI 0 "register_operand"         "=w, r,    r,    r")
+-	(xor:SI (match_operand:SI 1 "register_operand" "%0, r,    r,    r")
+-		(match_operand:SI 2 "general_operand"  " w, r, Iu15, It15")))]
++
++(define_expand "xorsi3"
++  [(set (match_operand:SI 0 "register_operand"         "")
++	(xor:SI (match_operand:SI 1 "register_operand" "")
++		(match_operand:SI 2 "general_operand"  "")))]
+   ""
+ {
+-  int one_position;
+-
+-  switch (which_alternative)
+-    {
+-    case 0:
+-      return "xor33\t%0, %2";
+-    case 1:
+-      return "xor\t%0, %1, %2";
+-    case 2:
+-      return "xori\t%0, %1, %2";
+-    case 3:
+-      /* If we reach this alternative,
+-         it must pass the nds32_can_use_btgl_p() test,
+-         so that we can guarantee there is only one 1-bit
+-         within the immediate value.  */
+-      /* Use exact_log2() to search the 1-bit position.  */
+-      one_position = exact_log2 (INTVAL (operands[2]));
+-      operands[2] = GEN_INT (one_position);
+-      return "btgl\t%0, %1, %2";
++  if (!nds32_xor_operand (operands[2], SImode))
++    operands[2] = force_reg (SImode, operands[2]);
++})
+ 
+-    default:
+-      gcc_unreachable ();
+-    }
+-}
+-  [(set_attr "type"   "alu,alu,alu,alu")
+-   (set_attr "length" "  2,  4,  4,  4")])
++(define_insn "*xorsi3"
++  [(set (match_operand:SI 0 "register_operand"          "=l, r,    r,    r")
++	(xor:SI (match_operand:SI 1 "register_operand"  "%0, r,    r,    r")
++		(match_operand:SI 2 "nds32_xor_operand" " l, r, Iu15, It15")))]
++  ""
++  "@
++   xor33\t%0, %2
++   xor\t%0, %1, %2
++   xori\t%0, %1, %2
++   btgl\t%0, %1, %B2"
++  [(set_attr "type"    "alu,alu,alu,alu")
++   (set_attr "length"  "  2,  4,  4,  4")
++   (set_attr "feature" "v3m, v1, v1,pe1")])
+ 
+ (define_insn "*xor_slli"
+   [(set (match_operand:SI 0 "register_operand"                     "=   r")
+ 	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+-			   (match_operand:SI 2 "immediate_operand" " Iu05"))
++			   (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+ 		(match_operand:SI 3 "register_operand"             "    r")))]
+-  "TARGET_ISA_V3"
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "xor_slli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "length"       "4")])
+ 
+ (define_insn "*xor_srli"
+-  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+-	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+-			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+-		(match_operand:SI 3 "register_operand"               "    r")))]
+-  "TARGET_ISA_V3"
++  [(set (match_operand:SI 0 "register_operand"                         "=   r")
++	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
++			     (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
++		(match_operand:SI 3 "register_operand"                 "    r")))]
++  "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)"
+   "xor_srli\t%0, %3, %1, %2"
+-  [(set_attr "type" "alu")
+-   (set_attr "length" "4")])
++  [(set_attr "type" "alu_shift")
++   (set_attr "length"       "4")])
+ 
+ ;; Rotate Right Instructions.
+ 
+-(define_insn "rotrsi3"
+-  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+-	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+-		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
++(define_insn "*rotrsi3"
++  [(set (match_operand:SI 0 "register_operand"                    "=   r, r")
++	  (rotatert:SI (match_operand:SI 1 "register_operand"     "    r, r")
++		       (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
+   ""
+   "@
+-  rotri\t%0, %1, %2
+-  rotr\t%0, %1, %2"
+-  [(set_attr "type"   "alu,alu")
+-   (set_attr "length" "  4,  4")])
++   rotri\t%0, %1, %2
++   rotr\t%0, %1, %2"
++  [(set_attr "type"    "  alu,  alu")
++   (set_attr "subtype" "shift,shift")
++   (set_attr "length"  "    4,    4")])
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+@@ -720,14 +901,95 @@
+ ;; And for V2 ISA, there is NO 'neg33' instruction.
+ ;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
+ (define_insn "negsi2"
+-  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+-	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
++  [(set (match_operand:SI 0 "register_operand"         "=l, r")
++	(neg:SI (match_operand:SI 1 "register_operand" " l, r")))]
+   ""
+   "@
+    neg33\t%0, %1
+    subri\t%0, %1, 0"
+-  [(set_attr "type"   "alu,alu")
+-   (set_attr "length" "  2,  4")])
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")
++   (set_attr "feature" "v3m, v1")])
++
++(define_expand "negsf2"
++  [(set (match_operand:SF 0 "register_operand" "")
++	(neg:SF (match_operand:SF 1 "register_operand" "")))]
++  ""
++{
++  /* With neither option enabled, negate by flipping the sign bit.  */
++  if (!TARGET_FPU_SINGLE && !TARGET_EXT_PERF)
++    {
++      rtx sign_bit = gen_int_mode (0x80000000, SImode);
++      rtx dst_si = simplify_gen_subreg (SImode, operands[0], SFmode, 0);
++      rtx src_si = simplify_gen_subreg (SImode, operands[1], SFmode, 0);
++
++      emit_insn (gen_xorsi3 (dst_si, src_si, sign_bit));
++
++      DONE;
++    }
++})
++
++(define_expand "negdf2"
++  [(set (match_operand:DF 0 "register_operand" "")
++	(neg:DF (match_operand:DF 1 "register_operand" "")))]
++  ""
++{
++})
++
++(define_insn_and_split "soft_negdf2"
++  [(set (match_operand:DF 0 "register_operand" "")
++	(neg:DF (match_operand:DF 1 "register_operand" "")))]
++  "!TARGET_FPU_DOUBLE"
++  "#"
++  "!TARGET_FPU_DOUBLE"
++  [(const_int 1)]
++{
++    rtx src = operands[1];
++    rtx dst = operands[0];
++    rtx ori_dst = operands[0];
++
++    bool need_extra_move_for_dst_p = false;
++    /* An FPU register can't change mode to SI directly and can't be used by
++       xor/btgl, so go through a temporary register in those cases.  */
++    if (HARD_REGISTER_P (src)
++	&& TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (src)))
++      {
++	rtx tmp = gen_reg_rtx (DFmode);
++	emit_move_insn (tmp, src);
++	src = tmp;
++      }
++
++    if (HARD_REGISTER_P (dst)
++	&& TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (dst)))
++      {
++	need_extra_move_for_dst_p = true;
++	rtx tmp = gen_reg_rtx (DFmode);
++	dst = tmp;
++      }
++
++    rtx dst_high_part = simplify_gen_subreg (
++			  SImode, dst,
++			  DFmode, subreg_highpart_offset (SImode, DFmode));
++    rtx dst_low_part = simplify_gen_subreg (
++			  SImode, dst,
++			  DFmode, subreg_lowpart_offset (SImode, DFmode));
++    rtx src_high_part = simplify_gen_subreg (
++			  SImode, src,
++			  DFmode, subreg_highpart_offset (SImode, DFmode));
++    rtx src_low_part = simplify_gen_subreg (
++			  SImode, src,
++			  DFmode, subreg_lowpart_offset (SImode, DFmode));
++    /* Negate by flipping the sign bit, which lives in the high SImode part.  */
++    emit_insn (gen_xorsi3 (dst_high_part,
++			   src_high_part,
++			   gen_int_mode (0x80000000, SImode)));
++    emit_move_insn (dst_low_part, src_low_part);
++    /* DST was redirected to a temporary above; copy it to the real target.  */
++    if (need_extra_move_for_dst_p)
++      emit_move_insn (ori_dst, dst);
++
++    DONE;
++})
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+@@ -737,55 +999,72 @@
+ ;; For V3/V3M ISA, we have 'not33' instruction.
+ ;; So we can identify 'not Rt3,Ra3' case and set its length to be 2.
+ (define_insn "one_cmplsi2"
+-  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+-	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
++  [(set (match_operand:SI 0 "register_operand"         "=l, r")
++	(not:SI (match_operand:SI 1 "register_operand" " l, r")))]
+   ""
+   "@
+    not33\t%0, %1
+    nor\t%0, %1, %1"
+-  [(set_attr "type"   "alu,alu")
+-   (set_attr "length" "  2,  4")])
++  [(set_attr "type"    "alu,alu")
++   (set_attr "length"  "  2,  4")
++   (set_attr "feature" "v3m, v1")])
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+ 
+ ;; Shift instructions.
+ 
+-(define_insn "ashlsi3"
+-  [(set (match_operand:SI 0 "register_operand"             "=   l,    r, r")
+-	(ashift:SI (match_operand:SI 1 "register_operand"  "    l,    r, r")
+-		   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
++(define_expand "<shift>si3"
++  [(set (match_operand:SI 0 "register_operand"                      "")
++	(shift_rotate:SI (match_operand:SI 1 "register_operand"     "")
++			 (match_operand:SI 2 "nds32_rimm5u_operand" "")))]
+   ""
+-  "@
+-  slli333\t%0, %1, %2
+-  slli\t%0, %1, %2
+-  sll\t%0, %1, %2"
+-  [(set_attr "type"   "alu,alu,alu")
+-   (set_attr "length" "  2,  4,  4")])
++{
++  if (operands[2] == const0_rtx)
++    {
++      emit_move_insn (operands[0], operands[1]);
++      DONE;
++    }
++})
+ 
+-(define_insn "ashrsi3"
+-  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+-	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+-		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
++(define_insn "*ashlsi3"
++  [(set (match_operand:SI 0 "register_operand"                "=   l,    r, r")
++	(ashift:SI (match_operand:SI 1 "register_operand"     "    l,    r, r")
++		   (match_operand:SI 2 "nds32_rimm5u_operand" " Iu03, Iu05, r")))]
+   ""
+   "@
+-  srai45\t%0, %2
+-  srai\t%0, %1, %2
+-  sra\t%0, %1, %2"
+-  [(set_attr "type"   "alu,alu,alu")
+-   (set_attr "length" "  2,  4,  4")])
+-
+-(define_insn "lshrsi3"
+-  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
+-	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
+-		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
++   slli333\t%0, %1, %2
++   slli\t%0, %1, %2
++   sll\t%0, %1, %2"
++  [(set_attr "type"    "  alu,  alu,  alu")
++   (set_attr "subtype" "shift,shift,shift")
++   (set_attr "length"  "    2,    4,    4")])
++
++(define_insn "*ashrsi3"
++  [(set (match_operand:SI 0 "register_operand"                  "=   d,    r, r")
++	(ashiftrt:SI (match_operand:SI 1 "register_operand"     "    0,    r, r")
++		     (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))]
++  ""
++  "@
++   srai45\t%0, %2
++   srai\t%0, %1, %2
++   sra\t%0, %1, %2"
++  [(set_attr "type"    "  alu,  alu,  alu")
++   (set_attr "subtype" "shift,shift,shift")
++   (set_attr "length"  "    2,    4,    4")])
++
++(define_insn "*lshrsi3"
++  [(set (match_operand:SI 0 "register_operand"                  "=   d,    r, r")
++	(lshiftrt:SI (match_operand:SI 1 "register_operand"     "    0,    r, r")
++		     (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))]
+   ""
+   "@
+-  srli45\t%0, %2
+-  srli\t%0, %1, %2
+-  srl\t%0, %1, %2"
+-  [(set_attr "type"   "alu,alu,alu")
+-   (set_attr "length" "  2,  4,  4")])
++   srli45\t%0, %2
++   srli\t%0, %1, %2
++   srl\t%0, %1, %2"
++  [(set_attr "type"    "  alu,  alu,  alu")
++   (set_attr "subtype" "shift,shift,shift")
++   (set_attr "length"  "    2,    4,    4")])
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+@@ -794,148 +1073,65 @@
+ ;; Conditional Move patterns
+ ;; ----------------------------------------------------------------------------
+ 
+-(define_expand "movsicc"
+-  [(set (match_operand:SI 0 "register_operand" "")
+-	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+-			 (match_operand:SI 2 "register_operand" "")
+-			 (match_operand:SI 3 "register_operand" "")))]
+-  "TARGET_CMOV"
++(define_expand "mov<mode>cc"
++  [(set (match_operand:QIHISI 0 "register_operand" "")
++	(if_then_else:QIHISI (match_operand 1 "nds32_movecc_comparison_operator" "")
++			 (match_operand:QIHISI 2 "register_operand" "")
++			 (match_operand:QIHISI 3 "register_operand" "")))]
++  "TARGET_CMOV && !optimize_size"
+ {
+-  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+-      && GET_MODE (XEXP (operands[1], 0)) == SImode
+-      && XEXP (operands[1], 1) == const0_rtx)
+-    {
+-      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+-         we have gcc generate original template rtx.  */
+-      goto create_template;
+-    }
+-  else
++  enum nds32_expand_result_type result = nds32_expand_movcc (operands);
++  switch (result)
+     {
+-      /* Since there is only 'slt'(Set when Less Than) instruction for
+-         comparison in Andes ISA, the major strategy we use here is to
+-         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+-         We design constraints properly so that the reload phase will assist
+-         to make one source operand to use same register as result operand.
+-         Then we can use cmovz/cmovn to catch the other source operand
+-         which has different register.  */
+-      enum rtx_code code = GET_CODE (operands[1]);
+-      enum rtx_code new_code = code;
+-      rtx cmp_op0 = XEXP (operands[1], 0);
+-      rtx cmp_op1 = XEXP (operands[1], 1);
+-      rtx tmp;
+-      int reverse = 0;
+-
+-      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+-         Strategy : Reverse condition and swap comparison operands
+-
+-         For example:
+-
+-             a <= b ? P : Q   (LE or LEU)
+-         --> a >  b ? Q : P   (reverse condition)
+-         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+-
+-             a >= b ? P : Q   (GE or GEU)
+-         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+-
+-             a <  b ? P : Q   (LT or LTU)
+-         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+-
+-             a >  b ? P : Q   (GT or GTU)
+-         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+-      switch (code)
+-	{
+-	case NE:
+-	  /*   (a != b ? P : Q)
+-	     can be expressed as
+-	       (a == b ? Q : P)
+-	     so, fall through to reverse condition */
+-	case GE: case GEU: case LE: case LEU:
+-	  new_code = reverse_condition (code);
+-	  reverse = 1;
+-	  break;
+-	case EQ: case GT: case GTU: case LT: case LTU:
+-	  /* no need to reverse condition */
+-	  break;
+-	default:
+-	  FAIL;
+-	}
+-
+-      /* For '>' comparison operator, we swap operands
+-         so that we can have 'LT/LTU' operator.  */
+-      if (new_code == GT || new_code == GTU)
+-	{
+-	  tmp     = cmp_op0;
+-	  cmp_op0 = cmp_op1;
+-	  cmp_op1 = tmp;
+-
+-	  new_code = swap_condition (new_code);
+-	}
+-
+-      /* Use a temporary register to store slt/slts result.  */
+-      tmp = gen_reg_rtx (SImode);
+-
+-      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
+-         If we don't split it, the conditional move transformation will fail
+-         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
+-      if (new_code == EQ)
+-	{
+-	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+-	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+-	}
+-      else if (new_code == NE)
+-	{
+-	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+-	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+-        }
+-      else
+-	/* This emit_insn will create corresponding 'slt/slts' insturction.  */
+-	emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (new_code, SImode,
+-						     cmp_op0, cmp_op1)));
+-
+-      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+-         so that cmovz or cmovn will be matched later.
+-
+-         For reverse condition cases, we want to create a semantic that:
+-           (eq X 0) --> pick up "else" part
+-         For normal cases, we want to create a semantic that:
+-           (ne X 0) --> pick up "then" part
+-
+-         Later we will have cmovz/cmovn instruction pattern to
+-         match corresponding behavior and output instruction.  */
+-      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+-				    VOIDmode, tmp, const0_rtx);
++    case EXPAND_DONE:
++      DONE;
++      break;
++    case EXPAND_FAIL:
++      FAIL;
++      break;
++    case EXPAND_CREATE_TEMPLATE:
++      break;
++    default:
++      gcc_unreachable ();
+     }
+-
+-create_template:
+-  do {} while(0); /* dummy line */
+ })
+ 
+-(define_insn "cmovz"
+-  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+-        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
++(define_insn "cmovz<mode>"
++  [(set (match_operand:QIHISI 0 "register_operand"                      "=r, r")
++	(if_then_else:QIHISI (eq (match_operand:SI 1 "register_operand" " r, r")
+ 			     (const_int 0))
+-			 (match_operand:SI 2 "register_operand"     " r, 0")
+-			 (match_operand:SI 3 "register_operand"     " 0, r")))]
++			 (match_operand:QIHISI 2 "register_operand"     " r, 0")
++			 (match_operand:QIHISI 3 "register_operand"     " 0, r")))]
+   "TARGET_CMOV"
+   "@
+    cmovz\t%0, %2, %1
+    cmovn\t%0, %3, %1"
+-  [(set_attr "type" "move")
++  [(set_attr "type"  "alu")
+    (set_attr "length"  "4")])
+ 
+-(define_insn "cmovn"
+-  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+-	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
++(define_insn "cmovn<mode>"
++  [(set (match_operand:QIHISI 0 "register_operand"                      "=r, r")
++	(if_then_else:QIHISI (ne (match_operand:SI 1 "register_operand" " r, r")
+ 			     (const_int 0))
+-			 (match_operand:SI 2 "register_operand"     " r, 0")
+-			 (match_operand:SI 3 "register_operand"     " 0, r")))]
++			 (match_operand:QIHISI 2 "register_operand"     " r, 0")
++			 (match_operand:QIHISI 3 "register_operand"     " 0, r")))]
+   "TARGET_CMOV"
+   "@
+    cmovn\t%0, %2, %1
+    cmovz\t%0, %3, %1"
+-  [(set_attr "type" "move")
++  [(set_attr "type"  "alu")
+    (set_attr "length"  "4")])
+ 
++;; A hotfix to help the RTL combiner merge a cmovn insn and a zero_extend insn.
++;; It should be removed once we change the expansion form of cmovn.
++(define_insn "*cmovn_simplified_<mode>"
++  [(set (match_operand:QIHISI 0 "register_operand" "=r")
++	(if_then_else:QIHISI (match_operand:SI 1 "register_operand" "r")
++			 (match_operand:QIHISI 2 "register_operand" "r")
++			 (match_operand:QIHISI 3 "register_operand" "0")))]
++  "TARGET_CMOV"
++  "cmovn\t%0, %2, %1"
++  [(set_attr "type" "alu")])
+ 
+ ;; ----------------------------------------------------------------------------
+ ;; Conditional Branch patterns
+@@ -950,573 +1146,188 @@ create_template:
+ 		      (pc)))]
+   ""
+ {
+-  rtx tmp_reg;
+-  enum rtx_code code;
+-
+-  code = GET_CODE (operands[0]);
+-
+-  /* If operands[2] is (const_int 0),
+-     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+-     So we have gcc generate original template rtx.  */
+-  if (GET_CODE (operands[2]) == CONST_INT)
+-    if (INTVAL (operands[2]) == 0)
+-      if ((code != GTU)
+-	  && (code != GEU)
+-	  && (code != LTU)
+-	  && (code != LEU))
+-	goto create_template;
+-
+-  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+-     behavior for the comparison, we might need to generate other
+-     rtx patterns to achieve same semantic.  */
+-  switch (code)
++  enum nds32_expand_result_type result = nds32_expand_cbranch (operands);
++  switch (result)
+     {
+-    case GT:
+-    case GTU:
+-      if (GET_CODE (operands[2]) == CONST_INT)
+-	{
+-	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+-
+-	  /* We want to plus 1 into the integer value
+-	     of operands[2] to create 'slt' instruction.
+-	     This caculation is performed on the host machine,
+-	     which may be 64-bit integer.
+-	     So the meaning of caculation result may be
+-	     different from the 32-bit nds32 target.
+-
+-	     For example:
+-	       0x7fffffff + 0x1 -> 0x80000000,
+-	       this value is POSITIVE on 64-bit machine,
+-	       but the expected value on 32-bit nds32 target
+-	       should be NEGATIVE value.
+-
+-	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+-	     explicitly create SImode constant rtx.  */
+-	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+-
+-	  if (code == GT)
+-	    {
+-	      /* GT, use slts instruction */
+-	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+-	    }
+-	  else
+-	    {
+-	      /* GTU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+-	    }
+-
+-	  PUT_CODE (operands[0], EQ);
+-	  operands[1] = tmp_reg;
+-	  operands[2] = const0_rtx;
+-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+-				     operands[2], operands[3]));
+-
+-	  DONE;
+-	}
+-      else
+-	{
+-	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+-
+-	  if (code == GT)
+-	    {
+-	      /* GT, use slts instruction */
+-	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+-	    }
+-	  else
+-	    {
+-	      /* GTU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+-	    }
+-
+-	  PUT_CODE (operands[0], NE);
+-	  operands[1] = tmp_reg;
+-	  operands[2] = const0_rtx;
+-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+-				     operands[2], operands[3]));
+-
+-	  DONE;
+-	}
+-
+-    case GE:
+-    case GEU:
+-      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+-      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+-      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+-
+-      if (code == GE)
+-	{
+-	  /* GE, use slts instruction */
+-	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+-	}
+-      else
+-	{
+-	  /* GEU, use slt instruction */
+-	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+-	}
+-
+-      PUT_CODE (operands[0], EQ);
+-      operands[1] = tmp_reg;
+-      operands[2] = const0_rtx;
+-      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+-				 operands[2], operands[3]));
+-
++    case EXPAND_DONE:
+       DONE;
+-
+-    case LT:
+-    case LTU:
+-      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+-      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+-      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+-
+-      if (code == LT)
+-	{
+-	  /* LT, use slts instruction */
+-	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+-	}
+-      else
+-	{
+-	  /* LTU, use slt instruction */
+-	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+-	}
+-
+-      PUT_CODE (operands[0], NE);
+-      operands[1] = tmp_reg;
+-      operands[2] = const0_rtx;
+-      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+-				 operands[2], operands[3]));
+-
+-      DONE;
+-
+-    case LE:
+-    case LEU:
+-      if (GET_CODE (operands[2]) == CONST_INT)
+-	{
+-	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+-
+-	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
+-	     We better have an assert here in case GCC does not properly
+-	     optimize it away.  The INT_MAX here is 0x7fffffff for target.  */
+-	  gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff);
+-	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+-
+-	  if (code == LE)
+-	    {
+-	      /* LE, use slts instruction */
+-	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+-	    }
+-	  else
+-	    {
+-	      /* LEU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+-	    }
+-
+-	  PUT_CODE (operands[0], NE);
+-	  operands[1] = tmp_reg;
+-	  operands[2] = const0_rtx;
+-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+-				     operands[2], operands[3]));
+-
+-	  DONE;
+-	}
+-      else
+-	{
+-	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+-
+-	  if (code == LE)
+-	    {
+-	      /* LE, use slts instruction */
+-	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+-	    }
+-	  else
+-	    {
+-	      /* LEU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+-	    }
+-
+-	  PUT_CODE (operands[0], EQ);
+-	  operands[1] = tmp_reg;
+-	  operands[2] = const0_rtx;
+-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+-				     operands[2], operands[3]));
+-
+-	  DONE;
+-	}
+-
+-    case EQ:
+-    case NE:
+-      /* NDS32 ISA has various form for eq/ne behavior no matter
+-         what kind of the operand is.
+-         So just generate original template rtx.  */
+-      goto create_template;
+-
+-    default:
++      break;
++    case EXPAND_FAIL:
+       FAIL;
++      break;
++    case EXPAND_CREATE_TEMPLATE:
++      break;
++    default:
++      gcc_unreachable ();
+     }
+-
+-create_template:
+-  do {} while(0); /* dummy line */
+ })
+ 
+ 
+-(define_insn "*cbranchsi4_equality_zero"
++(define_insn "cbranchsi4_equality_zero"
+   [(set (pc)
+ 	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+-			[(match_operand:SI 1 "register_operand"  "t, l, r")
++			[(match_operand:SI 1 "register_operand"  "t,l, r")
+ 			 (const_int 0)])
+ 		      (label_ref (match_operand 2 "" ""))
+ 		      (pc)))]
+   ""
+ {
+-  enum rtx_code code;
+-
+-  code = GET_CODE (operands[0]);
+-
+-  /* This zero-comparison conditional branch has two forms:
+-       32-bit instruction =>          beqz/bnez           imm16s << 1
+-       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+-
+-     For 32-bit case,
+-     we assume it is always reachable. (but check range -65500 ~ 65500)
+-
+-     For 16-bit case,
+-     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+-     However, since the $pc for nds32 is at the beginning of the instruction,
+-     we should leave some length space for current insn.
+-     So we use range -250 ~ 250.  */
+-
+-  switch (get_attr_length (insn))
+-    {
+-    case 2:
+-      if (which_alternative == 0)
+-	{
+-	  /* constraint: t */
+-	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+-	}
+-      else if (which_alternative == 1)
+-	{
+-	  /* constraint: l */
+-	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+-	}
+-      else
+-	{
+-	  /* constraint: r */
+-	  /* For which_alternative==2, it should not be here.  */
+-	  gcc_unreachable ();
+-	}
+-    case 4:
+-      /* including constraints: t, l, and r */
+-      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+-    case 6:
+-      if (which_alternative == 0)
+-	{
+-	  /* constraint: t */
+-	  if (code == EQ)
+-	    {
+-	      /*    beqzs8  .L0
+-	          =>
+-	            bnezs8  .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+-	    }
+-	  else
+-	    {
+-	      /*    bnezs8  .L0
+-	          =>
+-	            beqzs8  .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+-	    }
+-	}
+-      else if (which_alternative == 1)
+-	{
+-	  /* constraint: l */
+-	  if (code == EQ)
+-	    {
+-	      /*    beqz38  $r0, .L0
+-	          =>
+-	            bnez38  $r0, .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	    }
+-	  else
+-	    {
+-	      /*    bnez38  $r0, .L0
+-	          =>
+-	            beqz38  $r0, .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	    }
+-	}
+-      else
+-	{
+-	  /* constraint: r */
+-	  /* For which_alternative==2, it should not be here.  */
+-	  gcc_unreachable ();
+-	}
+-    case 8:
+-      /* constraint: t, l, r.  */
+-      if (code == EQ)
+-	{
+-	  /*    beqz  $r8, .L0
+-	      =>
+-	        bnez  $r8, .LCB0
+-	        j  .L0
+-	      .LCB0:
+-	   */
+-	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	}
+-      else
+-	{
+-	  /*    bnez  $r8, .L0
+-	      =>
+-	        beqz  $r8, .LCB0
+-	        j  .L0
+-	      .LCB0:
+-	   */
+-	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	}
+-    default:
+-      gcc_unreachable ();
+-    }
++  return nds32_output_cbranchsi4_equality_zero (insn, operands);
+ }
+   [(set_attr "type" "branch")
+-   (set_attr "enabled" "1")
++   (set_attr_alternative "enabled"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_string "yes")
++		     (const_string "no"))
++       ;; Alternative 1
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_string "yes")
++		     (const_string "no"))
++       ;; Alternative 2
++       (const_string "yes")
++     ])
+    (set_attr_alternative "length"
+      [
+        ;; Alternative 0
+-       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+-			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+-		     (if_then_else (match_test "TARGET_16_BIT")
+-				   (const_int 2)
+-				   (const_int 4))
+-		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+-					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+-				   (const_int 4)
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
++					(le (minus (match_dup 2) (pc)) (const_int  250)))
+ 				   (if_then_else (match_test "TARGET_16_BIT")
+-						 (const_int 6)
+-						 (const_int 8))))
++						 (const_int 2)
++						 (const_int 4))
++				   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
++						      (le (minus (match_dup 2) (pc)) (const_int  65500)))
++						 (const_int 4)
++						 (if_then_else (match_test "TARGET_16_BIT")
++							       (const_int 8)
++							       (const_int 10))))
++		     (const_int 10))
+        ;; Alternative 1
+-       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+-			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+-		     (if_then_else (match_test "TARGET_16_BIT")
+-				   (const_int 2)
+-				   (const_int 4))
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
++					(le (minus (match_dup 2) (pc)) (const_int  250)))
++				   (if_then_else (match_test "TARGET_16_BIT")
++						 (const_int 2)
++						 (const_int 4))
++				   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
++						      (le (minus (match_dup 2) (pc)) (const_int  65500)))
++						 (const_int 4)
++						 (if_then_else (match_test "TARGET_16_BIT")
++							       (const_int 8)
++							       (const_int 10))))
++		     (const_int 10))
++       ;; Alternative 2
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
+ 		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+ 					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+ 				   (const_int 4)
+-				   (if_then_else (match_test "TARGET_16_BIT")
+-						 (const_int 6)
+-						 (const_int 8))))
+-       ;; Alternative 2
+-       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+-			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+-		     (const_int 4)
+-		     (const_int 8))
++				   (const_int 10))
++		     (const_int 10))
+      ])])
+ 
+ 
+ ;; This pattern is dedicated to V2 ISA,
+ ;; because V2 DOES NOT HAVE beqc/bnec instruction.
+-(define_insn "*cbranchsi4_equality_reg"
++(define_insn "cbranchsi4_equality_reg"
+   [(set (pc)
+ 	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+-			[(match_operand:SI 1 "register_operand"           "r")
+-			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
++			[(match_operand:SI 1 "register_operand" "v, r")
++			 (match_operand:SI 2 "register_operand" "l, r")])
+ 		      (label_ref (match_operand 3 "" ""))
+ 		      (pc)))]
+   "TARGET_ISA_V2"
+ {
+-  enum rtx_code code;
+-
+-  code = GET_CODE (operands[0]);
+-
+-  /* This register-comparison conditional branch has one form:
+-       32-bit instruction =>          beq/bne           imm14s << 1
+-
+-     For 32-bit case,
+-     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+-
+-  switch (code)
+-    {
+-    case EQ:
+-      /* r, r */
+-      switch (get_attr_length (insn))
+-	{
+-	case 4:
+-	  return "beq\t%1, %2, %3";
+-	case 8:
+-	  /*    beq  $r0, $r1, .L0
+-	      =>
+-	        bne  $r0, $r1, .LCB0
+-	        j  .L0
+-	      .LCB0:
+-	   */
+-	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+-	default:
+-	  gcc_unreachable ();
+-	}
+-
+-    case NE:
+-      /* r, r */
+-      switch (get_attr_length (insn))
+-	{
+-	case 4:
+-	  return "bne\t%1, %2, %3";
+-	case 8:
+-	  /*    bne  $r0, $r1, .L0
+-	      =>
+-	        beq  $r0, $r1, .LCB0
+-	        j  .L0
+-	      .LCB0:
+-	   */
+-	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+-	default:
+-	  gcc_unreachable ();
+-	}
+-
+-    default:
+-      gcc_unreachable ();
+-    }
++  return nds32_output_cbranchsi4_equality_reg (insn, operands);
+ }
+   [(set_attr "type"   "branch")
+-   (set (attr "length")
+-	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+-			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+-		      (const_int 4)
+-		      (const_int 8)))])
++   (set_attr_alternative "enabled"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_string "yes")
++		     (const_string "no"))
++       ;; Alternative 1
++       (const_string "yes")
++     ])
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
++					(le (minus (match_dup 3) (pc)) (const_int  250)))
++				   (const_int 2)
++				   (if_then_else (and (ge (minus (match_dup 3) (pc))
++							  (const_int -16350))
++						      (le (minus (match_dup 3) (pc))
++							  (const_int  16350)))
++						 (const_int 4)
++						 (const_int 8)))
++		     (const_int 8))
++       ;; Alternative 1
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
++					(le (minus (match_dup 3) (pc)) (const_int  16350)))
++				   (const_int 4)
++				   (const_int 10))
++		     (const_int 10))
++     ])])
+ 
+ 
+ ;; This pattern is dedicated to V3/V3M,
+ ;; because V3/V3M DO HAVE beqc/bnec instruction.
+-(define_insn "*cbranchsi4_equality_reg_or_const_int"
++(define_insn "cbranchsi4_equality_reg_or_const_int"
+   [(set (pc)
+ 	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+-			[(match_operand:SI 1 "register_operand"           "r,    r")
+-			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
++			[(match_operand:SI 1 "register_operand"      "v, r,    r")
++			 (match_operand:SI 2 "nds32_rimm11s_operand" "l, r, Is11")])
+ 		      (label_ref (match_operand 3 "" ""))
+ 		      (pc)))]
+   "TARGET_ISA_V3 || TARGET_ISA_V3M"
+ {
+-  enum rtx_code code;
+-
+-  code = GET_CODE (operands[0]);
+-
+-  /* This register-comparison conditional branch has one form:
+-       32-bit instruction =>          beq/bne           imm14s << 1
+-       32-bit instruction =>         beqc/bnec          imm8s << 1
+-
+-     For 32-bit case, we assume it is always reachable.
+-     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+-
+-  switch (code)
+-    {
+-    case EQ:
+-      if (which_alternative == 0)
+-	{
+-	  /* r, r */
+-	  switch (get_attr_length (insn))
+-	    {
+-	    case 4:
+-	      return "beq\t%1, %2, %3";
+-	    case 8:
+-	      /*    beq  $r0, $r1, .L0
+-	          =>
+-	            bne  $r0, $r1, .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+-	    default:
+-	      gcc_unreachable ();
+-	    }
+-	}
+-      else
+-	{
+-	  /* r, Is11 */
+-	  switch (get_attr_length (insn))
+-	    {
+-	    case 4:
+-	      return "beqc\t%1, %2, %3";
+-	    case 8:
+-	      /*    beqc  $r0, constant, .L0
+-	          =>
+-	            bnec  $r0, constant, .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+-	    default:
+-	      gcc_unreachable ();
+-	    }
+-	}
+-    case NE:
+-      if (which_alternative == 0)
+-	{
+-	  /* r, r */
+-	  switch (get_attr_length (insn))
+-	    {
+-	    case 4:
+-	      return "bne\t%1, %2, %3";
+-	    case 8:
+-	      /*    bne  $r0, $r1, .L0
+-	          =>
+-	            beq  $r0, $r1, .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+-	    default:
+-	      gcc_unreachable ();
+-	    }
+-	}
+-      else
+-	{
+-	  /* r, Is11 */
+-	  switch (get_attr_length (insn))
+-	    {
+-	    case 4:
+-	      return "bnec\t%1, %2, %3";
+-	    case 8:
+-	      /*    bnec  $r0, constant, .L0
+-	          =>
+-	            beqc  $r0, constant, .LCB0
+-	            j  .L0
+-	          .LCB0:
+-	       */
+-	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+-	    default:
+-	      gcc_unreachable ();
+-	    }
+-	}
+-    default:
+-      gcc_unreachable ();
+-    }
++  return nds32_output_cbranchsi4_equality_reg_or_const_int (insn, operands);
+ }
+   [(set_attr "type"   "branch")
++   (set_attr_alternative "enabled"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_string "yes")
++		     (const_string "no"))
++       ;; Alternative 1
++       (const_string "yes")
++       ;; Alternative 2
++       (const_string "yes")
++     ])
+    (set_attr_alternative "length"
+      [
+        ;; Alternative 0
+-       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+-			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+-		     (const_int 4)
+-		     (const_int 8))
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
++					(le (minus (match_dup 3) (pc)) (const_int  250)))
++				   (const_int 2)
++				   (if_then_else (and (ge (minus (match_dup 3) (pc))
++							  (const_int -16350))
++						      (le (minus (match_dup 3) (pc))
++							  (const_int  16350)))
++						 (const_int 4)
++						 (const_int 8)))
++		    (const_int 8))
+        ;; Alternative 1
+-       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+-			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+-		     (const_int 4)
+-		     (const_int 8))
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
++					(le (minus (match_dup 3) (pc)) (const_int  16350)))
++				   (const_int 4)
++				   (const_int 10))
++		    (const_int 10))
++       ;; Alternative 2
++       (if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
++					(le (minus (match_dup 3) (pc)) (const_int  250)))
++				   (const_int 4)
++				   (const_int 10))
++		    (const_int 10))
+      ])])
+ 
+ 
+@@ -1529,80 +1340,16 @@ create_template:
+ 		      (pc)))]
+   ""
+ {
+-  enum rtx_code code;
+-
+-  code = GET_CODE (operands[0]);
+-
+-  /* This zero-greater-less-comparison conditional branch has one form:
+-       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+-
+-     For 32-bit case, we assume it is always reachable.
+-     (but check range -65500 ~ 65500).  */
+-
+-  if (get_attr_length (insn) == 8)
+-    {
+-      /* The branch target is too far to simply use one
+-         bgtz/bgez/bltz/blez instruction.
+-         We need to reverse condition and use 'j' to jump to the target.  */
+-      switch (code)
+-	{
+-	case GT:
+-	  /*   bgtz  $r8, .L0
+-	     =>
+-	       blez  $r8, .LCB0
+-	       j  .L0
+-	     .LCB0:
+-	   */
+-	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	case GE:
+-	  /*   bgez  $r8, .L0
+-	     =>
+-	       bltz  $r8, .LCB0
+-	       j  .L0
+-	     .LCB0:
+-	   */
+-	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	case LT:
+-	  /*   bltz  $r8, .L0
+-	     =>
+-	       bgez  $r8, .LCB0
+-	       j  .L0
+-	     .LCB0:
+-	   */
+-	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	case LE:
+-	  /*   blez  $r8, .L0
+-	     =>
+-	       bgtz  $r8, .LCB0
+-	       j  .L0
+-	     .LCB0:
+-	   */
+-	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+-	default:
+-	  gcc_unreachable ();
+-	}
+-    }
+-
+-  switch (code)
+-    {
+-    case GT:
+-      return "bgtz\t%1, %2";
+-    case GE:
+-      return "bgez\t%1, %2";
+-    case LT:
+-      return "bltz\t%1, %2";
+-    case LE:
+-      return "blez\t%1, %2";
+-    default:
+-      gcc_unreachable ();
+-    }
++  return nds32_output_cbranchsi4_greater_less_zero (insn, operands);
+ }
+   [(set_attr "type"   "branch")
+    (set (attr "length")
+-        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+-			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+-		      (const_int 4)
+-		      (const_int 8)))])
++	(if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		      (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
++					 (le (minus (match_dup 2) (pc)) (const_int  65500)))
++				    (const_int 4)
++				    (const_int 10))
++		      (const_int 10)))])
+ 
+ 
+ (define_expand "cstoresi4"
+@@ -1612,237 +1359,85 @@ create_template:
+ 	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+   ""
+ {
+-  rtx tmp_reg;
+-  enum rtx_code code;
+-
+-  code = GET_CODE (operands[1]);
+-
+-  switch (code)
++  enum nds32_expand_result_type result = nds32_expand_cstore (operands);
++  switch (result)
+     {
+-    case EQ:
+-      if (GET_CODE (operands[3]) == CONST_INT)
+-	{
+-	  /* reg_R = (reg_A == const_int_B)
+-	     --> addi reg_C, reg_A, -const_int_B
+-	         slti reg_R, reg_C, const_int_1 */
+-	  tmp_reg = gen_reg_rtx (SImode);
+-	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+-	  /* If the integer value is not in the range of imm15s,
+-	     we need to force register first because our addsi3 pattern
+-	     only accept nds32_rimm15s_operand predicate.  */
+-	  if (!satisfies_constraint_Is15 (operands[3]))
+-	    operands[3] = force_reg (SImode, operands[3]);
+-	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+-	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+-
+-	  DONE;
+-	}
+-      else
+-	{
+-	  /* reg_R = (reg_A == reg_B)
+-	     --> xor  reg_C, reg_A, reg_B
+-	         slti reg_R, reg_C, const_int_1 */
+-	  tmp_reg = gen_reg_rtx (SImode);
+-	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+-	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+-
+-	  DONE;
+-	}
+-
+-    case NE:
+-      if (GET_CODE (operands[3]) == CONST_INT)
+-	{
+-	  /* reg_R = (reg_A != const_int_B)
+-	     --> addi reg_C, reg_A, -const_int_B
+-	         slti reg_R, const_int_0, reg_C */
+-	  tmp_reg = gen_reg_rtx (SImode);
+-	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+-	  /* If the integer value is not in the range of imm15s,
+-	     we need to force register first because our addsi3 pattern
+-	     only accept nds32_rimm15s_operand predicate.  */
+-	  if (!satisfies_constraint_Is15 (operands[3]))
+-	    operands[3] = force_reg (SImode, operands[3]);
+-	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+-	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+-
+-	  DONE;
+-	}
+-      else
+-	{
+-	  /* reg_R = (reg_A != reg_B)
+-	     --> xor  reg_C, reg_A, reg_B
+-	         slti reg_R, const_int_0, reg_C */
+-	  tmp_reg = gen_reg_rtx (SImode);
+-	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+-	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+-
+-	  DONE;
+-	}
+-
+-    case GT:
+-    case GTU:
+-      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+-      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+-      if (code == GT)
+-	{
+-	  /* GT, use slts instruction */
+-	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+-	}
+-      else
+-	{
+-	  /* GTU, use slt instruction */
+-	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+-	}
+-
++    case EXPAND_DONE:
+       DONE;
+-
+-    case GE:
+-    case GEU:
+-      if (GET_CODE (operands[3]) == CONST_INT)
+-	{
+-	  /* reg_R = (reg_A >= const_int_B)
+-	     --> movi reg_C, const_int_B - 1
+-	         slt  reg_R, reg_C, reg_A */
+-	  tmp_reg = gen_reg_rtx (SImode);
+-
+-	  emit_insn (gen_movsi (tmp_reg,
+-				gen_int_mode (INTVAL (operands[3]) - 1,
+-					      SImode)));
+-	  if (code == GE)
+-	    {
+-	      /* GE, use slts instruction */
+-	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+-	    }
+-	  else
+-	    {
+-	      /* GEU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+-	    }
+-
+-	  DONE;
+-	}
+-      else
+-	{
+-	  /* reg_R = (reg_A >= reg_B)
+-	     --> slt  reg_R, reg_A, reg_B
+-	         xori reg_R, reg_R, const_int_1 */
+-	  if (code == GE)
+-	    {
+-	      /* GE, use slts instruction */
+-	      emit_insn (gen_slts_compare (operands[0],
+-					   operands[2], operands[3]));
+-	    }
+-	  else
+-	    {
+-	      /* GEU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (operands[0],
+-					   operands[2], operands[3]));
+-	    }
+-
+-	  /* perform 'not' behavior */
+-	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+-
+-	  DONE;
+-	}
+-
+-    case LT:
+-    case LTU:
+-      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+-      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+-      if (code == LT)
+-	{
+-	  /* LT, use slts instruction */
+-	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+-	}
+-      else
+-	{
+-	  /* LTU, use slt instruction */
+-	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+-	}
+-
+-      DONE;
+-
+-    case LE:
+-    case LEU:
+-      if (GET_CODE (operands[3]) == CONST_INT)
+-	{
+-	  /* reg_R = (reg_A <= const_int_B)
+-	     --> movi reg_C, const_int_B + 1
+-	         slt  reg_R, reg_A, reg_C */
+-	  tmp_reg = gen_reg_rtx (SImode);
+-
+-	  emit_insn (gen_movsi (tmp_reg,
+-				gen_int_mode (INTVAL (operands[3]) + 1,
+-						      SImode)));
+-	  if (code == LE)
+-	    {
+-	      /* LE, use slts instruction */
+-	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+-	    }
+-	  else
+-	    {
+-	      /* LEU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+-	    }
+-
+-	  DONE;
+-	}
+-      else
+-	{
+-	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+-	                                  xori reg_R, reg_R, const_int_1 */
+-	  if (code == LE)
+-	    {
+-	      /* LE, use slts instruction */
+-	      emit_insn (gen_slts_compare (operands[0],
+-					   operands[3], operands[2]));
+-	    }
+-	  else
+-	    {
+-	      /* LEU, use slt instruction */
+-	      emit_insn (gen_slt_compare  (operands[0],
+-					   operands[3], operands[2]));
+-	    }
+-
+-	  /* perform 'not' behavior */
+-	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+-
+-	  DONE;
+-	}
+-
+-
++      break;
++    case EXPAND_FAIL:
++      FAIL;
++      break;
++    case EXPAND_CREATE_TEMPLATE:
++      break;
+     default:
+       gcc_unreachable ();
+     }
+ })
+ 
+ 
+-(define_insn "slts_compare"
+-  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+-	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+-	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
++(define_expand "slts_compare"
++  [(set (match_operand:SI 0 "register_operand"       "")
++	(lt:SI (match_operand:SI 1 "general_operand" "")
++	       (match_operand:SI 2 "general_operand" "")))]
++  ""
++{
++  if (!REG_P (operands[1]))
++    operands[1] = force_reg (SImode, operands[1]);
++
++  if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2]))
++    operands[2] = force_reg (SImode, operands[2]);
++})
++
++(define_insn "slts_compare_impl"
++  [(set (match_operand:SI 0 "register_operand"             "=t,   t, r,    r")
++	(lt:SI (match_operand:SI 1 "register_operand"      " d,   d, r,    r")
++	       (match_operand:SI 2 "nds32_rimm15s_operand" " r,Iu05, r, Is15")))]
+   ""
+   "@
+    slts45\t%1, %2
+    sltsi45\t%1, %2
+    slts\t%0, %1, %2
+    sltsi\t%0, %1, %2"
+-  [(set_attr "type"   "compare,compare,compare,compare")
+-   (set_attr "length" "      2,      2,      4,      4")])
++  [(set_attr "type"   "alu,    alu,    alu,    alu")
++   (set_attr "length" "  2,      2,      4,      4")])
++
++(define_insn "slt_eq0"
++  [(set (match_operand:SI 0 "register_operand"        "=t, r")
++	(eq:SI (match_operand:SI 1 "register_operand" " d, r")
++	       (const_int 0)))]
++  ""
++  "@
++   slti45\t%1, 1
++   slti\t%0, %1, 1"
++  [(set_attr "type"   "alu, alu")
++   (set_attr "length" "  2,   4")])
++
++(define_expand "slt_compare"
++  [(set (match_operand:SI 0 "register_operand"        "")
++	(ltu:SI (match_operand:SI 1 "general_operand" "")
++		(match_operand:SI 2 "general_operand" "")))]
++  ""
++{
++  if (!REG_P (operands[1]))
++    operands[1] = force_reg (SImode, operands[1]);
+ 
+-(define_insn "slt_compare"
+-  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+-	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+-		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
++  if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2]))
++    operands[2] = force_reg (SImode, operands[2]);
++})
++
++(define_insn "slt_compare_impl"
++  [(set (match_operand:SI 0 "register_operand"              "=t,   t, r,    r")
++	(ltu:SI (match_operand:SI 1 "register_operand"      " d,   d, r,    r")
++		(match_operand:SI 2 "nds32_rimm15s_operand" " r,Iu05, r, Is15")))]
+   ""
+   "@
+    slt45\t%1, %2
+    slti45\t%1, %2
+    slt\t%0, %1, %2
+    slti\t%0, %1, %2"
+-  [(set_attr "type"   "compare,compare,compare,compare")
+-   (set_attr "length" "      2,      2,      4,      4")])
+-
++  [(set_attr "type"   "alu,    alu,    alu,    alu")
++   (set_attr "length" "  2,      2,      4,      4")])
+ 
+ ;; ----------------------------------------------------------------------------
+ 
+@@ -1874,12 +1469,14 @@ create_template:
+     }
+ }
+   [(set_attr "type" "branch")
+-   (set_attr "enabled" "1")
++   (set_attr "enabled" "yes")
+    (set (attr "length")
+-	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+-			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+-		      (if_then_else (match_test "TARGET_16_BIT")
+-				    (const_int 2)
++	(if_then_else (match_test "!CROSSING_JUMP_P (insn)")
++		      (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
++					 (le (minus (match_dup 0) (pc)) (const_int  250)))
++				    (if_then_else (match_test "TARGET_16_BIT")
++						  (const_int 2)
++						  (const_int 4))
+ 				    (const_int 4))
+ 		      (const_int 4)))])
+ 
+@@ -1887,14 +1484,27 @@ create_template:
+   [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))]
+   ""
+   "@
+-  jr5\t%0
+-  jr\t%0"
++   jr5\t%0
++   jr\t%0"
+   [(set_attr "type"   "branch,branch")
+    (set_attr "length" "     2,     4")])
+ 
++(define_insn "*cond_indirect_jump"
++  [(cond_exec (ne (match_operand:SI 0 "register_operand"       "r")
++		  (const_int 0))
++	      (set (pc) (match_operand:SI 1 "register_operand" "0")))]
++  ""
++  "jrnez\t%0"
++  [(set_attr "type"   "branch")
++   (set_attr "length"      "4")])
++
++;; ----------------------------------------------------------------------------
++
++;; Normal call patterns.
++
+ ;; Subroutine call instruction returning no value.
+ ;;   operands[0]: It should be a mem RTX whose address is
+-;;                the address of the function.
+;;                the address of the function.
+ ;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+ ;;   operands[2]: It is the number of registers used as operands.
+ 
+@@ -1904,39 +1514,114 @@ create_template:
+ 	      (clobber (reg:SI LP_REGNUM))
+ 	      (clobber (reg:SI TA_REGNUM))])]
+   ""
+-  ""
++  {
++    rtx insn;
++    rtx sym = XEXP (operands[0], 0);
++
++    if (TARGET_ICT_MODEL_LARGE
++	&& nds32_indirect_call_referenced_p (sym))
++      {
++	rtx reg = gen_reg_rtx (Pmode);
++	emit_move_insn (reg, sym);
++	operands[0] = gen_const_mem (Pmode, reg);
++      }
++
++    if (flag_pic)
++      {
++	insn = emit_call_insn (gen_call_internal
++			       (XEXP (operands[0], 0), GEN_INT (0)));
++	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
++	DONE;
++      }
++  }
+ )
+ 
+-(define_insn "*call_register"
+-  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
+-		    (match_operand 1))
+-	      (clobber (reg:SI LP_REGNUM))
+-	      (clobber (reg:SI TA_REGNUM))])]
+-  ""
+-  "@
+-  jral5\t%0
+-  jral\t%0"
+-  [(set_attr "type"   "branch,branch")
+-   (set_attr "length" "     2,     4")])
+-
+-(define_insn "*call_immediate"
+-  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
++(define_insn "call_internal"
++  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i"))
+ 		    (match_operand 1))
+ 	      (clobber (reg:SI LP_REGNUM))
+ 	      (clobber (reg:SI TA_REGNUM))])]
+   ""
+ {
+-  if (TARGET_CMODEL_LARGE)
+-    return "bal\t%0";
+-  else
+-    return "jal\t%0";
++  rtx_insn *next_insn = next_active_insn (insn);
++  bool align_p = (!(next_insn && get_attr_length (next_insn) == 2))
++		 && NDS32_ALIGN_P ();
++  switch (which_alternative)
++    {
++    case 0:
++      if (TARGET_16_BIT)
++	{
++	  if (align_p)
++	    return "jral5\t%0\;.align 2";
++	  else
++	    return "jral5\t%0";
++	}
++      else
++	{
++	  if (align_p)
++	    return "jral\t%0\;.align 2";
++	  else
++	    return "jral\t%0";
++	}
++    case 1:
++      return nds32_output_call (insn, operands, operands[0],
++				"bal\t%0", "jal\t%0", align_p);
++    default:
++      gcc_unreachable ();
++    }
+ }
+-  [(set_attr "type"   "branch")
+-   (set (attr "length")
+-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
+-		      (const_int 12)
+-		      (const_int 4)))])
++  [(set_attr "enabled" "yes")
++   (set_attr "type" "branch")
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_int 2)
++		     (const_int 4))
++       ;; Alternative 1
++       (if_then_else (match_test "flag_pic")
++		     (const_int 16)
++		     (if_then_else (match_test "nds32_long_call_p (operands[0])")
++				   (const_int 12)
++				   (const_int 4)))
++     ])]
++)
+ 
++(define_insn "*cond_call_register"
++  [(cond_exec (ne (match_operand:SI 0 "register_operand"                   "r")
++		  (const_int 0))
++	      (parallel [(call (mem (match_operand:SI 1 "register_operand" "0"))
++			       (match_operand 2))
++			 (clobber (reg:SI LP_REGNUM))
++			 (clobber (reg:SI TA_REGNUM))]))]
++  "TARGET_ISA_V3"
++  "jralnez\t%0"
++  [(set_attr "type"   "branch")
++   (set_attr "length"      "4")])
++
++(define_insn "*cond_call_immediate"
++  [(cond_exec (match_operator 0 "nds32_conditional_call_comparison_operator"
++		[(match_operand:SI 1 "register_operand"                     "r")
++		 (const_int 0)])
++	      (parallel [(call (mem (match_operand:SI 2 "nds32_symbolic_operand" "i"))
++			       (match_operand 3))
++			 (clobber (reg:SI LP_REGNUM))
++			 (clobber (reg:SI TA_REGNUM))]))]
++  "!flag_pic && !TARGET_CMODEL_LARGE
++   && nds32_indirect_call_referenced_p (operands[2])"
++{
++  switch (GET_CODE (operands[0]))
++    {
++    case LT:
++      return "bltzal\t%1, %2";
++    case GE:
++      return "bgezal\t%1, %2";
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "type"    "branch")
++   (set_attr "length"       "4")])
+ 
+ ;; Subroutine call instruction returning a value.
+ ;;   operands[0]: It is the hard regiser in which the value is returned.
+@@ -1951,49 +1636,152 @@ create_template:
+ 	      (clobber (reg:SI LP_REGNUM))
+ 	      (clobber (reg:SI TA_REGNUM))])]
+   ""
+-  ""
++  {
++    rtx insn;
++    rtx sym = XEXP (operands[1], 0);
++
++    if (TARGET_ICT_MODEL_LARGE
++	&& nds32_indirect_call_referenced_p (sym))
++      {
++	rtx reg = gen_reg_rtx (Pmode);
++	emit_move_insn (reg, sym);
++	operands[1] = gen_const_mem (Pmode, reg);
++      }
++
++    if (flag_pic)
++      {
++	insn =
++	  emit_call_insn (gen_call_value_internal
++			  (operands[0], XEXP (operands[1], 0), GEN_INT (0)));
++	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
++	DONE;
++      }
++  }
+ )
+ 
+-(define_insn "*call_value_register"
++(define_insn "call_value_internal"
+   [(parallel [(set (match_operand 0)
+-		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
++		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i"))
+ 		         (match_operand 2)))
+ 	      (clobber (reg:SI LP_REGNUM))
+ 	      (clobber (reg:SI TA_REGNUM))])]
+   ""
+-  "@
+-  jral5\t%1
+-  jral\t%1"
+-  [(set_attr "type"   "branch,branch")
+-   (set_attr "length" "     2,     4")])
+-
+-(define_insn "*call_value_immediate"
+-  [(parallel [(set (match_operand 0)
+-		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+-			 (match_operand 2)))
+-	      (clobber (reg:SI LP_REGNUM))
+-	      (clobber (reg:SI TA_REGNUM))])]
+-  ""
+ {
+-  if (TARGET_CMODEL_LARGE)
+-    return "bal\t%1";
+-  else
+-    return "jal\t%1";
++  rtx_insn *next_insn = next_active_insn (insn);
++  bool align_p = (!(next_insn && get_attr_length (next_insn) == 2))
++		 && NDS32_ALIGN_P ();
++  switch (which_alternative)
++    {
++    case 0:
++      if (TARGET_16_BIT)
++	{
++	  if (align_p)
++	    return "jral5\t%1\;.align 2";
++	  else
++	    return "jral5\t%1";
++	}
++      else
++	{
++	  if (align_p)
++	    return "jral\t%1\;.align 2";
++	  else
++	    return "jral\t%1";
++	}
++    case 1:
++      return nds32_output_call (insn, operands, operands[1],
++				"bal\t%1", "jal\t%1", align_p);
++    default:
++      gcc_unreachable ();
++    }
++}
++  [(set_attr "enabled" "yes")
++   (set_attr "type" "branch")
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_int 2)
++		     (const_int 4))
++       ;; Alternative 1
++       (if_then_else (match_test "flag_pic")
++		     (const_int 16)
++		     (if_then_else (match_test "nds32_long_call_p (operands[1])")
++				   (const_int 12)
++				   (const_int 4)))
++     ])]
++)
++
++(define_insn "*cond_call_value_register"
++  [(cond_exec (ne (match_operand:SI 0 "register_operand"                        "r")
++		  (const_int 0))
++	      (parallel [(set (match_operand 1)
++			      (call (mem (match_operand:SI 2 "register_operand" "0"))
++				    (match_operand 3)))
++			 (clobber (reg:SI LP_REGNUM))
++			 (clobber (reg:SI TA_REGNUM))]))]
++  "TARGET_ISA_V3"
++  "jralnez\t%0"
++  [(set_attr "type"    "branch")
++   (set_attr "length"       "4")])
++
++(define_insn "*cond_call_value_immediate"
++  [(cond_exec (match_operator 0 "nds32_conditional_call_comparison_operator"
++		[(match_operand:SI 1 "register_operand"                          "r")
++		 (const_int 0)])
++	      (parallel [(set (match_operand 2)
++			      (call (mem (match_operand:SI 3 "nds32_symbolic_operand" "i"))
++				    (match_operand 4)))
++			 (clobber (reg:SI LP_REGNUM))
++			 (clobber (reg:SI TA_REGNUM))]))]
++  "!flag_pic && !TARGET_CMODEL_LARGE
++   && nds32_indirect_call_referenced_p (operands[3])"
++{
++  switch (GET_CODE (operands[0]))
++    {
++    case LT:
++      return "bltzal\t%1, %3";
++    case GE:
++      return "bgezal\t%1, %3";
++    default:
++      gcc_unreachable ();
++    }
+ }
+   [(set_attr "type"   "branch")
+-   (set (attr "length")
+-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
+-		      (const_int 12)
+-		      (const_int 4)))])
++   (set_attr "length"      "4")])
++
++;; Call subroutine returning any type.
++
++(define_expand "untyped_call"
++  [(parallel [(call (match_operand 0 "" "")
++		    (const_int 0))
++	      (match_operand 1 "" "")
++	      (match_operand 2 "" "")])]
++  ""
++{
++  int i;
++
++  emit_call_insn (gen_call (operands[0], const0_rtx));
++
++  for (i = 0; i < XVECLEN (operands[2], 0); i++)
++    {
++      rtx set = XVECEXP (operands[2], 0, i);
++      emit_move_insn (SET_DEST (set), SET_SRC (set));
++    }
+ 
++  /* The optimizer does not know that the call sets the function value
++     registers we stored in the result block.  We avoid problems by
++     claiming that all hard registers are used and clobbered at this
++     point.  */
++  emit_insn (gen_blockage ());
++  DONE;
++})
+ 
+ ;; ----------------------------------------------------------------------------
+ 
+ ;; The sibcall patterns.
+ 
+ ;; sibcall
+-;; sibcall_register
+-;; sibcall_immediate
++;; sibcall_internal
+ 
+ (define_expand "sibcall"
+   [(parallel [(call (match_operand 0 "memory_operand" "")
+@@ -2001,41 +1789,60 @@ create_template:
+ 	      (clobber (reg:SI TA_REGNUM))
+ 	      (return)])]
+   ""
+-  ""
+-)
+-
+-(define_insn "*sibcall_register"
+-  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
+-		    (match_operand 1))
+-	      (clobber (reg:SI TA_REGNUM))
+-	      (return)])]
+-  ""
+-  "@
+-   jr5\t%0
+-   jr\t%0"
+-  [(set_attr "type"   "branch,branch")
+-   (set_attr "length" "     2,     4")])
++{
++    rtx sym = XEXP (operands[0], 0);
++
++    if (TARGET_ICT_MODEL_LARGE
++	&& nds32_indirect_call_referenced_p (sym))
++      {
++	rtx reg = gen_reg_rtx (Pmode);
++	emit_move_insn (reg, sym);
++	operands[0] = gen_const_mem (Pmode, reg);
++      }
++})
+ 
+-(define_insn "*sibcall_immediate"
+-  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
++(define_insn "sibcall_internal"
++  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i"))
+ 		    (match_operand 1))
+ 	      (clobber (reg:SI TA_REGNUM))
+ 	      (return)])]
+   ""
+ {
+-  if (TARGET_CMODEL_LARGE)
+-    return "b\t%0";
+-  else
+-    return "j\t%0";
++  switch (which_alternative)
++    {
++    case 0:
++      if (TARGET_16_BIT)
++	return "jr5\t%0";
++      else
++	return "jr\t%0";
++    case 1:
++      if (nds32_long_call_p (operands[0]))
++	return "b\t%0";
++      else
++	return "j\t%0";
++    default:
++      gcc_unreachable ();
++    }
+ }
+-  [(set_attr "type"   "branch")
+-   (set (attr "length")
+-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
+-		      (const_int 12)
+-		      (const_int 4)))])
++  [(set_attr "enabled" "yes")
++   (set_attr "type" "branch")
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_int 2)
++		     (const_int 4))
++       ;; Alternative 1
++       (if_then_else (match_test "flag_pic")
++		     (const_int 16)
++		     (if_then_else (match_test "nds32_long_call_p (operands[0])")
++				   (const_int 12)
++				   (const_int 4)))
++     ])]
++)
+ 
+ ;; sibcall_value
+-;; sibcall_value_register
++;; sibcall_value_internal
+ ;; sibcall_value_immediate
+ 
+ (define_expand "sibcall_value"
+@@ -2045,73 +1852,106 @@ create_template:
+ 	      (clobber (reg:SI TA_REGNUM))
+ 	      (return)])]
+   ""
+-  ""
+-)
+-
+-(define_insn "*sibcall_value_register"
+-  [(parallel [(set (match_operand 0)
+-		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
+-			 (match_operand 2)))
+-	      (clobber (reg:SI TA_REGNUM))
+-	      (return)])]
+-  ""
+-  "@
+-   jr5\t%1
+-   jr\t%1"
+-  [(set_attr "type"   "branch,branch")
+-   (set_attr "length" "     2,     4")])
++{
++    rtx sym = XEXP (operands[1], 0);
++
++    if (TARGET_ICT_MODEL_LARGE
++	&& nds32_indirect_call_referenced_p (sym))
++      {
++	rtx reg = gen_reg_rtx (Pmode);
++	emit_move_insn (reg, sym);
++	operands[1] = gen_const_mem (Pmode, reg);
++      }
++})
+ 
+-(define_insn "*sibcall_value_immediate"
++(define_insn "sibcall_value_internal"
+   [(parallel [(set (match_operand 0)
+-		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
++		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i"))
+ 			 (match_operand 2)))
+ 	      (clobber (reg:SI TA_REGNUM))
+ 	      (return)])]
+   ""
+ {
+-  if (TARGET_CMODEL_LARGE)
+-    return "b\t%1";
+-  else
+-    return "j\t%1";
++  switch (which_alternative)
++    {
++    case 0:
++      if (TARGET_16_BIT)
++	return "jr5\t%1";
++      else
++	return "jr\t%1";
++    case 1:
++      if (nds32_long_call_p (operands[1]))
++	return "b\t%1";
++      else
++	return "j\t%1";
++    default:
++      gcc_unreachable ();
++    }
+ }
+-  [(set_attr "type"   "branch")
+-   (set (attr "length")
+-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
+-		      (const_int 12)
+-		      (const_int 4)))])
+-
++  [(set_attr "enabled" "yes")
++   (set_attr "type" "branch")
++   (set_attr_alternative "length"
++     [
++       ;; Alternative 0
++       (if_then_else (match_test "TARGET_16_BIT")
++		     (const_int 2)
++		     (const_int 4))
++       ;; Alternative 1
++       (if_then_else (match_test "flag_pic")
++		     (const_int 16)
++		     (if_then_else (match_test "nds32_long_call_p (operands[1])")
++				   (const_int 12)
++				   (const_int 4)))
++     ])]
++)
+ 
+ ;; ----------------------------------------------------------------------------
+ 
+-;; prologue and epilogue.
++;; The prologue and epilogue.
+ 
+ (define_expand "prologue" [(const_int 0)]
+   ""
+ {
+   /* Note that only under V3/V3M ISA, we could use v3push prologue.
+-     In addition, we do not want to use v3push for isr function
+-     and variadic function.  */
+-  if (TARGET_V3PUSH
+-      && !nds32_isr_function_p (current_function_decl)
+-      && (cfun->machine->va_args_size == 0))
++     In addition, we need to check if v3push is indeed available.  */
++  if (NDS32_V3PUSH_AVAILABLE_P)
+     nds32_expand_prologue_v3push ();
+   else
+     nds32_expand_prologue ();
++
++  /* If cfun->machine->fp_as_gp_p is true, we can generate special
++     directive to guide linker doing fp-as-gp optimization.
++     However, for a naked function, which means
++     it should not have prologue/epilogue,
++     using fp-as-gp still requires saving $fp by push/pop behavior and
++     there is no benefit to use fp-as-gp on such small function.
++     So we need to make sure this function is NOT naked as well.  */
++  if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
++    emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM)));
++
+   DONE;
+ })
+ 
+ (define_expand "epilogue" [(const_int 0)]
+   ""
+ {
++  /* If cfun->machine->fp_as_gp_p is true, we can generate special
++     directive to guide linker doing fp-as-gp optimization.
++     However, for a naked function, which means
++     it should not have prologue/epilogue,
++     using fp-as-gp still requires saving $fp by push/pop behavior and
++     there is no benefit to use fp-as-gp on such small function.
++     So we need to make sure this function is NOT naked as well.  */
++  if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
++    emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM)));
++
+   /* Note that only under V3/V3M ISA, we could use v3pop epilogue.
+-     In addition, we do not want to use v3pop for isr function
+-     and variadic function.  */
+-  if (TARGET_V3PUSH
+-      && !nds32_isr_function_p (current_function_decl)
+-      && (cfun->machine->va_args_size == 0))
++     In addition, we need to check if v3push is indeed available.  */
++  if (NDS32_V3PUSH_AVAILABLE_P)
+     nds32_expand_epilogue_v3pop (false);
+   else
+     nds32_expand_epilogue (false);
++
+   DONE;
+ })
+ 
+@@ -2121,15 +1961,11 @@ create_template:
+   /* Pass true to indicate that this is sibcall epilogue and
+      exit from a function without the final branch back to the
+      calling function.  */
+-  if (TARGET_V3PUSH && !nds32_isr_function_p (current_function_decl))
+-    nds32_expand_epilogue_v3pop (true);
+-  else
+-    nds32_expand_epilogue (true);
++  nds32_expand_epilogue (true);
+ 
+   DONE;
+ })
+ 
+-
+ ;; nop instruction.
+ 
+ (define_insn "nop"
+@@ -2142,7 +1978,7 @@ create_template:
+     return "nop";
+ }
+   [(set_attr "type" "misc")
+-   (set_attr "enabled" "1")
++   (set_attr "enabled" "yes")
+    (set (attr "length")
+ 	(if_then_else (match_test "TARGET_16_BIT")
+ 		      (const_int 2)
+@@ -2166,12 +2002,11 @@ create_template:
+ {
+   return nds32_output_stack_push (operands[0]);
+ }
+-  [(set_attr "type" "misc")
+-   (set_attr "enabled" "1")
++  [(set_attr "type" "store_multiple")
++   (set_attr "combo" "12")
++   (set_attr "enabled" "yes")
+    (set (attr "length")
+-	(if_then_else (match_test "TARGET_V3PUSH
+-				   && !nds32_isr_function_p (cfun->decl)
+-				   && (cfun->machine->va_args_size == 0)")
++	(if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P")
+ 		      (const_int 2)
+ 		      (const_int 4)))])
+ 
+@@ -2188,12 +2023,11 @@ create_template:
+ {
+   return nds32_output_stack_pop (operands[0]);
+ }
+-  [(set_attr "type" "misc")
+-   (set_attr "enabled" "1")
++  [(set_attr "type" "load_multiple")
++   (set_attr "combo" "12")
++   (set_attr "enabled" "yes")
+    (set (attr "length")
+-	(if_then_else (match_test "TARGET_V3PUSH
+-				   && !nds32_isr_function_p (cfun->decl)
+-				   && (cfun->machine->va_args_size == 0)")
++	(if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P")
+ 		      (const_int 2)
+ 		      (const_int 4)))])
+ 
+@@ -2205,34 +2039,64 @@ create_template:
+ ;; Use this pattern to expand a return instruction
+ ;; with simple_return rtx if no epilogue is required.
+ (define_expand "return"
+-  [(simple_return)]
++  [(parallel [(return)
++              (clobber (reg:SI FP_REGNUM))])]
+   "nds32_can_use_return_insn ()"
+-  ""
+-)
++{
++  /* Emit as the simple return.  */
++  if (!cfun->machine->fp_as_gp_p
++      && cfun->machine->naked_p
++      && (cfun->machine->va_args_size == 0))
++    {
++      emit_jump_insn (gen_return_internal ());
++      DONE;
++    }
++})
+ 
+ ;; This pattern is expanded only by the shrink-wrapping optimization
+ ;; on paths where the function prologue has not been executed.
++;; However, that optimization may reorder the prologue/epilogue blocks
++;; together with the basic blocks of the function body.
++;; So we must disable this pattern if we have already decided
++;; to perform the fp_as_gp optimization, which requires the prologue to be
++;; the first block and the epilogue to be the last block.
+ (define_expand "simple_return"
+   [(simple_return)]
+-  ""
++  "!cfun->machine->fp_as_gp_p"
+   ""
+ )
+ 
++(define_insn "*nds32_return"
++  [(parallel [(return)
++   (clobber (reg:SI FP_REGNUM))])]
++  ""
++{
++  return nds32_output_return ();
++}
++  [(set_attr "type" "branch")
++   (set_attr "enabled" "yes")
++   (set_attr "length" "4")])
++
+ (define_insn "return_internal"
+   [(simple_return)]
+   ""
+ {
++  if (nds32_isr_function_critical_p (current_function_decl))
++    return "iret";
++
+   if (TARGET_16_BIT)
+     return "ret5";
+   else
+     return "ret";
+ }
+   [(set_attr "type" "branch")
+-   (set_attr "enabled" "1")
++   (set_attr "enabled" "yes")
+    (set (attr "length")
+-	(if_then_else (match_test "TARGET_16_BIT")
+-		      (const_int 2)
+-		      (const_int 4)))])
++	(if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)")
++		      (const_int 4)
++		      (if_then_else (match_test "TARGET_16_BIT")
++				    (const_int 2)
++				    (const_int 4))))])
+ 
+ 
+ ;; ----------------------------------------------------------------------------
+@@ -2267,6 +2131,7 @@ create_template:
+ {
+   rtx add_tmp;
+   rtx reg, test;
++  rtx tmp_reg;
+ 
+   /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+   if (operands[1] != const0_rtx)
+@@ -2275,8 +2140,8 @@ create_template:
+       add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode);
+ 
+       /* If the integer value is not in the range of imm15s,
+-         we need to force register first because our addsi3 pattern
+-         only accept nds32_rimm15s_operand predicate.  */
++	 we need to force register first because our addsi3 pattern
++	 only accept nds32_rimm15s_operand predicate.  */
+       add_tmp = force_reg (SImode, add_tmp);
+ 
+       emit_insn (gen_addsi3 (reg, operands[0], add_tmp));
+@@ -2288,11 +2153,14 @@ create_template:
+   emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+ 				  operands[4]));
+ 
+-  operands[5] = gen_reg_rtx (SImode);
+-  /* Step C, D, E, and F, using another temporary register operands[5].  */
++  tmp_reg = gen_reg_rtx (SImode);
++  /* Step C, D, E, and F, using another temporary register tmp_reg.  */
++  if (flag_pic)
++    emit_use (pic_offset_table_rtx);
++
+   emit_jump_insn (gen_casesi_internal (operands[0],
+ 				       operands[3],
+-				       operands[5]));
++				       tmp_reg));
+   DONE;
+ })
+ 
+@@ -2328,17 +2196,34 @@ create_template:
+   else
+     return nds32_output_casesi (operands);
+ }
+-  [(set_attr "length" "20")
+-   (set_attr "type" "alu")])
++  [(set_attr "type" "branch")
++   (set (attr "length")
++	(if_then_else (match_test "flag_pic")
++		      (const_int 28)
++		      (const_int 20)))])
+ 
+ ;; ----------------------------------------------------------------------------
+ 
+ ;; Performance Extension
+ 
++; If the -fwrapv option is given, GCC expects signed overflow to
++; wrap around, so ABS(INT_MIN) must still be INT_MIN
++; (e.g. ABS(0x80000000)=0x80000000).
++; However, the hardware ABS instruction of the nds32 target
++; always saturates: abs 0x80000000 -> 0x7fffffff.
++; Therefore we can only enable the abssi2 pattern if flag_wrapv is NOT set.
++(define_insn "abssi2"
++  [(set (match_operand:SI 0 "register_operand"         "=r")
++	(abs:SI (match_operand:SI 1 "register_operand" " r")))]
++  "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv"
++  "abs\t%0, %1"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")])
++
+ (define_insn "clzsi2"
+   [(set (match_operand:SI 0 "register_operand"         "=r")
+ 	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+-  "TARGET_PERF_EXT"
++  "TARGET_EXT_PERF"
+   "clz\t%0, %1"
+   [(set_attr "type" "alu")
+    (set_attr "length" "4")])
+@@ -2347,34 +2232,212 @@ create_template:
+   [(set (match_operand:SI 0 "register_operand"          "=r")
+ 	(smax:SI (match_operand:SI 1 "register_operand" " r")
+ 		 (match_operand:SI 2 "register_operand" " r")))]
+-  "TARGET_PERF_EXT"
++  "TARGET_EXT_PERF"
+   "max\t%0, %1, %2"
+   [(set_attr "type" "alu")
+    (set_attr "length" "4")])
+ 
++(define_expand "uminqi3"
++  [(set (match_operand:QI 0 "register_operand" "")
++	(umin:QI (match_operand:QI 1 "register_operand" "")
++		 (match_operand:QI 2 "register_operand" "")))]
++  "TARGET_EXT_PERF"
++{
++  rtx tmpop[3];
++  tmpop[0] = gen_reg_rtx (SImode);
++  tmpop[1] = gen_reg_rtx (SImode);
++  tmpop[2] = gen_reg_rtx (SImode);
++
++  emit_insn (gen_zero_extendqisi2 (tmpop[1], operands[1]));
++  emit_insn (gen_zero_extendqisi2 (tmpop[2], operands[2]));
++  emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2]));
++  convert_move (operands[0], tmpop[0], false);
++  DONE;
++})
++
++(define_expand "sminqi3"
++  [(set (match_operand:QI 0 "register_operand" "")
++	(smin:QI (match_operand:QI 1 "register_operand" "")
++		 (match_operand:QI 2 "register_operand" "")))]
++  "TARGET_EXT_PERF"
++{
++  rtx tmpop[3];
++  tmpop[0] = gen_reg_rtx (SImode);
++  tmpop[1] = gen_reg_rtx (SImode);
++  tmpop[2] = gen_reg_rtx (SImode);
++
++  emit_insn (gen_extendqisi2 (tmpop[1], operands[1]));
++  emit_insn (gen_extendqisi2 (tmpop[2], operands[2]));
++  emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2]));
++  convert_move (operands[0], tmpop[0], false);
++  DONE;
++})
++
++(define_expand "uminhi3"
++  [(set (match_operand:HI 0 "register_operand" "")
++	(umin:HI (match_operand:HI 1 "register_operand" "")
++		 (match_operand:HI 2 "register_operand" "")))]
++  "TARGET_EXT_PERF"
++{
++  rtx tmpop[3];
++  tmpop[0] = gen_reg_rtx (SImode);
++  tmpop[1] = gen_reg_rtx (SImode);
++  tmpop[2] = gen_reg_rtx (SImode);
++
++  emit_insn (gen_zero_extendhisi2 (tmpop[1], operands[1]));
++  emit_insn (gen_zero_extendhisi2 (tmpop[2], operands[2]));
++  emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2]));
++  convert_move (operands[0], tmpop[0], false);
++  DONE;
++})
++
++(define_expand "sminhi3"
++  [(set (match_operand:HI 0 "register_operand" "")
++	(smin:HI (match_operand:HI 1 "register_operand" "")
++		 (match_operand:HI 2 "register_operand" "")))]
++  "TARGET_EXT_PERF"
++{
++  rtx tmpop[3];
++  tmpop[0] = gen_reg_rtx (SImode);
++  tmpop[1] = gen_reg_rtx (SImode);
++  tmpop[2] = gen_reg_rtx (SImode);
++
++  emit_insn (gen_extendhisi2 (tmpop[1], operands[1]));
++  emit_insn (gen_extendhisi2 (tmpop[2], operands[2]));
++  emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2]));
++  convert_move (operands[0], tmpop[0], false);
++  DONE;
++})
++
+ (define_insn "sminsi3"
+   [(set (match_operand:SI 0 "register_operand"          "=r")
+ 	(smin:SI (match_operand:SI 1 "register_operand" " r")
+ 		 (match_operand:SI 2 "register_operand" " r")))]
+-  "TARGET_PERF_EXT"
++  "TARGET_EXT_PERF"
+   "min\t%0, %1, %2"
+   [(set_attr "type" "alu")
+    (set_attr "length" "4")])
+ 
+-(define_insn "*btst"
+-  [(set (match_operand:SI 0 "register_operand"                   "=   r")
+-	(zero_extract:SI (match_operand:SI 1 "register_operand"  "    r")
++(define_insn "btst"
++  [(set (match_operand:SI 0 "register_operand"                     "=   r")
++	(zero_extract:SI (match_operand:SI 1 "register_operand"    "    r")
+ 			 (const_int 1)
+-			 (match_operand:SI 2 "immediate_operand" " Iu05")))]
+-  "TARGET_PERF_EXT"
++			 (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")))]
++  "TARGET_EXT_PERF"
+   "btst\t%0, %1, %2"
+   [(set_attr "type" "alu")
+    (set_attr "length" "4")])
+ 
++(define_insn "ave"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(truncate:SI
++	  (ashiftrt:DI
++	    (plus:DI
++	      (plus:DI
++		(sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++		(sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
++	      (const_int 1))
++	  (const_int 1))))]
++  "TARGET_EXT_PERF"
++  "ave\t%0, %1, %2"
++  [(set_attr "type" "alu")
++   (set_attr "length" "4")])
++
+ ;; ----------------------------------------------------------------------------
+ 
+ ;; Pseudo NOPs
+ 
++;; Structural hazards NOP
++(define_insn "nop_res_dep"
++  [(unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_VOLATILE_RES_DEP)]
++  ""
++  "! structural dependency (%0 cycles)"
++  [(set_attr "length" "0")]
++)
++
++;; Data hazards NOP
++(define_insn "nop_data_dep"
++  [(unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_VOLATILE_DATA_DEP)]
++  ""
++  "! data dependency (%0 cycles)"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "relax_group"
++  [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)]
++  ""
++  ".relax_hint %0"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "innermost_loop_begin"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN)]
++  ""
++  ".innermost_loop_begin"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "innermost_loop_end"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_INNERMOST_LOOP_END)]
++  ""
++  ".innermost_loop_end"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "no_ifc_begin"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_IFC_BEGIN)]
++  ""
++  ".no_ifc_begin"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "no_ifc_end"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_IFC_END)]
++  ""
++  ".no_ifc_end"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "no_ex9_begin"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_EX9_BEGIN)]
++  ""
++  ".no_ex9_begin"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "no_ex9_end"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_EX9_END)]
++  ""
++  ".no_ex9_end"
++  [(set_attr "length" "0")]
++)
++
++(define_insn "hwloop_last_insn"
++  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_HWLOOP_LAST_INSN)]
++  ""
++  ""
++  [(set_attr "length" "0")]
++)
++
++;; Output .omit_fp_begin for fp-as-gp optimization.
++;; Also we have to set $fp register.
++(define_insn "omit_fp_begin"
++  [(set (match_operand:SI 0 "register_operand" "=x")
++	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))]
++  ""
++  "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----"
++  [(set_attr "length" "8")]
++)
++
++;; Output .omit_fp_end for fp-as-gp optimization.
++;; Claim that we have to use $fp register.
++(define_insn "omit_fp_end"
++  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)]
++  ""
++  "! -----\;.omit_fp_end\;! -----"
++  [(set_attr "length" "0")]
++)
++
+ (define_insn "pop25return"
+   [(return)
+    (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)]
+@@ -2383,4 +2446,262 @@ create_template:
+   [(set_attr "length" "0")]
+ )
+ 
++;; Add pc
++(define_insn "add_pc"
++  [(set (match_operand:SI 0 "register_operand"          "=r")
++	(plus:SI (match_operand:SI 1 "register_operand"  "0")
++		 (pc)))]
++  "TARGET_LINUX_ABI || flag_pic"
++  "add5.pc\t%0"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++(define_expand "bswapsi2"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(bswap:SI (match_operand:SI 1 "register_operand" "r")))]
++  ""
++{
++  emit_insn (gen_unspec_wsbh (operands[0], operands[1]));
++  emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16)));
++  DONE;
++})
++
++(define_insn "bswaphi2"
++  [(set (match_operand:HI 0 "register_operand" "=r")
++	(bswap:HI (match_operand:HI 1 "register_operand" "r")))]
++  ""
++  "wsbh\t%0, %1"
++  [(set_attr "type"    "alu")
++   (set_attr "length"    "4")]
++)
++
++;;  Hardware loop
++
++; operand 0 is the loop count pseudo register
++; operand 1 is the label to jump to at the top of the loop
++(define_expand "doloop_end"
++  [(parallel [(set (pc) (if_then_else
++			  (ne (match_operand:SI 0 "" "")
++			      (const_int 1))
++			  (label_ref (match_operand 1 "" ""))
++			  (pc)))
++	      (set (match_dup 0)
++		   (plus:SI (match_dup 0)
++			    (const_int -1)))
++	      (unspec [(const_int 0)] UNSPEC_LOOP_END)
++	      (clobber (match_dup 2))])] ; match_scratch
++  "NDS32_HW_LOOP_P ()"
++{
++  /* The loop optimizer doesn't check the predicates... */
++  if (GET_MODE (operands[0]) != SImode)
++    FAIL;
++  operands[2] = gen_rtx_SCRATCH (SImode);
++})
++
++(define_insn "loop_end"
++  [(set (pc)
++	(if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "0, 0, *r, 0")
++			  (const_int 1))
++		      (label_ref (match_operand 1 "" ""))
++		      (pc)))
++   (set (match_operand:SI 0 "nonimmediate_operand" "=r, m, m, *f")
++	(plus:SI (match_dup 3)
++		 (const_int -1)))
++   (unspec [(const_int 0)] UNSPEC_LOOP_END)
++   (clobber (match_scratch:SI 2 "=X, &r, &r, &r"))]
++  "NDS32_HW_LOOP_P ()"
++  "#"
++  [(set_attr "length" "12, 12, 12, 12")])
++
++(define_split
++  [(set (pc)
++	(if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "")
++			  (const_int 1))
++		      (label_ref (match_operand 1 "" ""))
++		      (pc)))
++   (set (match_operand:SI 0 "fpu_reg_or_memory_operand" "")
++	(plus:SI (match_dup 3)
++		 (const_int -1)))
++   (unspec [(const_int 0)] UNSPEC_LOOP_END)
++   (clobber (match_scratch:SI 2 ""))]
++  "NDS32_HW_LOOP_P ()"
++  [(set (match_dup 2) (plus:SI (match_dup 3) (const_int -1)))
++   (set (match_dup 0) (match_dup 2))
++   (set (pc)
++	(if_then_else (ne (match_dup 2) (const_int 0))
++		      (label_ref (match_dup 1))
++		      (pc)))]
++{
++  if (fpu_reg_or_memory_operand (operands[3], SImode))
++    {
++      emit_move_insn (operands[2], operands[3]);
++      operands[3] = operands[2];
++    }
++})
++
++(define_insn "mtlbi_hint"
++  [(set (reg:SI LB_REGNUM)
++	(match_operand:SI 0 "nds32_label_operand" "i"))
++   (unspec [(match_operand 1 "const_int_operand" "i")] UNSPEC_LOOP_END)]
++  "NDS32_HW_LOOP_P ()"
++  "mtlbi\t%0"
++  [(set_attr "length"	"4")])
++
++(define_insn "mtlbi"
++  [(set (reg:SI LB_REGNUM)
++	(match_operand:SI 0 "nds32_label_operand" "i"))]
++  "NDS32_HW_LOOP_P ()"
++  "mtlbi\t%0"
++  [(set_attr "length"	"4")])
++
++(define_insn "mtlei"
++  [(set (reg:SI LE_REGNUM)
++	(match_operand:SI 0 "nds32_label_operand" "i"))]
++  "NDS32_HW_LOOP_P ()"
++  "mtlei\t%0"
++  [(set_attr "length"	"4")])
++
++(define_insn "init_lc"
++  [(set (reg:SI LC_REGNUM)
++	(match_operand:SI 0 "register_operand" "r"))
++   (unspec [(match_operand 1 "const_int_operand" "i")] UNSPEC_LOOP_END)]
++  "NDS32_HW_LOOP_P ()"
++  "mtusr\t%0, LC"
++  [(set_attr "length"	"4")])
++
++; After replacing the hwloop, use this pattern to get the right CFG.
++(define_insn "hwloop_cfg"
++  [(set (pc)
++	(if_then_else (ne (reg:SI LC_REGNUM)
++			  (const_int 1))
++		      (match_operand:SI 1 "nds32_label_operand" "i")
++		      (pc)))
++   (set (reg:SI LC_REGNUM)
++	(plus:SI (reg:SI LC_REGNUM)
++		 (const_int -1)))
++   (use (reg:SI LB_REGNUM))
++   (use (reg:SI LE_REGNUM))
++   (use (reg:SI LC_REGNUM))
++   (unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_LOOP_END)]
++  "TARGET_HWLOOP"
++  ""
++  [(set_attr "length" "0")])
++;; ----------------------------------------------------------------------------
++
++;; Patterns for exception handling
++
++(define_expand "eh_return"
++  [(use (match_operand 0 "general_operand"))]
++  ""
++{
++  emit_insn (gen_nds32_eh_return (operands[0]));
++  DONE;
++})
++
++(define_insn_and_split "nds32_eh_return"
++  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_EH_RETURN)]
++  ""
++  "#"
++  "reload_completed"
++  [(const_int 0)]
++{
++  rtx place;
++  rtx addr;
++
++  /* The operands[0] is the handler address.  We need to assign it
++     to return address rtx so that we can jump to exception handler
++     when returning from current function.  */
++
++  if (cfun->machine->lp_size == 0)
++    {
++      /* If $lp is not saved in the stack frame, we can take $lp directly.  */
++      place = gen_rtx_REG (SImode, LP_REGNUM);
++    }
++  else
++    {
++      /* Otherwise, we need to locate the stack slot of return address.
++	 The return address is generally saved in [$fp-4] location.
++	 However, DSE (dead store elimination) does not detect an alias
++	 between [$fp-x] and [$sp+y].  This can result in a store to save
++	 $lp introduced by builtin_eh_return() being incorrectly deleted
++	 if it is based on $fp.  The solution we take here is to compute
++	 the offset relative to stack pointer and then use $sp to access
++	 location so that the alias can be detected.
++	 FIXME: What if the immediate value "offset" is too large to be
++	        fit in a single addi instruction?  */
++      HOST_WIDE_INT offset;
++
++      offset = (cfun->machine->fp_size
++		+ cfun->machine->gp_size
++		+ cfun->machine->lp_size
++		+ cfun->machine->callee_saved_gpr_regs_size
++		+ cfun->machine->callee_saved_area_gpr_padding_bytes
++		+ cfun->machine->callee_saved_fpr_regs_size
++		+ cfun->machine->eh_return_data_regs_size
++		+ cfun->machine->local_size
++		+ cfun->machine->out_args_size);
++
++      addr = plus_constant (Pmode, stack_pointer_rtx, offset - 4);
++      place = gen_frame_mem (SImode, addr);
++    }
++
++  emit_move_insn (place, operands[0]);
++  DONE;
++})
++
++;; ----------------------------------------------------------------------------
++
++;; Patterns for TLS.
++;; The following two TLS patterns are not expanded directly because the
++;; intermediate value may be spilled onto the stack.  As a result, it would
++;; be hard to analyze the define-use chain in the relax_opt pass.
++
++
++;; There is an unspec operand to record the RELAX_GROUP number because each
++;; emitted instruction needs a relax_hint above it.
++(define_insn "tls_desc"
++  [(set (reg:SI 0)
++	(call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC)
++	      (const_int 1)))
++   (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
++   (use (reg:SI GP_REGNUM))
++   (clobber (reg:SI LP_REGNUM))
++   (clobber (reg:SI TA_REGNUM))]
++  ""
++  {
++    return nds32_output_tls_desc (operands);
++  }
++  [(set_attr "length" "20")
++   (set_attr "type" "branch")]
++)
++
++;; There is an unspec operand to record the RELAX_GROUP number because each
++;; emitted instruction needs a relax_hint above it.
++(define_insn "tls_ie"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++	(unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE))
++   (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
++   (use (reg:SI GP_REGNUM))]
++  ""
++  {
++    return nds32_output_tls_ie (operands);
++  }
++  [(set (attr "length") (if_then_else (match_test "flag_pic")
++				      (const_int 12)
++				      (const_int 8)))
++   (set_attr "type" "misc")]
++)
++
++;; The pattern is for some relaxation groups that have to keep addsi3 in 32-bit mode.
++(define_insn "addsi3_32bit"
++  [(set (match_operand:SI 0 "register_operand"             "=r")
++	(unspec:SI [(match_operand:SI 1 "register_operand" "%r")
++		    (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))]
++  ""
++  "add\t%0, %1, %2";
++  [(set_attr "type"    "alu")
++   (set_attr "length"  "4")
++   (set_attr "feature" "v1")])
++
+ ;; ----------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt
+index 938136f..a70ced9 100644
+--- a/gcc/config/nds32/nds32.opt
++++ b/gcc/config/nds32/nds32.opt
+@@ -21,14 +21,67 @@
+ HeaderInclude
+ config/nds32/nds32-opts.h
+ 
+-mbig-endian
+-Target Report RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN)
++; ---------------------------------------------------------------
++; The following options are designed for aliasing and compatibility options.
++
++EB
++Target RejectNegative Alias(mbig-endian)
+ Generate code in big-endian mode.
+ 
+-mlittle-endian
+-Target Report RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN)
++EL
++Target RejectNegative Alias(mlittle-endian)
+ Generate code in little-endian mode.
+ 
++mfp-as-gp
++Target RejectNegative Alias(mforce-fp-as-gp)
++Force performing fp-as-gp optimization.
++
++mno-fp-as-gp
++Target RejectNegative Alias(mforbid-fp-as-gp)
++Forbid performing fp-as-gp optimization.
++
++m16bit
++Target Undocumented Alias(m16-bit)
++Generate 16-bit instructions.
++
++mcrt-arg=yes
++Target Undocumented Alias(mcrt-arg)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++mreduce-regs
++Target Undocumented Alias(mreduced-regs)
++Use reduced-set registers for register allocation.
++
++mcache-line-size=
++Target RejectNegative Joined UInteger Undocumented Alias(mcache-block-size=)
++Alias of -mcache-block-size=
++
++; ---------------------------------------------------------------
++
++mabi=
++Target RejectNegative Joined Enum(abi_type) Var(nds32_abi) Init(TARGET_DEFAULT_ABI)
++Specify which ABI type to generate code for: 2, 2fp+.
++
++Enum
++Name(abi_type) Type(enum abi_type)
++Known ABIs (for use with the -mabi= option):
++
++EnumValue
++Enum(abi_type) String(2) Value(NDS32_ABI_V2)
++
++EnumValue
++Enum(abi_type) String(2fp+) Value(NDS32_ABI_V2_FP_PLUS)
++
++mfloat-abi=soft
++Target RejectNegative Alias(mabi=, 2)
++Use the soft floating-point ABI; this is an alias for -mabi=2.
++
++mfloat-abi=hard
++Target RejectNegative Alias(mabi=, 2fp+)
++Use the hard floating-point ABI; this is an alias for -mabi=2fp+.
++
++; ---------------------------------------------------------------
++
+ mreduced-regs
+ Target Report RejectNegative Negative(mfull-regs) Mask(REDUCED_REGS)
+ Use reduced-set registers for register allocation.
+@@ -37,14 +90,148 @@ mfull-regs
+ Target Report RejectNegative Negative(mreduced-regs) InverseMask(REDUCED_REGS)
+ Use full-set registers for register allocation.
+ 
++; ---------------------------------------------------------------
++
++Os1
++Target
++Optimize for size level 1. This option will disable IFC and EX9 to prevent performance drop.
++
++Os2
++Target
++Optimize for size level 2. This option will disable IFC and EX9 for innermost loop to prevent performance drop.
++
++Os3
++Target
++Optimize for size level 3, which means performance is not considered.
++
++malways-align
++Target Mask(ALWAYS_ALIGN)
++Always align function entry, jump target and return address.
++
++malign-functions
++Target Mask(ALIGN_FUNCTION)
++Align function entry to 4 byte.
++
++mbig-endian
++Target Undocumented RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN)
++Generate code in big-endian mode.
++
++mlittle-endian
++Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN)
++Generate code in little-endian mode.
++
++mforce-fp-as-gp
++Target Undocumented Mask(FORCE_FP_AS_GP)
++Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization.
++
++mforbid-fp-as-gp
++Target Undocumented Mask(FORBID_FP_AS_GP)
++Forbid using $fp to access static and global variables.  This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'.
++
++minline-strcpy
++Target Undocumented Mask(INLINE_STRCPY)
++Inlining strcpy function.
++
++mload-store-opt
++Target Mask(LOAD_STORE_OPT)
++Enable load store optimization.
++
++mregrename
++Target Mask(REGRENAME_OPT)
++Enable target dependent register rename optimization.
++
++mgcse
++Target Mask(GCSE_OPT)
++Enable target dependent global CSE optimization.
++
++mconst-remater
++Target Var(flag_nds32_const_remater_opt)
++Enable target dependent constant rematerialization optimization.
++
++msoft-fp-arith-comm
++Target Mask(SOFT_FP_ARITH_COMM)
++Enable operand commutative for soft floating point arithmetic optimization.
++
++msign-conversion
++Target Var(flag_nds32_sign_conversion)
++Enable the sign conversion in Gimple level.
++
++mscalbn-transform
++Target Var(flag_nds32_scalbn_transform)
++Enable the scalbn transform in Gimple level.
++
++mlmwsmw-opt
++Target Var(flag_nds32_lmwsmw_opt)
++Enable the load/store multiple optimization.
++
++mict-model=
++Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL)
++Specify the address generation strategy for ICT call's code model.
++
++Enum
++Name(nds32_ict_model_type) Type(enum nds32_ict_model_type)
++Known ICT model types (for use with the -mict-model= option):
++
++EnumValue
++Enum(nds32_ict_model_type) String(small) Value(ICT_MODEL_SMALL)
++
++EnumValue
++Enum(nds32_ict_model_type) String(large) Value(ICT_MODEL_LARGE)
++
++mlmwsmw-cost=
++Target RejectNegative Joined Enum(lmwsmw_cost_type) Var(flag_lmwsmw_cost) Init(LMWSMW_OPT_AUTO)
++Specify the load/store insn generate to lmw/smw.
++
++Enum
++Name(lmwsmw_cost_type) Type(enum lmwsmw_cost_type)
++Known lmwsmw cost type (for use with the -mlmwsmw-cost= option):
++
++EnumValue
++Enum(lmwsmw_cost_type) String(size) Value(LMWSMW_OPT_SIZE)
++
++EnumValue
++Enum(lmwsmw_cost_type) String(speed) Value(LMWSMW_OPT_SPEED)
++
++EnumValue
++Enum(lmwsmw_cost_type) String(all) Value(LMWSMW_OPT_ALL)
++
++EnumValue
++Enum(lmwsmw_cost_type) String(auto) Value(LMWSMW_OPT_AUTO)
++
++mabi-compatible
++Target Var(flag_nds32_abi_compatible)
++Enable the ABI compatible detection.
++
++mcprop-acc
++Target Var(flag_nds32_cprop_acc)
++Enable the copy propagation for accumulate style instructions.
++
++; ---------------------------------------------------------------
++
+ mcmov
+ Target Report Mask(CMOV)
+ Generate conditional move instructions.
+ 
+-mperf-ext
+-Target Report Mask(PERF_EXT)
++mhw-abs
++Target Report Mask(HW_ABS)
++Generate hardware abs instructions.
++
++mext-perf
++Target Report Mask(EXT_PERF)
+ Generate performance extension instructions.
+ 
++mext-perf2
++Target Report Mask(EXT_PERF2)
++Generate performance extension version 2 instructions.
++
++mext-string
++Target Report Mask(EXT_STRING)
++Generate string extension instructions.
++
++mext-dsp
++Target Report Mask(EXT_DSP)
++Generate DSP extension instructions.
++
+ mv3push
+ Target Report Mask(V3PUSH)
+ Generate v3 push25/pop25 instructions.
+@@ -53,10 +240,22 @@ m16-bit
+ Target Report Mask(16_BIT)
+ Generate 16-bit instructions.
+ 
++mrelax-hint
++Target Report Mask(RELAX_HINT)
++Insert relax hint for linker to do relaxation.
++
++mvh
++Target Report Mask(VH) Condition(!TARGET_LINUX_ABI)
++Enable Virtual Hosting support.
++
+ misr-vector-size=
+-Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE)
++Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) Condition(!TARGET_LINUX_ABI)
+ Specify the size of each interrupt vector, which must be 4 or 16.
+ 
++misr-secure=
++Target RejectNegative Joined UInteger Var(nds32_isr_secure_level) Init(0)
++Specify the security level of c-isr for the whole file.
++
+ mcache-block-size=
+ Target RejectNegative Joined UInteger Var(nds32_cache_block_size) Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE)
+ Specify the size of each cache block, which must be a power of 2 between 4 and 512.
+@@ -73,32 +272,418 @@ EnumValue
+ Enum(nds32_arch_type) String(v2) Value(ARCH_V2)
+ 
+ EnumValue
++Enum(nds32_arch_type) String(v2j) Value(ARCH_V2J)
++
++EnumValue
+ Enum(nds32_arch_type) String(v3) Value(ARCH_V3)
+ 
+ EnumValue
++Enum(nds32_arch_type) String(v3j) Value(ARCH_V3J)
++
++EnumValue
+ Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M)
+ 
+-mcmodel=
+-Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM)
+-Specify the address generation strategy for code model.
++EnumValue
++Enum(nds32_arch_type) String(v3m+) Value(ARCH_V3M_PLUS)
++
++EnumValue
++Enum(nds32_arch_type) String(v3f) Value(ARCH_V3F)
++
++EnumValue
++Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S)
++
++mcpu=
++Target RejectNegative Joined Enum(nds32_cpu_type) Var(nds32_cpu_option) Init(CPU_N9)
++Specify the cpu for pipeline model.
+ 
+ Enum
+-Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
+-Known cmodel types (for use with the -mcmodel= option):
++Name(nds32_cpu_type) Type(enum nds32_cpu_type)
++Known cpu types (for use with the -mcpu= option):
++
++EnumValue
++Enum(nds32_cpu_type) String(n6) Value(CPU_N6)
++
++EnumValue
++Enum(nds32_cpu_type) String(n650) Value(CPU_N6)
++
++EnumValue
++Enum(nds32_cpu_type) String(n7) Value(CPU_N7)
++
++EnumValue
++Enum(nds32_cpu_type) String(n705) Value(CPU_N7)
++
++EnumValue
++Enum(nds32_cpu_type) String(n8) Value(CPU_N8)
++
++EnumValue
++Enum(nds32_cpu_type) String(n801) Value(CPU_N8)
++
++EnumValue
++Enum(nds32_cpu_type) String(sn8) Value(CPU_N8)
++
++EnumValue
++Enum(nds32_cpu_type) String(sn801) Value(CPU_N8)
++
++EnumValue
++Enum(nds32_cpu_type) String(s8) Value(CPU_N8)
++
++EnumValue
++Enum(nds32_cpu_type) String(s801) Value(CPU_N8)
++
++EnumValue
++Enum(nds32_cpu_type) String(e8) Value(CPU_E8)
++
++EnumValue
++Enum(nds32_cpu_type) String(e801) Value(CPU_E8)
++
++EnumValue
++Enum(nds32_cpu_type) String(n820) Value(CPU_E8)
++
++EnumValue
++Enum(nds32_cpu_type) String(s830) Value(CPU_E8)
++
++EnumValue
++Enum(nds32_cpu_type) String(e830) Value(CPU_E8)
++
++EnumValue
++Enum(nds32_cpu_type) String(n9) Value(CPU_N9)
++
++EnumValue
++Enum(nds32_cpu_type) String(n903) Value(CPU_N9)
++
++EnumValue
++Enum(nds32_cpu_type) String(n903a) Value(CPU_N9)
++
++EnumValue
++Enum(nds32_cpu_type) String(n968) Value(CPU_N9)
++
++EnumValue
++Enum(nds32_cpu_type) String(n968a) Value(CPU_N9)
++
++EnumValue
++Enum(nds32_cpu_type) String(n10) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1033) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10)
+ 
+ EnumValue
+-Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
++Enum(nds32_cpu_type) String(n1068) Value(CPU_N10)
+ 
+ EnumValue
+-Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
++Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10)
+ 
+ EnumValue
+-Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
++Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(d10) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(d1088) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10)
++
++EnumValue
++Enum(nds32_cpu_type) Undocumented String(graywolf) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(n15) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(d15) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(n15s) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(d15s) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(n15f) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(d15f) Value(CPU_GRAYWOLF)
++
++EnumValue
++Enum(nds32_cpu_type) String(n12) Value(CPU_N12)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1213) Value(CPU_N12)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1233) Value(CPU_N12)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12)
++
++EnumValue
++Enum(nds32_cpu_type) String(n13) Value(CPU_N13)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1337) Value(CPU_N13)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13)
++
++EnumValue
++Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13)
++
++EnumValue
++Enum(nds32_cpu_type) Undocumented String(panther) Value(CPU_PANTHER)
++
++EnumValue
++Enum(nds32_cpu_type) Undocumented String(simple) Value(CPU_SIMPLE)
++
++mcpu=n15
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++Alias for multi-lib work.
++
++mcpu=n15f
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++Alias for multi-lib work.
++
++mcpu=n15s
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++Alias for multi-lib work.
++
++mcpu=d15
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++Alias for multi-lib work.
++
++mcpu=d15s
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++Alias for multi-lib work.
++
++mcpu=d15f
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++Alias for multi-lib work.
++
++mgraywolf
++Target RejectNegative Undocumented Alias(mcpu=, graywolf)
++This alias is only for gcc parallel test.
++
++mv3m+
++Target RejectNegative Undocumented Alias(march=, v3m+)
++This alias is only for gcc parallel test.
++
++mmemory-model=
++Target RejectNegative Joined Enum(nds32_memory_model_type) Var(nds32_memory_model_option) Init(MEMORY_MODEL_FAST)
++Specify the memory model, fast or slow memory.
++
++Enum
++Name(nds32_memory_model_type) Type(enum nds32_memory_model_type)
++
++EnumValue
++Enum(nds32_memory_model_type) String(slow) Value(MEMORY_MODEL_SLOW)
++
++EnumValue
++Enum(nds32_memory_model_type) String(fast) Value(MEMORY_MODEL_FAST)
++
++mconfig-fpu=
++Target RejectNegative Joined Enum(float_reg_number) Var(nds32_fp_regnum) Init(TARGET_CONFIG_FPU_DEFAULT)
++Specify a fpu configuration value from 0 to 7; 0-3 is as FPU spec says, and 4-7 is corresponding to 0-3.
++
++Enum
++Name(float_reg_number) Type(enum float_reg_number)
++Known floating-point number of registers (for use with the -mconfig-fpu= option):
++
++EnumValue
++Enum(float_reg_number) String(0) Value(NDS32_CONFIG_FPU_0)
++
++EnumValue
++Enum(float_reg_number) String(1) Value(NDS32_CONFIG_FPU_1)
++
++EnumValue
++Enum(float_reg_number) String(2) Value(NDS32_CONFIG_FPU_2)
++
++EnumValue
++Enum(float_reg_number) String(3) Value(NDS32_CONFIG_FPU_3)
++
++EnumValue
++Enum(float_reg_number) String(4) Value(NDS32_CONFIG_FPU_4)
++
++EnumValue
++Enum(float_reg_number) String(5) Value(NDS32_CONFIG_FPU_5)
++
++EnumValue
++Enum(float_reg_number) String(6) Value(NDS32_CONFIG_FPU_6)
++
++EnumValue
++Enum(float_reg_number) String(7) Value(NDS32_CONFIG_FPU_7)
++
++mconfig-mul=
++Target RejectNegative Joined Enum(nds32_mul_type) Var(nds32_mul_config) Init(MUL_TYPE_FAST_1)
++Specify configuration of instruction mul: fast1, fast2 or slow. The default is fast1.
++
++Enum
++Name(nds32_mul_type) Type(enum nds32_mul_type)
++
++EnumValue
++Enum(nds32_mul_type) String(fast) Value(MUL_TYPE_FAST_1)
++
++EnumValue
++Enum(nds32_mul_type) String(fast1) Value(MUL_TYPE_FAST_1)
++
++EnumValue
++Enum(nds32_mul_type) String(fast2) Value(MUL_TYPE_FAST_2)
++
++EnumValue
++Enum(nds32_mul_type) String(slow) Value(MUL_TYPE_SLOW)
++
++mconfig-register-ports=
++Target RejectNegative Joined Enum(nds32_register_ports) Var(nds32_register_ports_config) Init(REG_PORT_3R2W)
++Specify how many read/write ports for n9/n10 cores.  The value should be 3r2w or 2r1w.
++
++Enum
++Name(nds32_register_ports) Type(enum nds32_register_ports)
++
++EnumValue
++Enum(nds32_register_ports) String(3r2w) Value(REG_PORT_3R2W)
++
++EnumValue
++Enum(nds32_register_ports) String(2r1w) Value(REG_PORT_2R1W)
++
++mreorg-out-of-order
++Target Report Var(flag_reorg_out_of_order) Init(0)
++Allow out-of-order reorganization for multiple issue micro-architectures.
++
++mifc
++Target Report Mask(IFC)
++Use special directives to guide linker doing ifc optimization.
++
++mex9
++Target Report Mask(EX9)
++Use special directives to guide linker doing ex9 optimization.
++
++mprint-stall-cycles
++Target Report Mask(PRINT_STALLS)
++Print stall cycles due to structural or data dependencies. It should be used with the option '-S'.
++Note that stall cycles are determined by the compiler's pipeline model and it may not be precise.
+ 
+ mctor-dtor
+ Target Report
+ Enable constructor/destructor feature.
+ 
++mcrt-arg
++Target Report
++Enable argc/argv passed by simulator.
++
+ mrelax
+ Target Report
+ Guide linker to relax instructions.
++
++minnermost-loop
++Target Report Mask(INNERMOST_LOOP)
++Insert the innermost loop directive.
++
++mext-fpu-fma
++Target Report Mask(EXT_FPU_FMA)
++Generate floating-point multiply-accumulation instructions.
++
++mext-fpu-sp
++Target Report Mask(FPU_SINGLE)
++Generate single-precision floating-point instructions.
++
++mext-fpu-dp
++Target Report Mask(FPU_DOUBLE)
++Generate double-precision floating-point instructions.
++
++mext-zol
++Target Report Mask(HWLOOP)
++Insert the hardware loop directive.
++
++mforce-no-ext-zol
++Target Undocumented Report Mask(FORCE_NO_HWLOOP)
++Force disable hardware loop, even use -mext-zol.
++
++mforce-no-ext-dsp
++Target Undocumented Report Mask(FORCE_NO_EXT_DSP)
++Force disable DSP extension, even use -mext-dsp.
++
++mforce-memcpy-zol
++Target Report Var(flag_force_memcpy_zol) Init(0)
++Force enable hardware loop in memcpy function.
++
++msched-prolog-epilog
++Target Var(flag_sched_prolog_epilog) Init(1)
++Permit scheduling of a function's prologue and epilogue sequence.
++
++mret-in-naked-func
++Target Var(flag_ret_in_naked_func) Init(1)
++Generate return instruction in naked function.
++
++malways-save-lp
++Target Var(flag_always_save_lp) Init(0)
++Always save $lp in the stack.
++
++munaligned-access
++Target Report Var(flag_unaligned_access) Init(0)
++Enable unaligned word and halfword accesses to packed data.
++
++; ---------------------------------------------------------------
++; The following options are designed for compatibility issue.
++; Hopefully these obsolete options will be removed one day.
++
++mg
++Target Undocumented Warn(%qs is deprecated and has no effect)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++mdx-regs
++Target Undocumented Warn(%qs is deprecated and has no effect)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++mexpand-isr
++Target Undocumented Warn(%qs is deprecated and has no effect)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++mcrt-cpp=yes
++Target Undocumented Warn(%qs is deprecated and has no effect, use -mctor-dtor instead)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++mcrt-exit=yes
++Target Undocumented Warn(%qs is deprecated and has no effect, use -mctor-dtor instead)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++mlib=
++Target RejectNegative Joined Undocumented Warn(%qs is deprecated and has no effect)
++Obsolete option.  Users SHOULD NOT use this option in the command line.
++
++; ---------------------------------------------------------------
++; The following options are designed for Andes ACE support.
++; They are not among the obsolete compatibility options listed above.
++
++mace
++Target RejectNegative
++Compile with Andes ACE.
++
++mace-s2s=
++Target Joined RejectNegative
++Argument for pass to Andes's ACE source-to-source translator.
++
++
++; ---------------------------------------------------------------
+diff --git a/gcc/config/nds32/nds32_init.inc b/gcc/config/nds32/nds32_init.inc
+new file mode 100644
+index 0000000..1084ad0
+--- /dev/null
++++ b/gcc/config/nds32/nds32_init.inc
+@@ -0,0 +1,43 @@
++/*
++ * nds32_init.inc
++ *
++ * NDS32 architecture startup assembler header file
++ *
++ */
++
++.macro nds32_init
++
++	! Initialize GP for data access
++	la      $gp, _SDA_BASE_
++
++#if defined(__NDS32_EXT_EX9__)
++	! Check HW for EX9
++	mfsr    $r0, $MSC_CFG
++	li      $r1, (1 << 24)
++	and     $r2, $r0, $r1
++	beqz    $r2, 1f
++
++	! Initialize the table base of EX9 instruction
++	la      $r0, _ITB_BASE_
++	mtusr   $r0, $ITB
++1:
++#endif
++
++#if defined(__NDS32_EXT_FPU_DP__) || defined(__NDS32_EXT_FPU_SP__)
++	! Enable FPU
++	mfsr    $r0, $FUCOP_CTL
++	ori     $r0, $r0, #0x1
++	mtsr    $r0, $FUCOP_CTL
++	dsb
++
++	! Enable denormalized flush-to-Zero mode
++	fmfcsr  $r0
++	ori     $r0,$r0,#0x1000
++	fmtcsr  $r0
++	dsb
++#endif
++
++	! Initialize default stack pointer
++	la      $sp, _stack
++
++.endm
+diff --git a/gcc/config/nds32/nds32_intrinsic.h b/gcc/config/nds32/nds32_intrinsic.h
+index 3e868dc..fef727b 100644
+--- a/gcc/config/nds32/nds32_intrinsic.h
++++ b/gcc/config/nds32/nds32_intrinsic.h
+@@ -26,12 +26,1383 @@
+ #ifndef _NDS32_INTRINSIC_H
+ #define _NDS32_INTRINSIC_H
+ 
++typedef signed char int8x4_t __attribute ((vector_size(4)));
++typedef short int16x2_t __attribute ((vector_size(4)));
++typedef int int32x2_t __attribute__((vector_size(8)));
++typedef unsigned char uint8x4_t __attribute__ ((vector_size (4)));
++typedef unsigned short uint16x2_t __attribute__ ((vector_size (4)));
++typedef unsigned int uint32x2_t __attribute__((vector_size(8)));
++
++/* General intrinsic register names.  */
+ enum nds32_intrinsic_registers
+ {
+-  __NDS32_REG_PSW__ = 1024,
++  __NDS32_REG_CPU_VER__ = 1024,
++  __NDS32_REG_ICM_CFG__,
++  __NDS32_REG_DCM_CFG__,
++  __NDS32_REG_MMU_CFG__,
++  __NDS32_REG_MSC_CFG__,
++  __NDS32_REG_MSC_CFG2__,
++  __NDS32_REG_CORE_ID__,
++  __NDS32_REG_FUCOP_EXIST__,
++
++  __NDS32_REG_PSW__,
+   __NDS32_REG_IPSW__,
++  __NDS32_REG_P_IPSW__,
++  __NDS32_REG_IVB__,
++  __NDS32_REG_EVA__,
++  __NDS32_REG_P_EVA__,
+   __NDS32_REG_ITYPE__,
+-  __NDS32_REG_IPC__
++  __NDS32_REG_P_ITYPE__,
++
++  __NDS32_REG_MERR__,
++  __NDS32_REG_IPC__,
++  __NDS32_REG_P_IPC__,
++  __NDS32_REG_OIPC__,
++  __NDS32_REG_P_P0__,
++  __NDS32_REG_P_P1__,
++
++  __NDS32_REG_INT_MASK__,
++  __NDS32_REG_INT_MASK2__,
++  __NDS32_REG_INT_MASK3__,
++  __NDS32_REG_INT_PEND__,
++  __NDS32_REG_INT_PEND2__,
++  __NDS32_REG_INT_PEND3__,
++  __NDS32_REG_SP_USR__,
++  __NDS32_REG_SP_PRIV__,
++  __NDS32_REG_INT_PRI__,
++  __NDS32_REG_INT_PRI2__,
++  __NDS32_REG_INT_PRI3__,
++  __NDS32_REG_INT_PRI4__,
++  __NDS32_REG_INT_CTRL__,
++  __NDS32_REG_INT_TRIGGER__,
++  __NDS32_REG_INT_TRIGGER2__,
++  __NDS32_REG_INT_GPR_PUSH_DIS__,
++
++  __NDS32_REG_MMU_CTL__,
++  __NDS32_REG_L1_PPTB__,
++  __NDS32_REG_TLB_VPN__,
++  __NDS32_REG_TLB_DATA__,
++  __NDS32_REG_TLB_MISC__,
++  __NDS32_REG_VLPT_IDX__,
++  __NDS32_REG_ILMB__,
++  __NDS32_REG_DLMB__,
++
++  __NDS32_REG_CACHE_CTL__,
++  __NDS32_REG_HSMP_SADDR__,
++  __NDS32_REG_HSMP_EADDR__,
++  __NDS32_REG_SDZ_CTL__,
++  __NDS32_REG_N12MISC_CTL__,
++  __NDS32_REG_MISC_CTL__,
++  __NDS32_REG_ECC_MISC__,
++
++  __NDS32_REG_BPC0__,
++  __NDS32_REG_BPC1__,
++  __NDS32_REG_BPC2__,
++  __NDS32_REG_BPC3__,
++  __NDS32_REG_BPC4__,
++  __NDS32_REG_BPC5__,
++  __NDS32_REG_BPC6__,
++  __NDS32_REG_BPC7__,
++
++  __NDS32_REG_BPA0__,
++  __NDS32_REG_BPA1__,
++  __NDS32_REG_BPA2__,
++  __NDS32_REG_BPA3__,
++  __NDS32_REG_BPA4__,
++  __NDS32_REG_BPA5__,
++  __NDS32_REG_BPA6__,
++  __NDS32_REG_BPA7__,
++
++  __NDS32_REG_BPAM0__,
++  __NDS32_REG_BPAM1__,
++  __NDS32_REG_BPAM2__,
++  __NDS32_REG_BPAM3__,
++  __NDS32_REG_BPAM4__,
++  __NDS32_REG_BPAM5__,
++  __NDS32_REG_BPAM6__,
++  __NDS32_REG_BPAM7__,
++
++  __NDS32_REG_BPV0__,
++  __NDS32_REG_BPV1__,
++  __NDS32_REG_BPV2__,
++  __NDS32_REG_BPV3__,
++  __NDS32_REG_BPV4__,
++  __NDS32_REG_BPV5__,
++  __NDS32_REG_BPV6__,
++  __NDS32_REG_BPV7__,
++
++  __NDS32_REG_BPCID0__,
++  __NDS32_REG_BPCID1__,
++  __NDS32_REG_BPCID2__,
++  __NDS32_REG_BPCID3__,
++  __NDS32_REG_BPCID4__,
++  __NDS32_REG_BPCID5__,
++  __NDS32_REG_BPCID6__,
++  __NDS32_REG_BPCID7__,
++
++  __NDS32_REG_EDM_CFG__,
++  __NDS32_REG_EDMSW__,
++  __NDS32_REG_EDM_CTL__,
++  __NDS32_REG_EDM_DTR__,
++  __NDS32_REG_BPMTC__,
++  __NDS32_REG_DIMBR__,
++
++  __NDS32_REG_TECR0__,
++  __NDS32_REG_TECR1__,
++  __NDS32_REG_PFMC0__,
++  __NDS32_REG_PFMC1__,
++  __NDS32_REG_PFMC2__,
++  __NDS32_REG_PFM_CTL__,
++  __NDS32_REG_PFT_CTL__,
++  __NDS32_REG_HSP_CTL__,
++  __NDS32_REG_SP_BOUND__,
++  __NDS32_REG_SP_BOUND_PRIV__,
++  __NDS32_REG_SP_BASE__,
++  __NDS32_REG_SP_BASE_PRIV__,
++  __NDS32_REG_FUCOP_CTL__,
++  __NDS32_REG_PRUSR_ACC_CTL__,
++
++  __NDS32_REG_DMA_CFG__,
++  __NDS32_REG_DMA_GCSW__,
++  __NDS32_REG_DMA_CHNSEL__,
++  __NDS32_REG_DMA_ACT__,
++  __NDS32_REG_DMA_SETUP__,
++  __NDS32_REG_DMA_ISADDR__,
++  __NDS32_REG_DMA_ESADDR__,
++  __NDS32_REG_DMA_TCNT__,
++  __NDS32_REG_DMA_STATUS__,
++  __NDS32_REG_DMA_2DSET__,
++  __NDS32_REG_DMA_2DSCTL__,
++  __NDS32_REG_DMA_RCNT__,
++  __NDS32_REG_DMA_HSTATUS__,
++
++  __NDS32_REG_PC__,
++  __NDS32_REG_SP_USR1__,
++  __NDS32_REG_SP_USR2__,
++  __NDS32_REG_SP_USR3__,
++  __NDS32_REG_SP_PRIV1__,
++  __NDS32_REG_SP_PRIV2__,
++  __NDS32_REG_SP_PRIV3__,
++  __NDS32_REG_BG_REGION__,
++  __NDS32_REG_SFCR__,
++  __NDS32_REG_SIGN__,
++  __NDS32_REG_ISIGN__,
++  __NDS32_REG_P_ISIGN__,
++  __NDS32_REG_IFC_LP__,
++  __NDS32_REG_ITB__
+ };
+ 
++/* The cctl subtype for intrinsic.  */
++enum nds32_cctl_valck
++{
++  __NDS32_CCTL_L1D_VA_FILLCK__,
++  __NDS32_CCTL_L1D_VA_ULCK__,
++  __NDS32_CCTL_L1I_VA_FILLCK__,
++  __NDS32_CCTL_L1I_VA_ULCK__
++};
++
++enum nds32_cctl_idxwbinv
++{
++  __NDS32_CCTL_L1D_IX_WBINVAL__,
++  __NDS32_CCTL_L1D_IX_INVAL__,
++  __NDS32_CCTL_L1D_IX_WB__,
++  __NDS32_CCTL_L1I_IX_INVAL__
++};
++
++enum nds32_cctl_vawbinv
++{
++  __NDS32_CCTL_L1D_VA_INVAL__,
++  __NDS32_CCTL_L1D_VA_WB__,
++  __NDS32_CCTL_L1D_VA_WBINVAL__,
++  __NDS32_CCTL_L1I_VA_INVAL__
++};
++
++enum nds32_cctl_idxread
++{
++  __NDS32_CCTL_L1D_IX_RTAG__,
++  __NDS32_CCTL_L1D_IX_RWD__,
++  __NDS32_CCTL_L1I_IX_RTAG__,
++  __NDS32_CCTL_L1I_IX_RWD__
++};
++
++enum nds32_cctl_idxwrite
++{
++  __NDS32_CCTL_L1D_IX_WTAG__,
++  __NDS32_CCTL_L1D_IX_WWD__,
++  __NDS32_CCTL_L1I_IX_WTAG__,
++  __NDS32_CCTL_L1I_IX_WWD__
++};
++
++enum nds32_dpref
++{
++  __NDS32_DPREF_SRD__,
++  __NDS32_DPREF_MRD__,
++  __NDS32_DPREF_SWR__,
++  __NDS32_DPREF_MWR__,
++  __NDS32_DPREF_PTE__,
++  __NDS32_DPREF_CLWR__
++};
++
++/* ------------------------------------------------------------------------ */
++
++/* Define interrupt number for intrinsic function.  */
++#define NDS32_INT_H0 0
++#define NDS32_INT_H1 1
++#define NDS32_INT_H2 2
++#define NDS32_INT_H3 3
++#define NDS32_INT_H4 4
++#define NDS32_INT_H5 5
++#define NDS32_INT_H6 6
++#define NDS32_INT_H7 7
++#define NDS32_INT_H8 8
++#define NDS32_INT_H9 9
++#define NDS32_INT_H10 10
++#define NDS32_INT_H11 11
++#define NDS32_INT_H12 12
++#define NDS32_INT_H13 13
++#define NDS32_INT_H14 14
++#define NDS32_INT_H15 15
++#define NDS32_INT_H16 16
++#define NDS32_INT_H17 17
++#define NDS32_INT_H18 18
++#define NDS32_INT_H19 19
++#define NDS32_INT_H20 20
++#define NDS32_INT_H21 21
++#define NDS32_INT_H22 22
++#define NDS32_INT_H23 23
++#define NDS32_INT_H24 24
++#define NDS32_INT_H25 25
++#define NDS32_INT_H26 26
++#define NDS32_INT_H27 27
++#define NDS32_INT_H28 28
++#define NDS32_INT_H29 29
++#define NDS32_INT_H30 30
++#define NDS32_INT_H31 31
++#define NDS32_INT_H32 32
++#define NDS32_INT_H33 33
++#define NDS32_INT_H34 34
++#define NDS32_INT_H35 35
++#define NDS32_INT_H36 36
++#define NDS32_INT_H37 37
++#define NDS32_INT_H38 38
++#define NDS32_INT_H39 39
++#define NDS32_INT_H40 40
++#define NDS32_INT_H41 41
++#define NDS32_INT_H42 42
++#define NDS32_INT_H43 43
++#define NDS32_INT_H44 44
++#define NDS32_INT_H45 45
++#define NDS32_INT_H46 46
++#define NDS32_INT_H47 47
++#define NDS32_INT_H48 48
++#define NDS32_INT_H49 49
++#define NDS32_INT_H50 50
++#define NDS32_INT_H51 51
++#define NDS32_INT_H52 52
++#define NDS32_INT_H53 53
++#define NDS32_INT_H54 54
++#define NDS32_INT_H55 55
++#define NDS32_INT_H56 56
++#define NDS32_INT_H57 57
++#define NDS32_INT_H58 58
++#define NDS32_INT_H59 59
++#define NDS32_INT_H60 60
++#define NDS32_INT_H61 61
++#define NDS32_INT_H62 62
++#define NDS32_INT_H63 63
++#define NDS32_INT_SWI 64
++#define NDS32_INT_ALZ 65
++#define NDS32_INT_IDIVZE 66
++#define NDS32_INT_DSSIM 67
++
++/* ------------------------------------------------------------------------ */
++
++/* Define intrinsic register name macro for compatibility.  */
++#define NDS32_SR_CPU_VER               __NDS32_REG_CPU_VER__
++#define NDS32_SR_ICM_CFG               __NDS32_REG_ICM_CFG__
++#define NDS32_SR_DCM_CFG               __NDS32_REG_DCM_CFG__
++#define NDS32_SR_MMU_CFG               __NDS32_REG_MMU_CFG__
++#define NDS32_SR_MSC_CFG               __NDS32_REG_MSC_CFG__
++#define NDS32_SR_MSC_CFG2              __NDS32_REG_MSC_CFG2__
++#define NDS32_SR_CORE_ID               __NDS32_REG_CORE_ID__
++#define NDS32_SR_FUCOP_EXIST           __NDS32_REG_FUCOP_EXIST__
++#define NDS32_SR_PSW                   __NDS32_REG_PSW__
++#define NDS32_SR_IPSW                  __NDS32_REG_IPSW__
++#define NDS32_SR_P_IPSW                __NDS32_REG_P_IPSW__
++#define NDS32_SR_IVB                   __NDS32_REG_IVB__
++#define NDS32_SR_EVA                   __NDS32_REG_EVA__
++#define NDS32_SR_P_EVA                 __NDS32_REG_P_EVA__
++#define NDS32_SR_ITYPE                 __NDS32_REG_ITYPE__
++#define NDS32_SR_P_ITYPE               __NDS32_REG_P_ITYPE__
++#define NDS32_SR_MERR                  __NDS32_REG_MERR__
++#define NDS32_SR_IPC                   __NDS32_REG_IPC__
++#define NDS32_SR_P_IPC                 __NDS32_REG_P_IPC__
++#define NDS32_SR_OIPC                  __NDS32_REG_OIPC__
++#define NDS32_SR_P_P0                  __NDS32_REG_P_P0__
++#define NDS32_SR_P_P1                  __NDS32_REG_P_P1__
++#define NDS32_SR_INT_MASK              __NDS32_REG_INT_MASK__
++#define NDS32_SR_INT_MASK2             __NDS32_REG_INT_MASK2__
++#define NDS32_SR_INT_MASK3             __NDS32_REG_INT_MASK3__
++#define NDS32_SR_INT_PEND              __NDS32_REG_INT_PEND__
++#define NDS32_SR_INT_PEND2             __NDS32_REG_INT_PEND2__
++#define NDS32_SR_INT_PEND3             __NDS32_REG_INT_PEND3__
++#define NDS32_SR_SP_USR                __NDS32_REG_SP_USR__
++#define NDS32_SR_SP_PRIV               __NDS32_REG_SP_PRIV__
++#define NDS32_SR_INT_PRI               __NDS32_REG_INT_PRI__
++#define NDS32_SR_INT_PRI2              __NDS32_REG_INT_PRI2__
++#define NDS32_SR_INT_PRI3              __NDS32_REG_INT_PRI3__
++#define NDS32_SR_INT_PRI4              __NDS32_REG_INT_PRI4__
++#define NDS32_SR_INT_CTRL              __NDS32_REG_INT_CTRL__
++#define NDS32_SR_INT_TRIGGER           __NDS32_REG_INT_TRIGGER__
++#define NDS32_SR_INT_TRIGGER2          __NDS32_REG_INT_TRIGGER2__
++#define NDS32_SR_INT_GPR_PUSH_DIS      __NDS32_REG_INT_GPR_PUSH_DIS__
++#define NDS32_SR_MMU_CTL               __NDS32_REG_MMU_CTL__
++#define NDS32_SR_L1_PPTB               __NDS32_REG_L1_PPTB__
++#define NDS32_SR_TLB_VPN               __NDS32_REG_TLB_VPN__
++#define NDS32_SR_TLB_DATA              __NDS32_REG_TLB_DATA__
++#define NDS32_SR_TLB_MISC              __NDS32_REG_TLB_MISC__
++#define NDS32_SR_VLPT_IDX              __NDS32_REG_VLPT_IDX__
++#define NDS32_SR_ILMB                  __NDS32_REG_ILMB__
++#define NDS32_SR_DLMB                  __NDS32_REG_DLMB__
++#define NDS32_SR_CACHE_CTL             __NDS32_REG_CACHE_CTL__
++#define NDS32_SR_HSMP_SADDR            __NDS32_REG_HSMP_SADDR__
++#define NDS32_SR_HSMP_EADDR            __NDS32_REG_HSMP_EADDR__
++#define NDS32_SR_SDZ_CTL               __NDS32_REG_SDZ_CTL__
++#define NDS32_SR_N12MISC_CTL           __NDS32_REG_N12MISC_CTL__
++#define NDS32_SR_MISC_CTL              __NDS32_REG_MISC_CTL__
++#define NDS32_SR_ECC_MISC              __NDS32_REG_ECC_MISC__
++#define NDS32_SR_BPC0                  __NDS32_REG_BPC0__
++#define NDS32_SR_BPC1                  __NDS32_REG_BPC1__
++#define NDS32_SR_BPC2                  __NDS32_REG_BPC2__
++#define NDS32_SR_BPC3                  __NDS32_REG_BPC3__
++#define NDS32_SR_BPC4                  __NDS32_REG_BPC4__
++#define NDS32_SR_BPC5                  __NDS32_REG_BPC5__
++#define NDS32_SR_BPC6                  __NDS32_REG_BPC6__
++#define NDS32_SR_BPC7                  __NDS32_REG_BPC7__
++#define NDS32_SR_BPA0                  __NDS32_REG_BPA0__
++#define NDS32_SR_BPA1                  __NDS32_REG_BPA1__
++#define NDS32_SR_BPA2                  __NDS32_REG_BPA2__
++#define NDS32_SR_BPA3                  __NDS32_REG_BPA3__
++#define NDS32_SR_BPA4                  __NDS32_REG_BPA4__
++#define NDS32_SR_BPA5                  __NDS32_REG_BPA5__
++#define NDS32_SR_BPA6                  __NDS32_REG_BPA6__
++#define NDS32_SR_BPA7                  __NDS32_REG_BPA7__
++#define NDS32_SR_BPAM0                 __NDS32_REG_BPAM0__
++#define NDS32_SR_BPAM1                 __NDS32_REG_BPAM1__
++#define NDS32_SR_BPAM2                 __NDS32_REG_BPAM2__
++#define NDS32_SR_BPAM3                 __NDS32_REG_BPAM3__
++#define NDS32_SR_BPAM4                 __NDS32_REG_BPAM4__
++#define NDS32_SR_BPAM5                 __NDS32_REG_BPAM5__
++#define NDS32_SR_BPAM6                 __NDS32_REG_BPAM6__
++#define NDS32_SR_BPAM7                 __NDS32_REG_BPAM7__
++#define NDS32_SR_BPV0                  __NDS32_REG_BPV0__
++#define NDS32_SR_BPV1                  __NDS32_REG_BPV1__
++#define NDS32_SR_BPV2                  __NDS32_REG_BPV2__
++#define NDS32_SR_BPV3                  __NDS32_REG_BPV3__
++#define NDS32_SR_BPV4                  __NDS32_REG_BPV4__
++#define NDS32_SR_BPV5                  __NDS32_REG_BPV5__
++#define NDS32_SR_BPV6                  __NDS32_REG_BPV6__
++#define NDS32_SR_BPV7                  __NDS32_REG_BPV7__
++#define NDS32_SR_BPCID0                __NDS32_REG_BPCID0__
++#define NDS32_SR_BPCID1                __NDS32_REG_BPCID1__
++#define NDS32_SR_BPCID2                __NDS32_REG_BPCID2__
++#define NDS32_SR_BPCID3                __NDS32_REG_BPCID3__
++#define NDS32_SR_BPCID4                __NDS32_REG_BPCID4__
++#define NDS32_SR_BPCID5                __NDS32_REG_BPCID5__
++#define NDS32_SR_BPCID6                __NDS32_REG_BPCID6__
++#define NDS32_SR_BPCID7                __NDS32_REG_BPCID7__
++#define NDS32_SR_EDM_CFG               __NDS32_REG_EDM_CFG__
++#define NDS32_SR_EDMSW                 __NDS32_REG_EDMSW__
++#define NDS32_SR_EDM_CTL               __NDS32_REG_EDM_CTL__
++#define NDS32_SR_EDM_DTR               __NDS32_REG_EDM_DTR__
++#define NDS32_SR_BPMTC                 __NDS32_REG_BPMTC__
++#define NDS32_SR_DIMBR                 __NDS32_REG_DIMBR__
++#define NDS32_SR_TECR0                 __NDS32_REG_TECR0__
++#define NDS32_SR_TECR1                 __NDS32_REG_TECR1__
++#define NDS32_SR_PFMC0                 __NDS32_REG_PFMC0__
++#define NDS32_SR_PFMC1                 __NDS32_REG_PFMC1__
++#define NDS32_SR_PFMC2                 __NDS32_REG_PFMC2__
++#define NDS32_SR_PFM_CTL               __NDS32_REG_PFM_CTL__
++#define NDS32_SR_HSP_CTL               __NDS32_REG_HSP_CTL__
++#define NDS32_SR_SP_BOUND              __NDS32_REG_SP_BOUND__
++#define NDS32_SR_SP_BOUND_PRIV         __NDS32_REG_SP_BOUND_PRIV__
++#define NDS32_SR_SP_BASE               __NDS32_REG_SP_BASE__
++#define NDS32_SR_SP_BASE_PRIV          __NDS32_REG_SP_BASE_PRIV__
++#define NDS32_SR_FUCOP_CTL             __NDS32_REG_FUCOP_CTL__
++#define NDS32_SR_PRUSR_ACC_CTL         __NDS32_REG_PRUSR_ACC_CTL__
++#define NDS32_SR_DMA_CFG               __NDS32_REG_DMA_CFG__
++#define NDS32_SR_DMA_GCSW              __NDS32_REG_DMA_GCSW__
++#define NDS32_SR_DMA_CHNSEL            __NDS32_REG_DMA_CHNSEL__
++#define NDS32_SR_DMA_ACT               __NDS32_REG_DMA_ACT__
++#define NDS32_SR_DMA_SETUP             __NDS32_REG_DMA_SETUP__
++#define NDS32_SR_DMA_ISADDR            __NDS32_REG_DMA_ISADDR__
++#define NDS32_SR_DMA_ESADDR            __NDS32_REG_DMA_ESADDR__
++#define NDS32_SR_DMA_TCNT              __NDS32_REG_DMA_TCNT__
++#define NDS32_SR_DMA_STATUS            __NDS32_REG_DMA_STATUS__
++#define NDS32_SR_DMA_2DSET             __NDS32_REG_DMA_2DSET__
++#define NDS32_SR_DMA_2DSCTL            __NDS32_REG_DMA_2DSCTL__
++#define NDS32_SR_DMA_RCNT              __NDS32_REG_DMA_RCNT__
++#define NDS32_SR_DMA_HSTATUS           __NDS32_REG_DMA_HSTATUS__
++#define NDS32_SR_SP_USR1               __NDS32_REG_SP_USR1__
++#define NDS32_SR_SP_USR2               __NDS32_REG_SP_USR2__
++#define NDS32_SR_SP_USR3               __NDS32_REG_SP_USR3__
++#define NDS32_SR_SP_PRIV1              __NDS32_REG_SP_PRIV1__
++#define NDS32_SR_SP_PRIV2              __NDS32_REG_SP_PRIV2__
++#define NDS32_SR_SP_PRIV3              __NDS32_REG_SP_PRIV3__
++#define NDS32_SR_BG_REGION             __NDS32_REG_BG_REGION__
++#define NDS32_SR_SFCR                  __NDS32_REG_SFCR__
++#define NDS32_SR_SIGN                  __NDS32_REG_SIGN__
++#define NDS32_SR_ISIGN                 __NDS32_REG_ISIGN__
++#define NDS32_SR_P_ISIGN               __NDS32_REG_P_ISIGN__
++
++#define NDS32_USR_PC                    __NDS32_REG_PC__
++#define NDS32_USR_DMA_CFG               __NDS32_REG_DMA_CFG__
++#define NDS32_USR_DMA_GCSW              __NDS32_REG_DMA_GCSW__
++#define NDS32_USR_DMA_CHNSEL            __NDS32_REG_DMA_CHNSEL__
++#define NDS32_USR_DMA_ACT               __NDS32_REG_DMA_ACT__
++#define NDS32_USR_DMA_SETUP             __NDS32_REG_DMA_SETUP__
++#define NDS32_USR_DMA_ISADDR            __NDS32_REG_DMA_ISADDR__
++#define NDS32_USR_DMA_ESADDR            __NDS32_REG_DMA_ESADDR__
++#define NDS32_USR_DMA_TCNT              __NDS32_REG_DMA_TCNT__
++#define NDS32_USR_DMA_STATUS            __NDS32_REG_DMA_STATUS__
++#define NDS32_USR_DMA_2DSET             __NDS32_REG_DMA_2DSET__
++#define NDS32_USR_DMA_2DSCTL            __NDS32_REG_DMA_2DSCTL__
++#define NDS32_USR_PFMC0                 __NDS32_REG_PFMC0__
++#define NDS32_USR_PFMC1                 __NDS32_REG_PFMC1__
++#define NDS32_USR_PFMC2                 __NDS32_REG_PFMC2__
++#define NDS32_USR_PFM_CTL               __NDS32_REG_PFM_CTL__
++#define NDS32_USR_IFC_LP                __NDS32_REG_IFC_LP__
++#define NDS32_USR_ITB                   __NDS32_REG_ITB__
++
++#define NDS32_CCTL_L1D_VA_FILLCK        __NDS32_CCTL_L1D_VA_FILLCK__
++#define NDS32_CCTL_L1D_VA_ULCK          __NDS32_CCTL_L1D_VA_ULCK__
++#define NDS32_CCTL_L1I_VA_FILLCK        __NDS32_CCTL_L1I_VA_FILLCK__
++#define NDS32_CCTL_L1I_VA_ULCK          __NDS32_CCTL_L1I_VA_ULCK__
++
++#define NDS32_CCTL_L1D_IX_WBINVAL       __NDS32_CCTL_L1D_IX_WBINVAL__
++#define NDS32_CCTL_L1D_IX_INVAL         __NDS32_CCTL_L1D_IX_INVAL__
++#define NDS32_CCTL_L1D_IX_WB            __NDS32_CCTL_L1D_IX_WB__
++#define NDS32_CCTL_L1I_IX_INVAL         __NDS32_CCTL_L1I_IX_INVAL__
++
++#define NDS32_CCTL_L1D_VA_INVAL         __NDS32_CCTL_L1D_VA_INVAL__
++#define NDS32_CCTL_L1D_VA_WB            __NDS32_CCTL_L1D_VA_WB__
++#define NDS32_CCTL_L1D_VA_WBINVAL       __NDS32_CCTL_L1D_VA_WBINVAL__
++#define NDS32_CCTL_L1I_VA_INVAL         __NDS32_CCTL_L1I_VA_INVAL__
++
++#define NDS32_CCTL_L1D_IX_RTAG          __NDS32_CCTL_L1D_IX_RTAG__
++#define NDS32_CCTL_L1D_IX_RWD           __NDS32_CCTL_L1D_IX_RWD__
++#define NDS32_CCTL_L1I_IX_RTAG          __NDS32_CCTL_L1I_IX_RTAG__
++#define NDS32_CCTL_L1I_IX_RWD           __NDS32_CCTL_L1I_IX_RWD__
++
++#define NDS32_CCTL_L1D_IX_WTAG          __NDS32_CCTL_L1D_IX_WTAG__
++#define NDS32_CCTL_L1D_IX_WWD           __NDS32_CCTL_L1D_IX_WWD__
++#define NDS32_CCTL_L1I_IX_WTAG          __NDS32_CCTL_L1I_IX_WTAG__
++#define NDS32_CCTL_L1I_IX_WWD           __NDS32_CCTL_L1I_IX_WWD__
++
++#define NDS32_DPREF_SRD                 __NDS32_DPREF_SRD__
++#define NDS32_DPREF_MRD                 __NDS32_DPREF_MRD__
++#define NDS32_DPREF_SWR                 __NDS32_DPREF_SWR__
++#define NDS32_DPREF_MWR                 __NDS32_DPREF_MWR__
++#define NDS32_DPREF_PTE                 __NDS32_DPREF_PTE__
++#define NDS32_DPREF_CLWR                __NDS32_DPREF_CLWR__
++
++/* ------------------------------------------------------------------------ */
++
++/* Define user-friendly macros.  */
++#define SIGNATURE_BEGIN	__nds32__signature_begin ()
++#define SIGNATURE_END	__nds32__signature_end ()
++
++/* Map __nds32__xxx() to __builtin_nds32_xxx() functions for compatibility.  */
++#define __nds32__llw(a) \
++  (__builtin_nds32_llw ((a)))
++#define __nds32__lwup(a) \
++  (__builtin_nds32_lwup ((a)))
++#define __nds32__lbup(a) \
++  (__builtin_nds32_lbup ((a)))
++#define __nds32__scw(a, b) \
++  (__builtin_nds32_scw ((a), (b)))
++#define __nds32__swup(a, b) \
++  (__builtin_nds32_swup ((a), (b)))
++#define __nds32__sbup(a, b) \
++  (__builtin_nds32_sbup ((a), (b)))
++
++#define __nds32__mfsr(srname) \
++  (__builtin_nds32_mfsr ((srname)))
++#define __nds32__mfusr(usrname) \
++  (__builtin_nds32_mfusr ((usrname)))
++#define __nds32__mtsr(val, srname) \
++  (__builtin_nds32_mtsr ((val), (srname)))
++#define __nds32__mtsr_isb(val, srname) \
++  (__builtin_nds32_mtsr_isb ((val), (srname)))
++#define __nds32__mtsr_dsb(val, srname) \
++  (__builtin_nds32_mtsr_dsb ((val), (srname)))
++#define __nds32__mtusr(val, usrname) \
++  (__builtin_nds32_mtusr ((val), (usrname)))
++
++#define __nds32__break(swid) \
++  (__builtin_nds32_break ((swid)))
++#define __nds32__cctlva_lck(subtype, va) \
++  (__builtin_nds32_cctl_va_lck ((subtype), (va)))
++#define __nds32__cctlidx_wbinval(subtype, idx) \
++  (__builtin_nds32_cctl_idx_wbinval ((subtype), (idx)))
++#define __nds32__cctlva_wbinval_alvl(subtype, va) \
++  (__builtin_nds32_cctl_va_wbinval_la ((subtype), (va)))
++#define __nds32__cctlva_wbinval_one_lvl(subtype, va) \
++  (__builtin_nds32_cctl_va_wbinval_l1 ((subtype), (va)))
++#define __nds32__cctlidx_read(subtype, idx) \
++  (__builtin_nds32_cctl_idx_read ((subtype), (idx)))
++#define __nds32__cctlidx_write(subtype, b, idxw) \
++  (__builtin_nds32_cctl_idx_write ((subtype), (b), (idxw)))
++#define __nds32__cctl_l1d_invalall()  \
++  (__builtin_nds32_cctl_l1d_invalall())
++#define __nds32__cctl_l1d_wball_alvl() \
++  (__builtin_nds32_cctl_l1d_wball_alvl())
++#define __nds32__cctl_l1d_wball_one_lvl() \
++  (__builtin_nds32_cctl_l1d_wball_one_lvl())
++
++#define __nds32__dsb() \
++  (__builtin_nds32_dsb())
++#define __nds32__isb() \
++  (__builtin_nds32_isb())
++#define __nds32__msync_store() \
++  (__builtin_nds32_msync_store())
++#define __nds32__msync_all() \
++  (__builtin_nds32_msync_all())
++#define __nds32__nop() \
++  (__builtin_nds32_nop())
++
++#define __nds32__standby_wait_done() \
++  (__builtin_nds32_standby_wait_done())
++#define __nds32__standby_no_wake_grant() \
++  (__builtin_nds32_standby_no_wake_grant())
++#define __nds32__standby_wake_grant() \
++  (__builtin_nds32_standby_wake_grant())
++#define __nds32__schedule_barrier() \
++  (__builtin_nds32_schedule_barrier())
++#define __nds32__setend_big() \
++  (__builtin_nds32_setend_big())
++#define __nds32__setend_little() \
++  (__builtin_nds32_setend_little())
++#define __nds32__setgie_en() \
++  (__builtin_nds32_setgie_en())
++#define __nds32__setgie_dis() \
++  (__builtin_nds32_setgie_dis())
++
++#define __nds32__jr_itoff(a) \
++  (__builtin_nds32_jr_itoff ((a)))
++#define __nds32__jr_toff(a) \
++  (__builtin_nds32_jr_toff ((a)))
++#define __nds32__jral_iton(a) \
++  (__builtin_nds32_jral_iton ((a)))
++#define __nds32__jral_ton(a) \
++  (__builtin_nds32_jral_ton ((a)))
++#define __nds32__ret_itoff(a) \
++  (__builtin_nds32_ret_itoff ((a)))
++#define __nds32__ret_toff(a) \
++  (__builtin_nds32_ret_toff ((a)))
++#define __nds32__svs(a, b) \
++  (__builtin_nds32_svs ((a), (b)))
++#define __nds32__sva(a, b) \
++  (__builtin_nds32_sva ((a), (b)))
++#define __nds32__dpref_qw(a, b, subtype) \
++  (__builtin_nds32_dpref_qw ((a), (b), (subtype)))
++#define __nds32__dpref_hw(a, b, subtype) \
++  (__builtin_nds32_dpref_hw ((a), (b), (subtype)))
++#define __nds32__dpref_w(a, b, subtype) \
++  (__builtin_nds32_dpref_w ((a), (b), (subtype)))
++#define __nds32__dpref_dw(a, b, subtype) \
++  (__builtin_nds32_dpref_dw ((a), (b), (subtype)))
++
++#define __nds32__teqz(a, swid) \
++  (__builtin_nds32_teqz ((a), (swid)))
++#define __nds32__tnez(a, swid) \
++  (__builtin_nds32_tnez ((a), (swid)))
++#define __nds32__trap(swid) \
++  (__builtin_nds32_trap ((swid)))
++#define __nds32__isync(a) \
++  (__builtin_nds32_isync ((a)))
++#define __nds32__rotr(val, ror) \
++  (__builtin_nds32_rotr ((val), (ror)))
++#define __nds32__wsbh(a) \
++  (__builtin_nds32_wsbh ((a)))
++#define __nds32__syscall(a) \
++  (__builtin_nds32_syscall ((a)))
++#define __nds32__return_address() \
++  (__builtin_nds32_return_address())
++#define __nds32__get_current_sp() \
++  (__builtin_nds32_get_current_sp())
++#define __nds32__set_current_sp(a) \
++  (__builtin_nds32_set_current_sp ((a)))
++#define __nds32__abs(a) \
++  (__builtin_nds32_pe_abs ((a)))
++#define __nds32__ave(a, b) \
++  (__builtin_nds32_pe_ave ((a), (b)))
++#define __nds32__bclr(a, pos) \
++  (__builtin_nds32_pe_bclr ((a), (pos)))
++#define __nds32__bset(a, pos) \
++  (__builtin_nds32_pe_bset ((a), (pos)))
++#define __nds32__btgl(a, pos) \
++  (__builtin_nds32_pe_btgl ((a), (pos)))
++#define __nds32__btst(a, pos) \
++  (__builtin_nds32_pe_btst ((a), (pos)))
++
++#define __nds32__clip(a, imm) \
++  (__builtin_nds32_pe_clip ((a), (imm)))
++#define __nds32__clips(a, imm) \
++  (__builtin_nds32_pe_clips ((a), (imm)))
++#define __nds32__clz(a) \
++  (__builtin_nds32_pe_clz ((a)))
++#define __nds32__clo(a) \
++  (__builtin_nds32_pe_clo ((a)))
++#define __nds32__bse(r, a, b) \
++  (__builtin_nds32_pe2_bse ((r), (a), (b)))
++#define __nds32__bsp(r, a, b) \
++  (__builtin_nds32_pe2_bsp ((r), (a), (b)))
++#define __nds32__pbsad(a, b) \
++  (__builtin_nds32_pe2_pbsad ((a), (b)))
++#define __nds32__pbsada(acc, a, b) \
++  (__builtin_nds32_pe2_pbsada ((acc), (a), (b)))
++
++#define __nds32__ffb(a, b) \
++  (__builtin_nds32_se_ffb ((a), (b)))
++#define __nds32__ffmism(a, b) \
++  (__builtin_nds32_se_ffmism ((a), (b)))
++#define __nds32__flmism(a, b) \
++  (__builtin_nds32_se_flmism ((a), (b)))
++#define __nds32__fcpynsd(a, b) \
++  (__builtin_nds32_fcpynsd ((a), (b)))
++#define __nds32__fcpynss(a, b) \
++  (__builtin_nds32_fcpynss ((a), (b)))
++#define __nds32__fcpysd(a, b) \
++  (__builtin_nds32_fcpysd ((a), (b)))
++#define __nds32__fcpyss(a, b) \
++  (__builtin_nds32_fcpyss ((a), (b)))
++#define __nds32__fmfcsr() \
++  (__builtin_nds32_fmfcsr())
++#define __nds32__fmtcsr(fpcsr) \
++  (__builtin_nds32_fmtcsr ((fpcsr)))
++#define __nds32__fmfcfg() \
++  (__builtin_nds32_fmfcfg())
++
++#define __nds32__tlbop_trd(a) \
++  (__builtin_nds32_tlbop_trd ((a)))
++#define __nds32__tlbop_twr(a) \
++  (__builtin_nds32_tlbop_twr ((a)))
++#define __nds32__tlbop_rwr(a) \
++  (__builtin_nds32_tlbop_rwr ((a)))
++#define __nds32__tlbop_rwlk(a) \
++  (__builtin_nds32_tlbop_rwlk ((a)))
++#define __nds32__tlbop_unlk(a) \
++  (__builtin_nds32_tlbop_unlk ((a)))
++#define __nds32__tlbop_pb(a) \
++  (__builtin_nds32_tlbop_pb ((a)))
++#define __nds32__tlbop_inv(a) \
++  (__builtin_nds32_tlbop_inv ((a)))
++#define __nds32__tlbop_flua() \
++  (__builtin_nds32_tlbop_flua())
++
++#define __nds32__kaddw(a, b) \
++  (__builtin_nds32_kaddw ((a), (b)))
++#define __nds32__kaddh(a, b) \
++  (__builtin_nds32_kaddh ((a), (b)))
++#define __nds32__ksubw(a, b) \
++  (__builtin_nds32_ksubw ((a), (b)))
++#define __nds32__ksubh(a, b) \
++  (__builtin_nds32_ksubh ((a), (b)))
++#define __nds32__kdmbb(a, b) \
++  (__builtin_nds32_kdmbb ((a), (b)))
++#define __nds32__v_kdmbb(a, b) \
++  (__builtin_nds32_v_kdmbb ((a), (b)))
++#define __nds32__kdmbt(a, b) \
++  (__builtin_nds32_kdmbt ((a), (b)))
++#define __nds32__v_kdmbt(a, b) \
++  (__builtin_nds32_v_kdmbt ((a), (b)))
++#define __nds32__kdmtb(a, b) \
++  (__builtin_nds32_kdmtb ((a), (b)))
++#define __nds32__v_kdmtb(a, b) \
++  (__builtin_nds32_v_kdmtb ((a), (b)))
++#define __nds32__kdmtt(a, b) \
++  (__builtin_nds32_kdmtt ((a), (b)))
++#define __nds32__v_kdmtt(a, b) \
++  (__builtin_nds32_v_kdmtt ((a), (b)))
++#define __nds32__khmbb(a, b) \
++  (__builtin_nds32_khmbb ((a), (b)))
++#define __nds32__v_khmbb(a, b) \
++  (__builtin_nds32_v_khmbb ((a), (b)))
++#define __nds32__khmbt(a, b) \
++  (__builtin_nds32_khmbt ((a), (b)))
++#define __nds32__v_khmbt(a, b) \
++  (__builtin_nds32_v_khmbt ((a), (b)))
++#define __nds32__khmtb(a, b) \
++  (__builtin_nds32_khmtb ((a), (b)))
++#define __nds32__v_khmtb(a, b) \
++  (__builtin_nds32_v_khmtb ((a), (b)))
++#define __nds32__khmtt(a, b) \
++  (__builtin_nds32_khmtt ((a), (b)))
++#define __nds32__v_khmtt(a, b) \
++  (__builtin_nds32_v_khmtt ((a), (b)))
++#define __nds32__kslraw(a, b) \
++  (__builtin_nds32_kslraw ((a), (b)))
++#define __nds32__kslraw_u(a, b) \
++  (__builtin_nds32_kslraw_u ((a), (b)))
++
++#define __nds32__rdov() \
++  (__builtin_nds32_rdov())
++#define __nds32__clrov() \
++  (__builtin_nds32_clrov())
++#define __nds32__gie_dis() \
++  (__builtin_nds32_gie_dis())
++#define __nds32__gie_en() \
++  (__builtin_nds32_gie_en())
++#define __nds32__enable_int(a) \
++  (__builtin_nds32_enable_int ((a)))
++#define __nds32__disable_int(a) \
++  (__builtin_nds32_disable_int ((a)))
++#define __nds32__set_pending_swint() \
++  (__builtin_nds32_set_pending_swint())
++#define __nds32__clr_pending_swint() \
++  (__builtin_nds32_clr_pending_swint())
++#define __nds32__clr_pending_hwint(a) \
++  (__builtin_nds32_clr_pending_hwint ((a)))
++#define __nds32__get_all_pending_int() \
++  (__builtin_nds32_get_all_pending_int())
++#define __nds32__get_pending_int(a) \
++  (__builtin_nds32_get_pending_int ((a)))
++#define __nds32__set_int_priority(a, b) \
++  (__builtin_nds32_set_int_priority ((a), (b)))
++#define __nds32__get_int_priority(a) \
++  (__builtin_nds32_get_int_priority ((a)))
++#define __nds32__set_trig_type_level(a) \
++  (__builtin_nds32_set_trig_level ((a)))
++#define __nds32__set_trig_type_edge(a) \
++  (__builtin_nds32_set_trig_edge ((a)))
++#define __nds32__get_trig_type(a) \
++  (__builtin_nds32_get_trig_type ((a)))
++
++#define __nds32__get_unaligned_hw(a) \
++  (__builtin_nds32_unaligned_load_hw ((a)))
++#define __nds32__get_unaligned_w(a) \
++  (__builtin_nds32_unaligned_load_w ((a)))
++#define __nds32__get_unaligned_dw(a) \
++  (__builtin_nds32_unaligned_load_dw ((a)))
++#define __nds32__put_unaligned_hw(a, data) \
++  (__builtin_nds32_unaligned_store_hw ((a), (data)))
++#define __nds32__put_unaligned_w(a, data) \
++  (__builtin_nds32_unaligned_store_w ((a), (data)))
++#define __nds32__put_unaligned_dw(a, data) \
++  (__builtin_nds32_unaligned_store_dw ((a), (data)))
++
++#define __nds32__signature_begin() \
++  (__builtin_nds32_signature_begin ())
++#define __nds32__signature_end() \
++  (__builtin_nds32_signature_end ())
++
++#define __nds32__add16(a, b) \
++  (__builtin_nds32_add16 ((a), (b)))
++#define __nds32__v_uadd16(a, b) \
++  (__builtin_nds32_v_uadd16 ((a), (b)))
++#define __nds32__v_sadd16(a, b) \
++  (__builtin_nds32_v_sadd16 ((a), (b)))
++#define __nds32__radd16(a, b) \
++  (__builtin_nds32_radd16 ((a), (b)))
++#define __nds32__v_radd16(a, b) \
++  (__builtin_nds32_v_radd16 ((a), (b)))
++#define __nds32__uradd16(a, b) \
++  (__builtin_nds32_uradd16 ((a), (b)))
++#define __nds32__v_uradd16(a, b) \
++  (__builtin_nds32_v_uradd16 ((a), (b)))
++#define __nds32__kadd16(a, b) \
++  (__builtin_nds32_kadd16 ((a), (b)))
++#define __nds32__v_kadd16(a, b) \
++  (__builtin_nds32_v_kadd16 ((a), (b)))
++#define __nds32__ukadd16(a, b) \
++  (__builtin_nds32_ukadd16 ((a), (b)))
++#define __nds32__v_ukadd16(a, b) \
++  (__builtin_nds32_v_ukadd16 ((a), (b)))
++#define __nds32__sub16(a, b) \
++  (__builtin_nds32_sub16 ((a), (b)))
++#define __nds32__v_usub16(a, b) \
++  (__builtin_nds32_v_usub16 ((a), (b)))
++#define __nds32__v_ssub16(a, b) \
++  (__builtin_nds32_v_ssub16 ((a), (b)))
++#define __nds32__rsub16(a, b) \
++  (__builtin_nds32_rsub16 ((a), (b)))
++#define __nds32__v_rsub16(a, b) \
++  (__builtin_nds32_v_rsub16 ((a), (b)))
++#define __nds32__ursub16(a, b) \
++  (__builtin_nds32_ursub16 ((a), (b)))
++#define __nds32__v_ursub16(a, b) \
++  (__builtin_nds32_v_ursub16 ((a), (b)))
++#define __nds32__ksub16(a, b) \
++  (__builtin_nds32_ksub16 ((a), (b)))
++#define __nds32__v_ksub16(a, b) \
++  (__builtin_nds32_v_ksub16 ((a), (b)))
++#define __nds32__uksub16(a, b) \
++  (__builtin_nds32_uksub16 ((a), (b)))
++#define __nds32__v_uksub16(a, b) \
++  (__builtin_nds32_v_uksub16 ((a), (b)))
++#define __nds32__cras16(a, b) \
++  (__builtin_nds32_cras16 ((a), (b)))
++#define __nds32__v_ucras16(a, b) \
++  (__builtin_nds32_v_ucras16 ((a), (b)))
++#define __nds32__v_scras16(a, b) \
++  (__builtin_nds32_v_scras16 ((a), (b)))
++#define __nds32__rcras16(a, b) \
++  (__builtin_nds32_rcras16 ((a), (b)))
++#define __nds32__v_rcras16(a, b) \
++  (__builtin_nds32_v_rcras16 ((a), (b)))
++#define __nds32__urcras16(a, b) \
++  (__builtin_nds32_urcras16 ((a), (b)))
++#define __nds32__v_urcras16(a, b) \
++  (__builtin_nds32_v_urcras16 ((a), (b)))
++#define __nds32__kcras16(a, b) \
++  (__builtin_nds32_kcras16 ((a), (b)))
++#define __nds32__v_kcras16(a, b) \
++  (__builtin_nds32_v_kcras16 ((a), (b)))
++#define __nds32__ukcras16(a, b) \
++  (__builtin_nds32_ukcras16 ((a), (b)))
++#define __nds32__v_ukcras16(a, b) \
++  (__builtin_nds32_v_ukcras16 ((a), (b)))
++#define __nds32__crsa16(a, b) \
++  (__builtin_nds32_crsa16 ((a), (b)))
++#define __nds32__v_ucrsa16(a, b) \
++  (__builtin_nds32_v_ucrsa16 ((a), (b)))
++#define __nds32__v_scrsa16(a, b) \
++  (__builtin_nds32_v_scrsa16 ((a), (b)))
++#define __nds32__rcrsa16(a, b) \
++  (__builtin_nds32_rcrsa16 ((a), (b)))
++#define __nds32__v_rcrsa16(a, b) \
++  (__builtin_nds32_v_rcrsa16 ((a), (b)))
++#define __nds32__urcrsa16(a, b) \
++  (__builtin_nds32_urcrsa16 ((a), (b)))
++#define __nds32__v_urcrsa16(a, b) \
++  (__builtin_nds32_v_urcrsa16 ((a), (b)))
++#define __nds32__kcrsa16(a, b) \
++  (__builtin_nds32_kcrsa16 ((a), (b)))
++#define __nds32__v_kcrsa16(a, b) \
++  (__builtin_nds32_v_kcrsa16 ((a), (b)))
++#define __nds32__ukcrsa16(a, b) \
++  (__builtin_nds32_ukcrsa16 ((a), (b)))
++#define __nds32__v_ukcrsa16(a, b) \
++  (__builtin_nds32_v_ukcrsa16 ((a), (b)))
++
++#define __nds32__add8(a, b) \
++  (__builtin_nds32_add8 ((a), (b)))
++#define __nds32__v_uadd8(a, b) \
++  (__builtin_nds32_v_uadd8 ((a), (b)))
++#define __nds32__v_sadd8(a, b) \
++  (__builtin_nds32_v_sadd8 ((a), (b)))
++#define __nds32__radd8(a, b) \
++  (__builtin_nds32_radd8 ((a), (b)))
++#define __nds32__v_radd8(a, b) \
++  (__builtin_nds32_v_radd8 ((a), (b)))
++#define __nds32__uradd8(a, b) \
++  (__builtin_nds32_uradd8 ((a), (b)))
++#define __nds32__v_uradd8(a, b) \
++  (__builtin_nds32_v_uradd8 ((a), (b)))
++#define __nds32__kadd8(a, b) \
++  (__builtin_nds32_kadd8 ((a), (b)))
++#define __nds32__v_kadd8(a, b) \
++  (__builtin_nds32_v_kadd8 ((a), (b)))
++#define __nds32__ukadd8(a, b) \
++  (__builtin_nds32_ukadd8 ((a), (b)))
++#define __nds32__v_ukadd8(a, b) \
++  (__builtin_nds32_v_ukadd8 ((a), (b)))
++#define __nds32__sub8(a, b) \
++  (__builtin_nds32_sub8 ((a), (b)))
++#define __nds32__v_usub8(a, b) \
++  (__builtin_nds32_v_usub8 ((a), (b)))
++#define __nds32__v_ssub8(a, b) \
++  (__builtin_nds32_v_ssub8 ((a), (b)))
++#define __nds32__rsub8(a, b) \
++  (__builtin_nds32_rsub8 ((a), (b)))
++#define __nds32__v_rsub8(a, b) \
++  (__builtin_nds32_v_rsub8 ((a), (b)))
++#define __nds32__ursub8(a, b) \
++  (__builtin_nds32_ursub8 ((a), (b)))
++#define __nds32__v_ursub8(a, b) \
++  (__builtin_nds32_v_ursub8 ((a), (b)))
++#define __nds32__ksub8(a, b) \
++  (__builtin_nds32_ksub8 ((a), (b)))
++#define __nds32__v_ksub8(a, b) \
++  (__builtin_nds32_v_ksub8 ((a), (b)))
++#define __nds32__uksub8(a, b) \
++  (__builtin_nds32_uksub8 ((a), (b)))
++#define __nds32__v_uksub8(a, b) \
++  (__builtin_nds32_v_uksub8 ((a), (b)))
++
++#define __nds32__sra16(a, b) \
++  (__builtin_nds32_sra16 ((a), (b)))
++#define __nds32__v_sra16(a, b) \
++  (__builtin_nds32_v_sra16 ((a), (b)))
++#define __nds32__sra16_u(a, b) \
++  (__builtin_nds32_sra16_u ((a), (b)))
++#define __nds32__v_sra16_u(a, b) \
++  (__builtin_nds32_v_sra16_u ((a), (b)))
++#define __nds32__srl16(a, b) \
++  (__builtin_nds32_srl16 ((a), (b)))
++#define __nds32__v_srl16(a, b) \
++  (__builtin_nds32_v_srl16 ((a), (b)))
++#define __nds32__srl16_u(a, b) \
++  (__builtin_nds32_srl16_u ((a), (b)))
++#define __nds32__v_srl16_u(a, b) \
++  (__builtin_nds32_v_srl16_u ((a), (b)))
++#define __nds32__sll16(a, b) \
++  (__builtin_nds32_sll16 ((a), (b)))
++#define __nds32__v_sll16(a, b) \
++  (__builtin_nds32_v_sll16 ((a), (b)))
++#define __nds32__ksll16(a, b) \
++  (__builtin_nds32_ksll16 ((a), (b)))
++#define __nds32__v_ksll16(a, b) \
++  (__builtin_nds32_v_ksll16 ((a), (b)))
++#define __nds32__kslra16(a, b) \
++  (__builtin_nds32_kslra16 ((a), (b)))
++#define __nds32__v_kslra16(a, b) \
++  (__builtin_nds32_v_kslra16 ((a), (b)))
++#define __nds32__kslra16_u(a, b) \
++  (__builtin_nds32_kslra16_u ((a), (b)))
++#define __nds32__v_kslra16_u(a, b) \
++  (__builtin_nds32_v_kslra16_u ((a), (b)))
++
++#define __nds32__cmpeq16(a, b) \
++  (__builtin_nds32_cmpeq16 ((a), (b)))
++#define __nds32__v_scmpeq16(a, b) \
++  (__builtin_nds32_v_scmpeq16 ((a), (b)))
++#define __nds32__v_ucmpeq16(a, b) \
++  (__builtin_nds32_v_ucmpeq16 ((a), (b)))
++#define __nds32__scmplt16(a, b) \
++  (__builtin_nds32_scmplt16 ((a), (b)))
++#define __nds32__v_scmplt16(a, b) \
++  (__builtin_nds32_v_scmplt16 ((a), (b)))
++#define __nds32__scmple16(a, b) \
++  (__builtin_nds32_scmple16 ((a), (b)))
++#define __nds32__v_scmple16(a, b) \
++  (__builtin_nds32_v_scmple16 ((a), (b)))
++#define __nds32__ucmplt16(a, b) \
++  (__builtin_nds32_ucmplt16 ((a), (b)))
++#define __nds32__v_ucmplt16(a, b) \
++  (__builtin_nds32_v_ucmplt16 ((a), (b)))
++#define __nds32__ucmple16(a, b) \
++  (__builtin_nds32_ucmple16 ((a), (b)))
++#define __nds32__v_ucmple16(a, b) \
++  (__builtin_nds32_v_ucmple16 ((a), (b)))
++
++#define __nds32__cmpeq8(a, b) \
++  (__builtin_nds32_cmpeq8 ((a), (b)))
++#define __nds32__v_scmpeq8(a, b) \
++  (__builtin_nds32_v_scmpeq8 ((a), (b)))
++#define __nds32__v_ucmpeq8(a, b) \
++  (__builtin_nds32_v_ucmpeq8 ((a), (b)))
++#define __nds32__scmplt8(a, b) \
++  (__builtin_nds32_scmplt8 ((a), (b)))
++#define __nds32__v_scmplt8(a, b) \
++  (__builtin_nds32_v_scmplt8 ((a), (b)))
++#define __nds32__scmple8(a, b) \
++  (__builtin_nds32_scmple8 ((a), (b)))
++#define __nds32__v_scmple8(a, b) \
++  (__builtin_nds32_v_scmple8 ((a), (b)))
++#define __nds32__ucmplt8(a, b) \
++  (__builtin_nds32_ucmplt8 ((a), (b)))
++#define __nds32__v_ucmplt8(a, b) \
++  (__builtin_nds32_v_ucmplt8 ((a), (b)))
++#define __nds32__ucmple8(a, b) \
++  (__builtin_nds32_ucmple8 ((a), (b)))
++#define __nds32__v_ucmple8(a, b) \
++  (__builtin_nds32_v_ucmple8 ((a), (b)))
++
++#define __nds32__smin16(a, b) \
++  (__builtin_nds32_smin16 ((a), (b)))
++#define __nds32__v_smin16(a, b) \
++  (__builtin_nds32_v_smin16 ((a), (b)))
++#define __nds32__umin16(a, b) \
++  (__builtin_nds32_umin16 ((a), (b)))
++#define __nds32__v_umin16(a, b) \
++  (__builtin_nds32_v_umin16 ((a), (b)))
++#define __nds32__smax16(a, b) \
++  (__builtin_nds32_smax16 ((a), (b)))
++#define __nds32__v_smax16(a, b) \
++  (__builtin_nds32_v_smax16 ((a), (b)))
++#define __nds32__umax16(a, b) \
++  (__builtin_nds32_umax16 ((a), (b)))
++#define __nds32__v_umax16(a, b) \
++  (__builtin_nds32_v_umax16 ((a), (b)))
++#define __nds32__sclip16(a, b) \
++  (__builtin_nds32_sclip16 ((a), (b)))
++#define __nds32__v_sclip16(a, b) \
++  (__builtin_nds32_v_sclip16 ((a), (b)))
++#define __nds32__uclip16(a, b) \
++  (__builtin_nds32_uclip16 ((a), (b)))
++#define __nds32__v_uclip16(a, b) \
++  (__builtin_nds32_v_uclip16 ((a), (b)))
++#define __nds32__khm16(a, b) \
++  (__builtin_nds32_khm16 ((a), (b)))
++#define __nds32__v_khm16(a, b) \
++  (__builtin_nds32_v_khm16 ((a), (b)))
++#define __nds32__khmx16(a, b) \
++  (__builtin_nds32_khmx16 ((a), (b)))
++#define __nds32__v_khmx16(a, b) \
++  (__builtin_nds32_v_khmx16 ((a), (b)))
++#define __nds32__kabs16(a) \
++  (__builtin_nds32_kabs16 ((a)))
++#define __nds32__v_kabs16(a) \
++  (__builtin_nds32_v_kabs16 ((a)))
++
++#define __nds32__smin8(a, b) \
++  (__builtin_nds32_smin8 ((a), (b)))
++#define __nds32__v_smin8(a, b) \
++  (__builtin_nds32_v_smin8 ((a), (b)))
++#define __nds32__umin8(a, b) \
++  (__builtin_nds32_umin8 ((a), (b)))
++#define __nds32__v_umin8(a, b) \
++  (__builtin_nds32_v_umin8 ((a), (b)))
++#define __nds32__smax8(a, b) \
++  (__builtin_nds32_smax8 ((a), (b)))
++#define __nds32__v_smax8(a, b) \
++  (__builtin_nds32_v_smax8 ((a), (b)))
++#define __nds32__umax8(a, b) \
++  (__builtin_nds32_umax8 ((a), (b)))
++#define __nds32__v_umax8(a, b) \
++  (__builtin_nds32_v_umax8 ((a), (b)))
++#define __nds32__kabs8(a) \
++  (__builtin_nds32_kabs8 ((a)))
++#define __nds32__v_kabs8(a) \
++  (__builtin_nds32_v_kabs8 ((a)))
++
++#define __nds32__sunpkd810(a) \
++  (__builtin_nds32_sunpkd810 ((a)))
++#define __nds32__v_sunpkd810(a) \
++  (__builtin_nds32_v_sunpkd810 ((a)))
++#define __nds32__sunpkd820(a) \
++  (__builtin_nds32_sunpkd820 ((a)))
++#define __nds32__v_sunpkd820(a) \
++  (__builtin_nds32_v_sunpkd820 ((a)))
++#define __nds32__sunpkd830(a) \
++  (__builtin_nds32_sunpkd830 ((a)))
++#define __nds32__v_sunpkd830(a) \
++  (__builtin_nds32_v_sunpkd830 ((a)))
++#define __nds32__sunpkd831(a) \
++  (__builtin_nds32_sunpkd831 ((a)))
++#define __nds32__v_sunpkd831(a) \
++  (__builtin_nds32_v_sunpkd831 ((a)))
++#define __nds32__zunpkd810(a) \
++  (__builtin_nds32_zunpkd810 ((a)))
++#define __nds32__v_zunpkd810(a) \
++  (__builtin_nds32_v_zunpkd810 ((a)))
++#define __nds32__zunpkd820(a) \
++  (__builtin_nds32_zunpkd820 ((a)))
++#define __nds32__v_zunpkd820(a) \
++  (__builtin_nds32_v_zunpkd820 ((a)))
++#define __nds32__zunpkd830(a) \
++  (__builtin_nds32_zunpkd830 ((a)))
++#define __nds32__v_zunpkd830(a) \
++  (__builtin_nds32_v_zunpkd830 ((a)))
++#define __nds32__zunpkd831(a) \
++  (__builtin_nds32_zunpkd831 ((a)))
++#define __nds32__v_zunpkd831(a) \
++  (__builtin_nds32_v_zunpkd831 ((a)))
++
++#define __nds32__raddw(a, b) \
++  (__builtin_nds32_raddw ((a), (b)))
++#define __nds32__uraddw(a, b) \
++  (__builtin_nds32_uraddw ((a), (b)))
++#define __nds32__rsubw(a, b) \
++  (__builtin_nds32_rsubw ((a), (b)))
++#define __nds32__ursubw(a, b) \
++  (__builtin_nds32_ursubw ((a), (b)))
++
++#define __nds32__sra_u(a, b) \
++  (__builtin_nds32_sra_u ((a), (b)))
++#define __nds32__ksll(a, b) \
++  (__builtin_nds32_ksll ((a), (b)))
++#define __nds32__pkbb16(a, b) \
++  (__builtin_nds32_pkbb16 ((a), (b)))
++#define __nds32__v_pkbb16(a, b) \
++  (__builtin_nds32_v_pkbb16 ((a), (b)))
++#define __nds32__pkbt16(a, b) \
++  (__builtin_nds32_pkbt16 ((a), (b)))
++#define __nds32__v_pkbt16(a, b) \
++  (__builtin_nds32_v_pkbt16 ((a), (b)))
++#define __nds32__pktb16(a, b) \
++  (__builtin_nds32_pktb16 ((a), (b)))
++#define __nds32__v_pktb16(a, b) \
++  (__builtin_nds32_v_pktb16 ((a), (b)))
++#define __nds32__pktt16(a, b) \
++  (__builtin_nds32_pktt16 ((a), (b)))
++#define __nds32__v_pktt16(a, b) \
++  (__builtin_nds32_v_pktt16 ((a), (b)))
++
++#define __nds32__smmul(a, b) \
++  (__builtin_nds32_smmul ((a), (b)))
++#define __nds32__smmul_u(a, b) \
++  (__builtin_nds32_smmul_u ((a), (b)))
++#define __nds32__kmmac(r, a, b) \
++  (__builtin_nds32_kmmac ((r), (a), (b)))
++#define __nds32__kmmac_u(r, a, b) \
++  (__builtin_nds32_kmmac_u ((r), (a), (b)))
++#define __nds32__kmmsb(r, a, b) \
++  (__builtin_nds32_kmmsb ((r), (a), (b)))
++#define __nds32__kmmsb_u(r, a, b) \
++  (__builtin_nds32_kmmsb_u ((r), (a), (b)))
++#define __nds32__kwmmul(a, b) \
++  (__builtin_nds32_kwmmul ((a), (b)))
++#define __nds32__kwmmul_u(a, b) \
++  (__builtin_nds32_kwmmul_u ((a), (b)))
++
++#define __nds32__smmwb(a, b) \
++  (__builtin_nds32_smmwb ((a), (b)))
++#define __nds32__v_smmwb(a, b) \
++  (__builtin_nds32_v_smmwb ((a), (b)))
++#define __nds32__smmwb_u(a, b) \
++  (__builtin_nds32_smmwb_u ((a), (b)))
++#define __nds32__v_smmwb_u(a, b) \
++  (__builtin_nds32_v_smmwb_u ((a), (b)))
++#define __nds32__smmwt(a, b) \
++  (__builtin_nds32_smmwt ((a), (b)))
++#define __nds32__v_smmwt(a, b) \
++  (__builtin_nds32_v_smmwt ((a), (b)))
++#define __nds32__smmwt_u(a, b) \
++  (__builtin_nds32_smmwt_u ((a), (b)))
++#define __nds32__v_smmwt_u(a, b) \
++  (__builtin_nds32_v_smmwt_u ((a), (b)))
++#define __nds32__kmmawb(r, a, b) \
++  (__builtin_nds32_kmmawb ((r), (a), (b)))
++#define __nds32__v_kmmawb(r, a, b) \
++  (__builtin_nds32_v_kmmawb ((r), (a), (b)))
++#define __nds32__kmmawb_u(r, a, b) \
++  (__builtin_nds32_kmmawb_u ((r), (a), (b)))
++#define __nds32__v_kmmawb_u(r, a, b) \
++  (__builtin_nds32_v_kmmawb_u ((r), (a), (b)))
++#define __nds32__kmmawt(r, a, b) \
++  (__builtin_nds32_kmmawt ((r), (a), (b)))
++#define __nds32__v_kmmawt(r, a, b) \
++  (__builtin_nds32_v_kmmawt ((r), (a), (b)))
++#define __nds32__kmmawt_u(r, a, b) \
++  (__builtin_nds32_kmmawt_u ((r), (a), (b)))
++#define __nds32__v_kmmawt_u(r, a, b) \
++  (__builtin_nds32_v_kmmawt_u ((r), (a), (b)))
++
++#define __nds32__smbb(a, b) \
++  (__builtin_nds32_smbb ((a), (b)))
++#define __nds32__v_smbb(a, b) \
++  (__builtin_nds32_v_smbb ((a), (b)))
++#define __nds32__smbt(a, b) \
++  (__builtin_nds32_smbt ((a), (b)))
++#define __nds32__v_smbt(a, b) \
++  (__builtin_nds32_v_smbt ((a), (b)))
++#define __nds32__smtt(a, b) \
++  (__builtin_nds32_smtt ((a), (b)))
++#define __nds32__v_smtt(a, b) \
++  (__builtin_nds32_v_smtt ((a), (b)))
++#define __nds32__kmda(a, b) \
++  (__builtin_nds32_kmda ((a), (b)))
++#define __nds32__v_kmda(a, b) \
++  (__builtin_nds32_v_kmda ((a), (b)))
++#define __nds32__kmxda(a, b) \
++  (__builtin_nds32_kmxda ((a), (b)))
++#define __nds32__v_kmxda(a, b) \
++  (__builtin_nds32_v_kmxda ((a), (b)))
++#define __nds32__smds(a, b) \
++  (__builtin_nds32_smds ((a), (b)))
++#define __nds32__v_smds(a, b) \
++  (__builtin_nds32_v_smds ((a), (b)))
++#define __nds32__smdrs(a, b) \
++  (__builtin_nds32_smdrs ((a), (b)))
++#define __nds32__v_smdrs(a, b) \
++  (__builtin_nds32_v_smdrs ((a), (b)))
++#define __nds32__smxds(a, b) \
++  (__builtin_nds32_smxds ((a), (b)))
++#define __nds32__v_smxds(a, b) \
++  (__builtin_nds32_v_smxds ((a), (b)))
++#define __nds32__kmabb(r, a, b) \
++  (__builtin_nds32_kmabb ((r), (a), (b)))
++#define __nds32__v_kmabb(r, a, b) \
++  (__builtin_nds32_v_kmabb ((r), (a), (b)))
++#define __nds32__kmabt(r, a, b) \
++  (__builtin_nds32_kmabt ((r), (a), (b)))
++#define __nds32__v_kmabt(r, a, b) \
++  (__builtin_nds32_v_kmabt ((r), (a), (b)))
++#define __nds32__kmatt(r, a, b) \
++  (__builtin_nds32_kmatt ((r), (a), (b)))
++#define __nds32__v_kmatt(r, a, b) \
++  (__builtin_nds32_v_kmatt ((r), (a), (b)))
++#define __nds32__kmada(r, a, b) \
++  (__builtin_nds32_kmada ((r), (a), (b)))
++#define __nds32__v_kmada(r, a, b) \
++  (__builtin_nds32_v_kmada ((r), (a), (b)))
++#define __nds32__kmaxda(r, a, b) \
++  (__builtin_nds32_kmaxda ((r), (a), (b)))
++#define __nds32__v_kmaxda(r, a, b) \
++  (__builtin_nds32_v_kmaxda ((r), (a), (b)))
++#define __nds32__kmads(r, a, b) \
++  (__builtin_nds32_kmads ((r), (a), (b)))
++#define __nds32__v_kmads(r, a, b) \
++  (__builtin_nds32_v_kmads ((r), (a), (b)))
++#define __nds32__kmadrs(r, a, b) \
++  (__builtin_nds32_kmadrs ((r), (a), (b)))
++#define __nds32__v_kmadrs(r, a, b) \
++  (__builtin_nds32_v_kmadrs ((r), (a), (b)))
++#define __nds32__kmaxds(r, a, b) \
++  (__builtin_nds32_kmaxds ((r), (a), (b)))
++#define __nds32__v_kmaxds(r, a, b) \
++  (__builtin_nds32_v_kmaxds ((r), (a), (b)))
++#define __nds32__kmsda(r, a, b) \
++  (__builtin_nds32_kmsda ((r), (a), (b)))
++#define __nds32__v_kmsda(r, a, b) \
++  (__builtin_nds32_v_kmsda ((r), (a), (b)))
++#define __nds32__kmsxda(r, a, b) \
++  (__builtin_nds32_kmsxda ((r), (a), (b)))
++#define __nds32__v_kmsxda(r, a, b) \
++  (__builtin_nds32_v_kmsxda ((r), (a), (b)))
++
++#define __nds32__smal(a, b) \
++  (__builtin_nds32_smal ((a), (b)))
++#define __nds32__v_smal(a, b) \
++  (__builtin_nds32_v_smal ((a), (b)))
++
++#define __nds32__bitrev(a, b) \
++  (__builtin_nds32_bitrev ((a), (b)))
++#define __nds32__wext(a, b) \
++  (__builtin_nds32_wext ((a), (b)))
++#define __nds32__bpick(r, a, b) \
++  (__builtin_nds32_bpick ((r), (a), (b)))
++#define __nds32__insb(r, a, b) \
++  (__builtin_nds32_insb ((r), (a), (b)))
++
++#define __nds32__sadd64(a, b) \
++  (__builtin_nds32_sadd64 ((a), (b)))
++#define __nds32__uadd64(a, b) \
++  (__builtin_nds32_uadd64 ((a), (b)))
++#define __nds32__radd64(a, b) \
++  (__builtin_nds32_radd64 ((a), (b)))
++#define __nds32__uradd64(a, b) \
++  (__builtin_nds32_uradd64 ((a), (b)))
++#define __nds32__kadd64(a, b) \
++  (__builtin_nds32_kadd64 ((a), (b)))
++#define __nds32__ukadd64(a, b) \
++  (__builtin_nds32_ukadd64 ((a), (b)))
++#define __nds32__ssub64(a, b) \
++  (__builtin_nds32_ssub64 ((a), (b)))
++#define __nds32__usub64(a, b) \
++  (__builtin_nds32_usub64 ((a), (b)))
++#define __nds32__rsub64(a, b) \
++  (__builtin_nds32_rsub64 ((a), (b)))
++#define __nds32__ursub64(a, b) \
++  (__builtin_nds32_ursub64 ((a), (b)))
++#define __nds32__ksub64(a, b) \
++  (__builtin_nds32_ksub64 ((a), (b)))
++#define __nds32__uksub64(a, b) \
++  (__builtin_nds32_uksub64 ((a), (b)))
++
++#define __nds32__smar64(r, a, b) \
++  (__builtin_nds32_smar64 ((r), (a), (b)))
++#define __nds32__smsr64(r, a, b) \
++  (__builtin_nds32_smsr64 ((r), (a), (b)))
++#define __nds32__umar64(r, a, b) \
++  (__builtin_nds32_umar64 ((r), (a), (b)))
++#define __nds32__umsr64(r, a, b) \
++  (__builtin_nds32_umsr64 ((r), (a), (b)))
++#define __nds32__kmar64(r, a, b) \
++  (__builtin_nds32_kmar64 ((r), (a), (b)))
++#define __nds32__kmsr64(r, a, b) \
++  (__builtin_nds32_kmsr64 ((r), (a), (b)))
++#define __nds32__ukmar64(r, a, b) \
++  (__builtin_nds32_ukmar64 ((r), (a), (b)))
++#define __nds32__ukmsr64(r, a, b) \
++  (__builtin_nds32_ukmsr64 ((r), (a), (b)))
++
++#define __nds32__smalbb(r, a, b) \
++  (__builtin_nds32_smalbb ((r), (a), (b)))
++#define __nds32__v_smalbb(r, a, b) \
++  (__builtin_nds32_v_smalbb ((r), (a), (b)))
++#define __nds32__smalbt(r, a, b) \
++  (__builtin_nds32_smalbt ((r), (a), (b)))
++#define __nds32__v_smalbt(r, a, b) \
++  (__builtin_nds32_v_smalbt ((r), (a), (b)))
++#define __nds32__smaltt(r, a, b) \
++  (__builtin_nds32_smaltt ((r), (a), (b)))
++#define __nds32__v_smaltt(r, a, b) \
++  (__builtin_nds32_v_smaltt ((r), (a), (b)))
++#define __nds32__smalda(r, a, b) \
++  (__builtin_nds32_smalda ((r), (a), (b)))
++#define __nds32__v_smalda(r, a, b) \
++  (__builtin_nds32_v_smalda ((r), (a), (b)))
++#define __nds32__smalxda(r, a, b) \
++  (__builtin_nds32_smalxda ((r), (a), (b)))
++#define __nds32__v_smalxda(r, a, b) \
++  (__builtin_nds32_v_smalxda ((r), (a), (b)))
++#define __nds32__smalds(r, a, b) \
++  (__builtin_nds32_smalds ((r), (a), (b)))
++#define __nds32__v_smalds(r, a, b) \
++  (__builtin_nds32_v_smalds ((r), (a), (b)))
++#define __nds32__smaldrs(r, a, b) \
++  (__builtin_nds32_smaldrs ((r), (a), (b)))
++#define __nds32__v_smaldrs(r, a, b) \
++  (__builtin_nds32_v_smaldrs ((r), (a), (b)))
++#define __nds32__smalxds(r, a, b) \
++  (__builtin_nds32_smalxds ((r), (a), (b)))
++#define __nds32__v_smalxds(r, a, b) \
++  (__builtin_nds32_v_smalxds ((r), (a), (b)))
++#define __nds32__smslda(r, a, b) \
++  (__builtin_nds32_smslda ((r), (a), (b)))
++#define __nds32__v_smslda(r, a, b) \
++  (__builtin_nds32_v_smslda ((r), (a), (b)))
++#define __nds32__smslxda(r, a, b) \
++  (__builtin_nds32_smslxda ((r), (a), (b)))
++#define __nds32__v_smslxda(r, a, b) \
++  (__builtin_nds32_v_smslxda ((r), (a), (b)))
++
++#define __nds32__smul16(a, b) \
++  (__builtin_nds32_smul16 ((a), (b)))
++#define __nds32__v_smul16(a, b) \
++  (__builtin_nds32_v_smul16 ((a), (b)))
++#define __nds32__smulx16(a, b) \
++  (__builtin_nds32_smulx16 ((a), (b)))
++#define __nds32__v_smulx16(a, b) \
++  (__builtin_nds32_v_smulx16 ((a), (b)))
++#define __nds32__umul16(a, b) \
++  (__builtin_nds32_umul16 ((a), (b)))
++#define __nds32__v_umul16(a, b) \
++  (__builtin_nds32_v_umul16 ((a), (b)))
++#define __nds32__umulx16(a, b) \
++  (__builtin_nds32_umulx16 ((a), (b)))
++#define __nds32__v_umulx16(a, b) \
++  (__builtin_nds32_v_umulx16 ((a), (b)))
++
++#define __nds32__uclip32(a, imm) \
++  (__builtin_nds32_uclip32 ((a), (imm)))
++#define __nds32__sclip32(a, imm) \
++  (__builtin_nds32_sclip32 ((a), (imm)))
++#define __nds32__kabs(a) \
++  (__builtin_nds32_kabs ((a)))
++
++#define __nds32__no_ext_zol() \
++  (__builtin_nds32_no_ext_zol())
++
++#define __nds32__unaligned_feature() \
++  (__builtin_nds32_unaligned_feature())
++#define __nds32__enable_unaligned() \
++  (__builtin_nds32_enable_unaligned())
++#define __nds32__disable_unaligned() \
++  (__builtin_nds32_disable_unaligned())
++
++#define __nds32__get_unaligned_u16x2(a) \
++  (__builtin_nds32_get_unaligned_u16x2 ((a)))
++#define __nds32__get_unaligned_s16x2(a) \
++  (__builtin_nds32_get_unaligned_s16x2 ((a)))
++#define __nds32__get_unaligned_u8x4(a) \
++  (__builtin_nds32_get_unaligned_u8x4 ((a)))
++#define __nds32__get_unaligned_s8x4(a) \
++  (__builtin_nds32_get_unaligned_s8x4 ((a)))
++
++#define __nds32__put_unaligned_u16x2(a, data) \
++  (__builtin_nds32_put_unaligned_u16x2 ((a), (data)))
++#define __nds32__put_unaligned_s16x2(a, data) \
++  (__builtin_nds32_put_unaligned_s16x2 ((a), (data)))
++#define __nds32__put_unaligned_u8x4(a, data) \
++  (__builtin_nds32_put_unaligned_u8x4 ((a), (data)))
++#define __nds32__put_unaligned_s8x4(a, data) \
++  (__builtin_nds32_put_unaligned_s8x4 ((a), (data)))
++
++#define NDS32ATTR_SIGNATURE              __attribute__((signature))
++
+ #endif /* nds32_intrinsic.h */
+diff --git a/gcc/config/nds32/nds32_isr.h b/gcc/config/nds32/nds32_isr.h
+new file mode 100644
+index 0000000..6fabd3e
+--- /dev/null
++++ b/gcc/config/nds32/nds32_isr.h
+@@ -0,0 +1,526 @@
++/* Interrupt and exception handler definitions of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef _NDS32_ISR_H
++#define _NDS32_ISR_H
++
++/* Attribute of an interrupt or exception handler:
++
++   NDS32_READY_NESTED: This handler is interruptible if user re-enables GIE bit.
++   NDS32_NESTED      : This handler is interruptible.  This is not suitable for
++                       exception handlers.
++   NDS32_NOT_NESTED  : This handler is NOT interruptible.  Users have to do
++                       some work if nesting is wanted.
++   NDS32_CRITICAL    : This handler is a critical ISR, which means it is small
++                       and efficient.  */
++#define NDS32_READY_NESTED   0
++#define NDS32_NESTED         1
++#define NDS32_NOT_NESTED     2
++#define NDS32_CRITICAL       3
++
++/* Attribute of an interrupt or exception handler:
++
++   NDS32_SAVE_CALLER_REGS: Save partial registers.
++   NDS32_SAVE_ALL_REGS   : Save all registers in a table.  */
++#define NDS32_SAVE_CALLER_REGS   0
++#define NDS32_SAVE_ALL_REGS      1
++
++/* There are two versions of register table for interrupt and exception handler,
++   one for 16-register CPU the other for 32-register CPU.  These structures are
++   used for context switching or system call handling.  The address of this
++   data can be obtained from the input argument of the handler functions.
++
++   For system call handling, r0 to r5 are used to pass arguments.  If more
++   arguments are used they are put into the stack and its starting address is
++   in sp.  Return value of system call can be put into r0 and r1 upon exit from
++   system call handler.  System call ID is in a system register and it can be
++   fetched via intrinsic function.  For more information please read ABI and
++   other related documents.
++
++   For context switching, at least 2 values need to be saved in kernel.  One is
++   IPC and the other is the stack address of current task.  Use intrinsic
++   function to get IPC and the input argument of the handler functions + 8 to
++   get stack address of current task.  To do context switching, you replace
++   new_sp with the stack address of new task and replace IPC system register
++   with IPC of new task, then, just return from handler.  The context switching
++   will happen.  */
++
++/* Register table for exception handler; 32-register version.  */
++typedef struct
++{
++  int r0;
++  int r1;
++  int r2;
++  int r3;
++  int r4;
++  int r5;
++  int r6;
++  int r7;
++  int r8;
++  int r9;
++  int r10;
++  int r11;
++  int r12;
++  int r13;
++  int r14;
++  int r15;
++  int r16;
++  int r17;
++  int r18;
++  int r19;
++  int r20;
++  int r21;
++  int r22;
++  int r23;
++  int r24;
++  int r25;
++  int r26;
++  int r27;
++  int fp;
++  int gp;
++  int lp;
++  int sp;
++} NDS32_GPR32;
++
++/* Register table for exception handler; 16-register version.  */
++typedef struct
++{
++  int r0;
++  int r1;
++  int r2;
++  int r3;
++  int r4;
++  int r5;
++  int r6;
++  int r7;
++  int r8;
++  int r9;
++  int r10;
++  int r15;
++  int fp;
++  int gp;
++  int lp;
++  int sp;
++} NDS32_GPR16;
++
++
++/* Use NDS32_REG32_TAB or NDS32_REG16_TAB in your program to
++   access register table.  */
++typedef struct
++{
++  union
++    {
++      int          reg_a[32] ;
++      NDS32_GPR32  reg_s ;
++    } u ;
++} NDS32_REG32_TAB;
++
++typedef struct
++{
++  union
++    {
++      int          reg_a[16] ;
++      NDS32_GPR16  reg_s ;
++    } u ;
++} NDS32_REG16_TAB;
++
++typedef struct
++{
++  int    d0lo;
++  int    d0hi;
++  int    d1lo;
++  int    d1hi;
++} NDS32_DX_TAB;
++
++typedef struct
++{
++#ifdef __NDS32_EB__
++  float    fsr0;
++  float    fsr1;
++  float    fsr2;
++  float    fsr3;
++  float    fsr4;
++  float    fsr5;
++  float    fsr6;
++  float    fsr7;
++#else
++  float    fsr1;
++  float    fsr0;
++  float    fsr3;
++  float    fsr2;
++  float    fsr5;
++  float    fsr4;
++  float    fsr7;
++  float    fsr6;
++#endif
++} NDS32_FSR8;
++
++typedef struct
++{
++  double   dsr0;
++  double   dsr1;
++  double   dsr2;
++  double   dsr3;
++} NDS32_DSR4;
++
++typedef struct
++{
++#ifdef __NDS32_EB__
++  float    fsr0;
++  float    fsr1;
++  float    fsr2;
++  float    fsr3;
++  float    fsr4;
++  float    fsr5;
++  float    fsr6;
++  float    fsr7;
++  float    fsr8;
++  float    fsr9;
++  float    fsr10;
++  float    fsr11;
++  float    fsr12;
++  float    fsr13;
++  float    fsr14;
++  float    fsr15;
++#else
++  float    fsr1;
++  float    fsr0;
++  float    fsr3;
++  float    fsr2;
++  float    fsr5;
++  float    fsr4;
++  float    fsr7;
++  float    fsr6;
++  float    fsr9;
++  float    fsr8;
++  float    fsr11;
++  float    fsr10;
++  float    fsr13;
++  float    fsr12;
++  float    fsr15;
++  float    fsr14;
++#endif
++} NDS32_FSR16;
++
++typedef struct
++{
++  double   dsr0;
++  double   dsr1;
++  double   dsr2;
++  double   dsr3;
++  double   dsr4;
++  double   dsr5;
++  double   dsr6;
++  double   dsr7;
++} NDS32_DSR8;
++
++typedef struct
++{
++#ifdef __NDS32_EB__
++  float    fsr0;
++  float    fsr1;
++  float    fsr2;
++  float    fsr3;
++  float    fsr4;
++  float    fsr5;
++  float    fsr6;
++  float    fsr7;
++  float    fsr8;
++  float    fsr9;
++  float    fsr10;
++  float    fsr11;
++  float    fsr12;
++  float    fsr13;
++  float    fsr14;
++  float    fsr15;
++  float    fsr16;
++  float    fsr17;
++  float    fsr18;
++  float    fsr19;
++  float    fsr20;
++  float    fsr21;
++  float    fsr22;
++  float    fsr23;
++  float    fsr24;
++  float    fsr25;
++  float    fsr26;
++  float    fsr27;
++  float    fsr28;
++  float    fsr29;
++  float    fsr30;
++  float    fsr31;
++#else
++  float    fsr1;
++  float    fsr0;
++  float    fsr3;
++  float    fsr2;
++  float    fsr5;
++  float    fsr4;
++  float    fsr7;
++  float    fsr6;
++  float    fsr9;
++  float    fsr8;
++  float    fsr11;
++  float    fsr10;
++  float    fsr13;
++  float    fsr12;
++  float    fsr15;
++  float    fsr14;
++  float    fsr17;
++  float    fsr16;
++  float    fsr19;
++  float    fsr18;
++  float    fsr21;
++  float    fsr20;
++  float    fsr23;
++  float    fsr22;
++  float    fsr25;
++  float    fsr24;
++  float    fsr27;
++  float    fsr26;
++  float    fsr29;
++  float    fsr28;
++  float    fsr31;
++  float    fsr30;
++#endif
++} NDS32_FSR32;
++
++typedef struct
++{
++  double   dsr0;
++  double   dsr1;
++  double   dsr2;
++  double   dsr3;
++  double   dsr4;
++  double   dsr5;
++  double   dsr6;
++  double   dsr7;
++  double   dsr8;
++  double   dsr9;
++  double   dsr10;
++  double   dsr11;
++  double   dsr12;
++  double   dsr13;
++  double   dsr14;
++  double   dsr15;
++} NDS32_DSR16;
++
++typedef struct
++{
++  double   dsr0;
++  double   dsr1;
++  double   dsr2;
++  double   dsr3;
++  double   dsr4;
++  double   dsr5;
++  double   dsr6;
++  double   dsr7;
++  double   dsr8;
++  double   dsr9;
++  double   dsr10;
++  double   dsr11;
++  double   dsr12;
++  double   dsr13;
++  double   dsr14;
++  double   dsr15;
++  double   dsr16;
++  double   dsr17;
++  double   dsr18;
++  double   dsr19;
++  double   dsr20;
++  double   dsr21;
++  double   dsr22;
++  double   dsr23;
++  double   dsr24;
++  double   dsr25;
++  double   dsr26;
++  double   dsr27;
++  double   dsr28;
++  double   dsr29;
++  double   dsr30;
++  double   dsr31;
++} NDS32_DSR32;
++
++typedef struct
++{
++  union
++    {
++      NDS32_FSR8   fsr_s ;
++      NDS32_DSR4   dsr_s ;
++    } u ;
++} NDS32_FPU8_TAB;
++
++typedef struct
++{
++  union
++    {
++      NDS32_FSR16  fsr_s ;
++      NDS32_DSR8   dsr_s ;
++    } u ;
++} NDS32_FPU16_TAB;
++
++typedef struct
++{
++  union
++    {
++      NDS32_FSR32  fsr_s ;
++      NDS32_DSR16  dsr_s ;
++    } u ;
++} NDS32_FPU32_TAB;
++
++typedef struct
++{
++  union
++    {
++      NDS32_FSR32  fsr_s ;
++      NDS32_DSR32  dsr_s ;
++    } u ;
++} NDS32_FPU64_TAB;
++
++typedef struct
++{
++  int    ipc;
++  int    ipsw;
++#if defined(NDS32_EXT_FPU_CONFIG_0)
++  NDS32_FPU8_TAB fpr;
++#elif defined(NDS32_EXT_FPU_CONFIG_1)
++  NDS32_FPU16_TAB fpr;
++#elif defined(NDS32_EXT_FPU_CONFIG_2)
++  NDS32_FPU32_TAB fpr;
++#elif defined(NDS32_EXT_FPU_CONFIG_3)
++  NDS32_FPU64_TAB fpr;
++#endif
++#if __NDS32_DX_REGS__
++  NDS32_DX_TAB dxr;
++#endif
++#if __NDS32_EXT_IFC__
++  int    ifc_lp;
++  int    filler;
++#endif
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
++  NDS32_REG16_TAB gpr;
++#else
++  NDS32_REG32_TAB gpr;
++#endif
++} NDS32_CONTEXT;
++
++/* Predefined Vector Definition.
++
++   For IVIC Mode: 9 to 14 are for hardware interrupt
++                  and 15 is for software interrupt.
++   For EVIC Mode: 9 to 72 are for hardware interrupt
++                  and software interrupt can be routed to any one of them.
++
++   You may want to define your hardware interrupts in the following way
++   for easy maintenance.
++
++     IVIC mode:
++       #define MY_HW_IVIC_TIMER NDS32_VECTOR_INTERRUPT_HW0 + 1
++       #define MY_HW_IVIC_USB   NDS32_VECTOR_INTERRUPT_HW0 + 3
++     EVIC mode:
++       #define MY_HW_EVIC_DMA   NDS32_VECTOR_INTERRUPT_HW0 + 2
++       #define MY_HW_EVIC_SWI   NDS32_VECTOR_INTERRUPT_HW0 + 10 */
++#define NDS32_VECTOR_RESET               0
++#define NDS32_VECTOR_TLB_FILL            1
++#define NDS32_VECTOR_PTE_NOT_PRESENT     2
++#define NDS32_VECTOR_TLB_MISC            3
++#define NDS32_VECTOR_TLB_VLPT_MISS       4
++#define NDS32_VECTOR_MACHINE_ERROR       5
++#define NDS32_VECTOR_DEBUG_RELATED       6
++#define NDS32_VECTOR_GENERAL_EXCEPTION   7
++#define NDS32_VECTOR_SYSCALL             8
++#define NDS32_VECTOR_INTERRUPT_HW0       9
++#define NDS32_VECTOR_INTERRUPT_HW1       10
++#define NDS32_VECTOR_INTERRUPT_HW2       11
++#define NDS32_VECTOR_INTERRUPT_HW3       12
++#define NDS32_VECTOR_INTERRUPT_HW4       13
++#define NDS32_VECTOR_INTERRUPT_HW5       14
++#define NDS32_VECTOR_INTERRUPT_HW6       15
++#define NDS32_VECTOR_SWI                 15  /* THIS IS FOR IVIC MODE ONLY */
++#define NDS32_VECTOR_INTERRUPT_HW7       16
++#define NDS32_VECTOR_INTERRUPT_HW8       17
++#define NDS32_VECTOR_INTERRUPT_HW9       18
++#define NDS32_VECTOR_INTERRUPT_HW10      19
++#define NDS32_VECTOR_INTERRUPT_HW11      20
++#define NDS32_VECTOR_INTERRUPT_HW12      21
++#define NDS32_VECTOR_INTERRUPT_HW13      22
++#define NDS32_VECTOR_INTERRUPT_HW14      23
++#define NDS32_VECTOR_INTERRUPT_HW15      24
++#define NDS32_VECTOR_INTERRUPT_HW16      25
++#define NDS32_VECTOR_INTERRUPT_HW17      26
++#define NDS32_VECTOR_INTERRUPT_HW18      27
++#define NDS32_VECTOR_INTERRUPT_HW19      28
++#define NDS32_VECTOR_INTERRUPT_HW20      29
++#define NDS32_VECTOR_INTERRUPT_HW21      30
++#define NDS32_VECTOR_INTERRUPT_HW22      31
++#define NDS32_VECTOR_INTERRUPT_HW23      32
++#define NDS32_VECTOR_INTERRUPT_HW24      33
++#define NDS32_VECTOR_INTERRUPT_HW25      34
++#define NDS32_VECTOR_INTERRUPT_HW26      35
++#define NDS32_VECTOR_INTERRUPT_HW27      36
++#define NDS32_VECTOR_INTERRUPT_HW28      37
++#define NDS32_VECTOR_INTERRUPT_HW29      38
++#define NDS32_VECTOR_INTERRUPT_HW30      39
++#define NDS32_VECTOR_INTERRUPT_HW31      40
++#define NDS32_VECTOR_INTERRUPT_HW32      41
++#define NDS32_VECTOR_INTERRUPT_HW33      42
++#define NDS32_VECTOR_INTERRUPT_HW34      43
++#define NDS32_VECTOR_INTERRUPT_HW35      44
++#define NDS32_VECTOR_INTERRUPT_HW36      45
++#define NDS32_VECTOR_INTERRUPT_HW37      46
++#define NDS32_VECTOR_INTERRUPT_HW38      47
++#define NDS32_VECTOR_INTERRUPT_HW39      48
++#define NDS32_VECTOR_INTERRUPT_HW40      49
++#define NDS32_VECTOR_INTERRUPT_HW41      50
++#define NDS32_VECTOR_INTERRUPT_HW42      51
++#define NDS32_VECTOR_INTERRUPT_HW43      52
++#define NDS32_VECTOR_INTERRUPT_HW44      53
++#define NDS32_VECTOR_INTERRUPT_HW45      54
++#define NDS32_VECTOR_INTERRUPT_HW46      55
++#define NDS32_VECTOR_INTERRUPT_HW47      56
++#define NDS32_VECTOR_INTERRUPT_HW48      57
++#define NDS32_VECTOR_INTERRUPT_HW49      58
++#define NDS32_VECTOR_INTERRUPT_HW50      59
++#define NDS32_VECTOR_INTERRUPT_HW51      60
++#define NDS32_VECTOR_INTERRUPT_HW52      61
++#define NDS32_VECTOR_INTERRUPT_HW53      62
++#define NDS32_VECTOR_INTERRUPT_HW54      63
++#define NDS32_VECTOR_INTERRUPT_HW55      64
++#define NDS32_VECTOR_INTERRUPT_HW56      65
++#define NDS32_VECTOR_INTERRUPT_HW57      66
++#define NDS32_VECTOR_INTERRUPT_HW58      67
++#define NDS32_VECTOR_INTERRUPT_HW59      68
++#define NDS32_VECTOR_INTERRUPT_HW60      69
++#define NDS32_VECTOR_INTERRUPT_HW61      70
++#define NDS32_VECTOR_INTERRUPT_HW62      71
++#define NDS32_VECTOR_INTERRUPT_HW63      72
++
++#define NDS32ATTR_RESET(option)          __attribute__((reset(option)))
++#define NDS32ATTR_EXCEPT(type)           __attribute__((exception(type)))
++#define NDS32ATTR_EXCEPTION(type)        __attribute__((exception(type)))
++#define NDS32ATTR_INTERRUPT(type)        __attribute__((interrupt(type)))
++#define NDS32ATTR_ISR(type)              __attribute__((interrupt(type)))
++
++#endif /* nds32_isr.h */
+diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md
+index f7e2fa8..6cd854d 100644
+--- a/gcc/config/nds32/pipelines.md
++++ b/gcc/config/nds32/pipelines.md
+@@ -18,12 +18,65 @@
+ ;; along with GCC; see the file COPYING3.  If not see
+ ;; <http://www.gnu.org/licenses/>.
+ 
+-(define_automaton "nds32_machine")
++;; ------------------------------------------------------------------------
++;; Include N7 pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-n7.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include N8 pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-n8.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include E8 pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-e8.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include N9/N10 pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-n9-3r2w.md")
++(include "nds32-n9-2r1w.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include N10 pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-n10.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include Graywolf pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-graywolf.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include N12/N13 pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-n13.md")
++
++
++;; ------------------------------------------------------------------------
++;; Include Panther pipeline settings.
++;; ------------------------------------------------------------------------
++(include "nds32-panther.md")
++
++
++;; ------------------------------------------------------------------------
++;; Define simple pipeline settings.
++;; ------------------------------------------------------------------------
++
++(define_automaton "nds32_simple_machine")
+ 
+-(define_cpu_unit "general_unit" "nds32_machine")
++(define_cpu_unit "simple_unit" "nds32_simple_machine")
+ 
+ (define_insn_reservation "simple_insn" 1
+-			 (eq_attr "type" "unknown,load,store,move,alu,compare,branch,call,misc")
+-			 "general_unit")
++  (eq_attr "pipeline_model" "simple")
++  "simple_unit")
+ 
+ ;; ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md
+index 05a039d..71a3615 100644
+--- a/gcc/config/nds32/predicates.md
++++ b/gcc/config/nds32/predicates.md
+@@ -24,25 +24,93 @@
+ (define_predicate "nds32_greater_less_comparison_operator"
+   (match_code "gt,ge,lt,le"))
+ 
++(define_predicate "nds32_float_comparison_operator"
++  (match_code "eq,ne,le,lt,ge,gt,ordered,unordered,ungt,unge,unlt,unle"))
++
++(define_predicate "nds32_movecc_comparison_operator"
++  (match_code "eq,ne,le,leu,ge,geu"))
++
+ (define_special_predicate "nds32_logical_binary_operator"
+   (match_code "and,ior,xor"))
+ 
++(define_special_predicate "nds32_conditional_call_comparison_operator"
++  (match_code "lt,ge"))
++
++(define_special_predicate "nds32_have_33_inst_operator"
++  (match_code "mult,and,ior,xor"))
++
+ (define_predicate "nds32_symbolic_operand"
+-  (match_code "const,symbol_ref,label_ref"))
++  (and (match_code "const,symbol_ref,label_ref")
++       (match_test "!(TARGET_ICT_MODEL_LARGE
++		      && nds32_indirect_call_referenced_p (op))")))
++
++(define_predicate "nds32_nonunspec_symbolic_operand"
++  (and (match_code "const,symbol_ref,label_ref")
++       (match_test "!flag_pic && nds32_const_unspec_p (op)
++		    && !(TARGET_ICT_MODEL_LARGE
++			 && nds32_indirect_call_referenced_p (op))")))
++
++(define_predicate "nds32_label_operand"
++  (match_code "label_ref"))
+ 
+ (define_predicate "nds32_reg_constant_operand"
+-  (ior (match_operand 0 "register_operand")
+-       (match_operand 0 "const_int_operand")))
++  (match_code "reg,const_int"))
+ 
+ (define_predicate "nds32_rimm15s_operand"
+   (ior (match_operand 0 "register_operand")
+        (and (match_operand 0 "const_int_operand")
+ 	    (match_test "satisfies_constraint_Is15 (op)"))))
+ 
++(define_predicate "nds32_rimm11s_operand"
++  (ior (match_operand 0 "register_operand")
++       (and (match_operand 0 "const_int_operand")
++	    (match_test "satisfies_constraint_Is11 (op)"))))
++
++(define_predicate "nds32_imm_0_1_operand"
++  (and (match_operand 0 "const_int_operand")
++       (ior (match_test "satisfies_constraint_Iv00 (op)")
++	    (match_test "satisfies_constraint_Iv01 (op)"))))
++
++(define_predicate "nds32_imm_1_2_operand"
++  (and (match_operand 0 "const_int_operand")
++       (ior (match_test "satisfies_constraint_Iv01 (op)")
++	    (match_test "satisfies_constraint_Iv02 (op)"))))
++
++(define_predicate "nds32_imm_1_2_4_8_operand"
++  (and (match_operand 0 "const_int_operand")
++       (ior (ior (match_test "satisfies_constraint_Iv01 (op)")
++		 (match_test "satisfies_constraint_Iv02 (op)"))
++	    (ior (match_test "satisfies_constraint_Iv04 (op)")
++		 (match_test "satisfies_constraint_Iv08 (op)")))))
++
++(define_predicate "nds32_imm2u_operand"
++  (and (match_operand 0 "const_int_operand")
++       (match_test "satisfies_constraint_Iu02 (op)")))
++
++(define_predicate "nds32_imm4u_operand"
++  (and (match_operand 0 "const_int_operand")
++       (match_test "satisfies_constraint_Iu04 (op)")))
++
+ (define_predicate "nds32_imm5u_operand"
+   (and (match_operand 0 "const_int_operand")
+        (match_test "satisfies_constraint_Iu05 (op)")))
+ 
++(define_predicate "nds32_imm6u_operand"
++  (and (match_operand 0 "const_int_operand")
++       (match_test "satisfies_constraint_Iu06 (op)")))
++
++(define_predicate "nds32_rimm4u_operand"
++  (ior (match_operand 0 "register_operand")
++       (match_operand 0 "nds32_imm4u_operand")))
++
++(define_predicate "nds32_rimm5u_operand"
++  (ior (match_operand 0 "register_operand")
++       (match_operand 0 "nds32_imm5u_operand")))
++
++(define_predicate "nds32_rimm6u_operand"
++  (ior (match_operand 0 "register_operand")
++       (match_operand 0 "nds32_imm6u_operand")))
++
+ (define_predicate "nds32_move_operand"
+   (and (match_operand 0 "general_operand")
+        (not (match_code "high,const,symbol_ref,label_ref")))
+@@ -57,12 +125,121 @@
+   return true;
+ })
+ 
++(define_predicate "nds32_vmove_operand"
++  (and (match_operand 0 "general_operand")
++       (not (match_code "high,const,symbol_ref,label_ref")))
++{
++  /* If the constant vector op does NOT satisfy CVs2 nor CVhi,
++     we can not perform move behavior by a single instruction.  */
++  if (GET_CODE (op) == CONST_VECTOR
++      && !satisfies_constraint_CVs2 (op)
++      && !satisfies_constraint_CVhi (op))
++    return false;
++
++  return true;
++})
++
++(define_predicate "nds32_and_operand"
++  (match_code "reg,const_int")
++{
++  return (REG_P (op) && GET_MODE (op) == mode)
++	 || satisfies_constraint_Izeb (op)
++	 || satisfies_constraint_Izeh (op)
++	 || satisfies_constraint_Ixls (op)
++	 || satisfies_constraint_Ix11 (op)
++	 || satisfies_constraint_Ibms (op)
++	 || satisfies_constraint_Ifex (op)
++	 || satisfies_constraint_Iu15 (op)
++	 || satisfies_constraint_Ii15 (op)
++	 || satisfies_constraint_Ic15 (op);
++})
++
++(define_predicate "nds32_ior_operand"
++  (match_code "reg,const_int")
++{
++  return (REG_P (op) && GET_MODE (op) == mode)
++	 || satisfies_constraint_Iu15 (op)
++	 || satisfies_constraint_Ie15 (op);
++})
++
++(define_predicate "nds32_xor_operand"
++  (match_code "reg,const_int")
++{
++  return (REG_P (op) && GET_MODE (op) == mode)
++	 || GET_CODE (op) == SUBREG /* NOTE(review): looks dead; match_code omits subreg.  */
++	 || satisfies_constraint_Iu15 (op)
++	 || satisfies_constraint_It15 (op);
++})
++
++(define_predicate "nds32_general_register_operand"
++  (match_code "reg,subreg")
++{
++  if (GET_CODE (op) == SUBREG)
++    op = SUBREG_REG (op);
++
++  return (REG_P (op)
++	  && (REGNO (op) >= FIRST_PSEUDO_REGISTER
++	      || REGNO (op) <= NDS32_LAST_GPR_REGNUM));
++})
++
++(define_predicate "nds32_fpu_register_operand"
++  (match_code "reg,subreg")
++{
++  if (GET_CODE (op) == SUBREG)
++    op = SUBREG_REG (op);
++
++  return (REG_P (op)
++	  && NDS32_IS_FPR_REGNUM (REGNO (op)));
++})
++
++(define_predicate "fpu_reg_or_memory_operand"
++  (ior (match_operand 0 "nds32_fpu_register_operand")
++       (match_operand 0 "memory_operand")))
++
++(define_predicate "nds32_call_address_operand"
++  (ior (match_operand 0 "nds32_symbolic_operand")
++       (match_operand 0 "nds32_general_register_operand")))
++
++(define_predicate "nds32_insv_operand"
++  (match_code "const_int")
++{
++  return INTVAL (op) == 0
++	 || INTVAL (op) == 8
++	 || INTVAL (op) == 16
++	 || INTVAL (op) == 24;
++})
++
++(define_predicate "nds32_lmw_smw_base_operand"
++  (and (match_code "mem")
++       (match_test "nds32_valid_smw_lwm_base_p (op)")))
++
++(define_predicate "float_even_register_operand"
++  (and (match_code "reg")
++       (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM")
++	    (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM")
++	    (match_test "(REGNO (op) & 1) == 0"))))
++
++(define_predicate "float_odd_register_operand"
++  (and (match_code "reg")
++       (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM")
++	    (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM")
++	    (match_test "(REGNO (op) & 1) != 0"))))
++
+ (define_special_predicate "nds32_load_multiple_operation"
+   (match_code "parallel")
+ {
+   /* To verify 'load' operation, pass 'true' for the second argument.
+      See the implementation in nds32.c for details.  */
+-  return nds32_valid_multiple_load_store (op, true);
++  return nds32_valid_multiple_load_store_p (op, true, false);
++})
++
++(define_special_predicate "nds32_load_multiple_and_update_address_operation"
++  (match_code "parallel")
++{
++  /* To verify 'load' operation, pass 'true' for the second argument;
++     to verify 'update address' operation, pass 'true' for the third argument.
++     See the implementation in nds32.c for details.  */
++  return nds32_valid_multiple_load_store_p (op, true, true);
+ })
+ 
+ (define_special_predicate "nds32_store_multiple_operation"
+@@ -70,7 +247,16 @@
+ {
+   /* To verify 'store' operation, pass 'false' for the second argument.
+      See the implementation in nds32.c for details.  */
+-  return nds32_valid_multiple_load_store (op, false);
++  return nds32_valid_multiple_load_store_p (op, false, false);
++})
++
++(define_special_predicate "nds32_store_multiple_and_update_address_operation"
++  (match_code "parallel")
++{
++  /* To verify 'store' operation, pass 'false' for the second argument;
++     to verify 'update address' operation, pass 'true' for the third argument.
++     See the implementation in nds32.c for details.  */
++  return nds32_valid_multiple_load_store_p (op, false, true);
+ })
+ 
+ (define_special_predicate "nds32_stack_push_operation"
+diff --git a/gcc/config/nds32/t-elf b/gcc/config/nds32/t-elf
+new file mode 100644
+index 0000000..a63a310
+--- /dev/null
++++ b/gcc/config/nds32/t-elf
+@@ -0,0 +1,42 @@
++# The multilib settings of Andes NDS32 cpu for GNU compiler
++# Copyright (C) 2012-2016 Free Software Foundation, Inc.
++# Contributed by Andes Technology Corporation.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License as published
++# by the Free Software Foundation; either version 3, or (at your
++# option) any later version.
++#
++# GCC is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++# License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.
++
++# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the
++# driver program which options are defaults for this target and thus
++# do not need to be handled specially.
++MULTILIB_OPTIONS += mcmodel=small/mcmodel=medium/mcmodel=large mvh
++
++ifneq ($(filter graywolf,$(TM_MULTILIB_CONFIG)),)
++MULTILIB_OPTIONS += mcpu=graywolf
++endif
++
++ifneq ($(filter dsp,$(TM_MULTILIB_CONFIG)),)
++MULTILIB_OPTIONS += mext-dsp
++endif
++
++ifneq ($(filter zol,$(TM_MULTILIB_CONFIG)),)
++MULTILIB_OPTIONS += mext-zol
++endif
++
++ifneq ($(filter v3m+,$(TM_MULTILIB_CONFIG)),)
++MULTILIB_OPTIONS += march=v3m+
++endif
++
++# ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/t-mlibs b/gcc/config/nds32/t-linux
+similarity index 94%
+rename from gcc/config/nds32/t-mlibs
+rename to gcc/config/nds32/t-linux
+index 5cb13f7..a4d8ab3 100644
+--- a/gcc/config/nds32/t-mlibs
++++ b/gcc/config/nds32/t-linux
+@@ -21,6 +21,6 @@
+ # We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the
+ # driver program which options are defaults for this target and thus
+ # do not need to be handled specially.
+-MULTILIB_OPTIONS = mcmodel=small/mcmodel=medium/mcmodel=large
++MULTILIB_OPTIONS +=
+ 
+ # ------------------------------------------------------------------------
+diff --git a/gcc/config/nds32/t-nds32 b/gcc/config/nds32/t-nds32
+index cf3aea6..e34b844 100644
+--- a/gcc/config/nds32/t-nds32
++++ b/gcc/config/nds32/t-nds32
+@@ -1,51 +1,294 @@
+-# General rules that all nds32/ targets must have.
++# Dependency rules of Andes NDS32 cpu for GNU compiler
+ # Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ # Contributed by Andes Technology Corporation.
+ #
+ # This file is part of GCC.
+ #
+-# GCC is free software; you can redistribute it and/or modify
+-# it under the terms of the GNU General Public License as published by
+-# the Free Software Foundation; either version 3, or (at your option)
+-# any later version.
++# GCC is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License as published
++# by the Free Software Foundation; either version 3, or (at your
++# option) any later version.
+ #
+-# GCC is distributed in the hope that it will be useful,
+-# but WITHOUT ANY WARRANTY; without even the implied warranty of
+-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-# GNU General Public License for more details.
++# GCC is distributed in the hope that it will be useful, but WITHOUT
++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++# License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with GCC; see the file COPYING3.  If not see
+ # <http://www.gnu.org/licenses/>.
+ 
+-nds32-cost.o: $(srcdir)/config/nds32/nds32-cost.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
+ 
+-nds32-intrinsic.o: $(srcdir)/config/nds32/nds32-intrinsic.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-md-auxiliary.o: $(srcdir)/config/nds32/nds32-md-auxiliary.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-md-auxiliary.c
+ 
+-nds32-isr.o: $(srcdir)/config/nds32/nds32-isr.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-memory-manipulation.o: $(srcdir)/config/nds32/nds32-memory-manipulation.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-memory-manipulation.c
+ 
+-nds32-md-auxiliary.o: $(srcdir)/config/nds32/nds32-md-auxiliary.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-predicates.o: $(srcdir)/config/nds32/nds32-predicates.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-predicates.c
+ 
+-nds32-pipelines-auxiliary.o: $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-intrinsic.o: $(srcdir)/config/nds32/nds32-intrinsic.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-intrinsic.c
+ 
+-nds32-predicates.o: $(srcdir)/config/nds32/nds32-predicates.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-pipelines-auxiliary.o: \
++  $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-pipelines-auxiliary.c
+ 
+-nds32-memory-manipulation.o: $(srcdir)/config/nds32/nds32-memory-manipulation.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-isr.o: \
++  $(srcdir)/config/nds32/nds32-isr.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-isr.c
+ 
+-nds32-fp-as-gp.o: $(srcdir)/config/nds32/nds32-fp-as-gp.c
+-	$(COMPILE) $<
+-	$(POSTCOMPILE)
++nds32-cost.o: \
++  $(srcdir)/config/nds32/nds32-cost.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-cost.c
++
++nds32-fp-as-gp.o: \
++  $(srcdir)/config/nds32/nds32-fp-as-gp.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-fp-as-gp.c
++
++nds32-load-store-opt.o: \
++  $(srcdir)/config/nds32/nds32-load-store-opt.c \
++  $(srcdir)/config/nds32/nds32-load-store-opt.h \
++  $(srcdir)/config/nds32/nds32-reg-utils.h \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-load-store-opt.c
++
++nds32-soft-fp-comm.o: \
++  $(srcdir)/config/nds32/nds32-soft-fp-comm.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-soft-fp-comm.c
++
++nds32-regrename.o: \
++  $(srcdir)/config/nds32/nds32-regrename.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-regrename.c
++
++nds32-gcse.o: \
++  $(srcdir)/config/nds32/nds32-gcse.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-gcse.c
++
++nds32-relax-opt.o: \
++  $(srcdir)/config/nds32/nds32-relax-opt.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-relax-opt.c
++
++nds32-cprop-acc.o: \
++  $(srcdir)/config/nds32/nds32-cprop-acc.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-cprop-acc.c
++
++nds32-sign-conversion.o: \
++  $(srcdir)/config/nds32/nds32-sign-conversion.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(GIMPLE_H) $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-sign-conversion.c
++
++nds32-scalbn-transform.o: \
++  $(srcdir)/config/nds32/nds32-scalbn-transform.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(GIMPLE_H) $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-scalbn-transform.c
++
++nds32-abi-compatible.o: \
++  $(srcdir)/config/nds32/nds32-abi-compatible.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(GIMPLE_H) $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-abi-compatible.c
++
++nds32-lmwsmw.o: \
++  $(srcdir)/config/nds32/nds32-lmwsmw.c \
++  $(srcdir)/config/nds32/nds32-load-store-opt.h \
++  $(srcdir)/config/nds32/nds32-reg-utils.h \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-lmwsmw.c
++
++nds32-reg-utils.o: \
++  $(srcdir)/config/nds32/nds32-reg-utils.c \
++  $(srcdir)/config/nds32/nds32-reg-utils.h \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-reg-utils.c
++
++nds32-const-remater.o: \
++  $(srcdir)/config/nds32/nds32-const-remater.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-const-remater.c
++
++nds32-utils.o: \
++  $(srcdir)/config/nds32/nds32-utils.c \
++  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
++  $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
++  insn-config.h conditions.h output.h dumpfile.h \
++  $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
++  $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
++  $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
++  $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
++  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/nds32/nds32-utils.c
+diff --git a/gcc/configure b/gcc/configure
+index 954673c..ca21885 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -27327,7 +27327,7 @@ esac
+ # version to the per-target configury.
+ case "$cpu_type" in
+   aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \
+-  | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
++  | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
+   | visium | xstormy16 | xtensa)
+     insn="nop"
+     ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index 4c65d44..d7a5efc 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -4667,7 +4667,7 @@ esac
+ # version to the per-target configury.
+ case "$cpu_type" in
+   aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \
+-  | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
++  | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
+   | visium | xstormy16 | xtensa)
+     insn="nop"
+     ;;
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index ee2715d..37fa3b5 100644
+--- a/gcc/doc/extend.texi
++++ b/gcc/doc/extend.texi
+@@ -13587,38 +13587,33 @@ builtin is exact.
+ 
+ These built-in functions are available for the NDS32 target:
+ 
+-@deftypefn {Built-in Function} void __builtin_nds32_isync (int *@var{addr})
++@table @code
++@item void __builtin_nds32_isync (int *@var{addr})
+ Insert an ISYNC instruction into the instruction stream where
+ @var{addr} is an instruction address for serialization.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} void __builtin_nds32_isb (void)
++@item void __builtin_nds32_isb (void)
+ Insert an ISB instruction into the instruction stream.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} int __builtin_nds32_mfsr (int @var{sr})
++@item int __builtin_nds32_mfsr (int @var{sr})
+ Return the content of a system register which is mapped by @var{sr}.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} int __builtin_nds32_mfusr (int @var{usr})
++@item int __builtin_nds32_mfusr (int @var{usr})
+ Return the content of a user space register which is mapped by @var{usr}.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} void __builtin_nds32_mtsr (int @var{value}, int @var{sr})
++@item void __builtin_nds32_mtsr (int @var{value}, int @var{sr})
+ Move the @var{value} to a system register which is mapped by @var{sr}.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} void __builtin_nds32_mtusr (int @var{value}, int @var{usr})
++@item void __builtin_nds32_mtusr (int @var{value}, int @var{usr})
+ Move the @var{value} to a user space register which is mapped by @var{usr}.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} void __builtin_nds32_setgie_en (void)
++@item void __builtin_nds32_setgie_en (void)
+ Enable global interrupt.
+-@end deftypefn
+ 
+-@deftypefn {Built-in Function} void __builtin_nds32_setgie_dis (void)
++@item void __builtin_nds32_setgie_dis (void)
+ Disable global interrupt.
+-@end deftypefn
++
++@end table
+ 
+ @node picoChip Built-in Functions
+ @subsection picoChip Built-in Functions
+diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
+index b60b53a..fc23722 100644
+--- a/gcc/doc/install.texi
++++ b/gcc/doc/install.texi
+@@ -2109,7 +2109,7 @@ supported since version 4.7.2 and is the default in 4.8.0 and newer.
+ 
+ @item --with-nds32-lib=@var{library}
+ Specifies that @var{library} setting is used for building @file{libgcc.a}.
+-Currently, the valid @var{library} is @samp{newlib} or @samp{mculib}.
++Currently, the valid @var{library} values are @samp{newlib} and @samp{mculib}.
+ This option is only supported for the NDS32 target.
+ 
+ @item --with-build-time-tools=@var{dir}
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 2ed9285..75e0042 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -904,13 +904,19 @@ Objective-C and Objective-C++ Dialects}.
+ -mreduced-regs -mfull-regs @gol
+ -mcmov -mno-cmov @gol
+ -mperf-ext -mno-perf-ext @gol
++-mperf2-ext -mno-perf2-ext @gol
++-mstring-ext -mno-string-ext @gol
+ -mv3push -mno-v3push @gol
+ -m16bit -mno-16bit @gol
++-mgp-direct -mno-gp-direct @gol
+ -misr-vector-size=@var{num} @gol
+ -mcache-block-size=@var{num} @gol
+ -march=@var{arch} @gol
+--mcmodel=@var{code-model} @gol
+--mctor-dtor -mrelax}
++-mcpu=@var{cpu} @gol
++-mmemory-model=@var{cpu} @gol
++-mconfig-register-ports=@var{ports} @gol
++-mforce-fp-as-gp -mforbid-fp-as-gp @gol
++-mex9 -mctor-dtor -mrelax}
+ 
+ @emph{Nios II Options}
+ @gccoptlist{-G @var{num} -mgpopt=@var{option} -mgpopt -mno-gpopt @gol
+@@ -5006,7 +5012,7 @@ example, warn if an unsigned variable is compared against zero with
+ @opindex Wbad-function-cast
+ @opindex Wno-bad-function-cast
+ Warn when a function call is cast to a non-matching type.
+-For example, warn if a call to a function returning an integer type 
++For example, warn if a call to a function returning an integer type
+ is cast to a pointer type.
+ 
+ @item -Wc90-c99-compat @r{(C and Objective-C only)}
+@@ -19089,6 +19095,22 @@ Generate performance extension instructions.
+ @opindex mno-perf-ext
+ Do not generate performance extension instructions.
+ 
++@item -mperf2-ext
++@opindex mperf2-ext
++Generate performance extension version 2 instructions.
++
++@item -mno-perf2-ext
++@opindex mno-perf2-ext
++Do not generate performance extension version 2 instructions.
++
++@item -mstring-ext
++@opindex mstring-ext
++Generate string extension instructions.
++
++@item -mno-string-ext
++@opindex mno-string-ext
++Do not generate string extension instructions.
++
+ @item -mv3push
+ @opindex mv3push
+ Generate v3 push25/pop25 instructions.
+@@ -19105,6 +19127,14 @@ Generate 16-bit instructions.
+ @opindex mno-16-bit
+ Do not generate 16-bit instructions.
+ 
++@item -mgp-direct
++@opindex mgp-direct
++Generate GP base instructions directly.
++
++@item -mno-gp-direct
++@opindex mno-gp-direct
++Do not generate GP base instructions directly.
++
+ @item -misr-vector-size=@var{num}
+ @opindex misr-vector-size
+ Specify the size of each interrupt vector, which must be 4 or 16.
+@@ -19118,20 +19148,33 @@ which must be a power of 2 between 4 and 512.
+ @opindex march
+ Specify the name of the target architecture.
+ 
+-@item -mcmodel=@var{code-model}
+-@opindex mcmodel
+-Set the code model to one of
+-@table @asis
+-@item @samp{small}
+-All the data and read-only data segments must be within 512KB addressing space.
+-The text segment must be within 16MB addressing space.
+-@item @samp{medium}
+-The data segment must be within 512KB while the read-only data segment can be
+-within 4GB addressing space.  The text segment should be still within 16MB
+-addressing space.
+-@item @samp{large}
+-All the text and data segments can be within 4GB addressing space.
+-@end table
++@item -mcpu=@var{cpu}
++@opindex mcpu
++Specify the CPU for the pipeline model.
++
++@item -mmemory-model=@var{cpu}
++@opindex mmemory-model
++Specify fast or slow memory model.
++
++@item -mconfig-register-ports=@var{ports}
++@opindex mconfig-register-ports
++Specify the number of read/write ports for n9/n10 cores.
++The value should be @samp{3r2w} or @samp{2r1w}.
++
++@item -mforce-fp-as-gp
++@opindex mforce-fp-as-gp
++Prevent $fp from being allocated during register allocation so that the
++compiler is able to force the fp-as-gp optimization.
++
++@item -mforbid-fp-as-gp
++@opindex mforbid-fp-as-gp
++Forbid using $fp to access static and global variables.
++This option strictly forbids fp-as-gp optimization
++regardless of @option{-mforce-fp-as-gp}.
++
++@item -mex9
++@opindex mex9
++Use special directives to guide the linker in performing ex9 optimization.
+ 
+ @item -mctor-dtor
+ @opindex mctor-dtor
+@@ -19159,55 +19202,15 @@ Put global and static objects less than or equal to @var{num} bytes
+ into the small data or BSS sections instead of the normal data or BSS
+ sections.  The default value of @var{num} is 8.
+ 
+-@item -mgpopt=@var{option}
+ @item -mgpopt
+ @itemx -mno-gpopt
+ @opindex mgpopt
+ @opindex mno-gpopt
+-Generate (do not generate) GP-relative accesses.  The following 
+-@var{option} names are recognized:
+-
+-@table @samp
+-
+-@item none
+-Do not generate GP-relative accesses.
+-
+-@item local
+-Generate GP-relative accesses for small data objects that are not 
+-external, weak, or uninitialized common symbols.  
+-Also use GP-relative addressing for objects that
+-have been explicitly placed in a small data section via a @code{section}
+-attribute.
+-
+-@item global
+-As for @samp{local}, but also generate GP-relative accesses for
+-small data objects that are external, weak, or common.  If you use this option,
+-you must ensure that all parts of your program (including libraries) are
+-compiled with the same @option{-G} setting.
+-
+-@item data
+-Generate GP-relative accesses for all data objects in the program.  If you
+-use this option, the entire data and BSS segments
+-of your program must fit in 64K of memory and you must use an appropriate
+-linker script to allocate them within the addressable range of the
+-global pointer.
+-
+-@item all
+-Generate GP-relative addresses for function pointers as well as data
+-pointers.  If you use this option, the entire text, data, and BSS segments
+-of your program must fit in 64K of memory and you must use an appropriate
+-linker script to allocate them within the addressable range of the
+-global pointer.
+-
+-@end table
+-
+-@option{-mgpopt} is equivalent to @option{-mgpopt=local}, and
+-@option{-mno-gpopt} is equivalent to @option{-mgpopt=none}.
+-
+-The default is @option{-mgpopt} except when @option{-fpic} or
+-@option{-fPIC} is specified to generate position-independent code.
+-Note that the Nios II ABI does not permit GP-relative accesses from
+-shared libraries.
++Generate (do not generate) GP-relative accesses for objects in the
++small data or BSS sections.  The default is @option{-mgpopt} except
++when @option{-fpic} or @option{-fPIC} is specified to generate
++position-independent code.  Note that the Nios II ABI does not permit
++GP-relative accesses from shared libraries.
+ 
+ You may need to specify @option{-mno-gpopt} explicitly when building
+ programs that include large amounts of small data, including large
+diff --git a/gcc/gcc.c b/gcc/gcc.c
+index 0f042b0..5c43f33 100644
+--- a/gcc/gcc.c
++++ b/gcc/gcc.c
+@@ -1288,7 +1288,7 @@ static const struct compiler default_compilers[] =
+   {".zip", "#Java", 0, 0, 0}, {".jar", "#Java", 0, 0, 0},
+   {".go", "#Go", 0, 1, 0},
+   /* Next come the entries for C.  */
+-  {".c", "@c", 0, 0, 1},
++  {".c", "@nds32_c", 0, 0, 1},
+   {"@c",
+    /* cc1 has an integrated ISO C preprocessor.  We should invoke the
+       external preprocessor if -save-temps is given.  */
+@@ -1303,6 +1303,38 @@ static const struct compiler default_compilers[] =
+       %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\
+ 	  cc1 %(cpp_unique_options) %(cc1_options)}}}\
+       %{!fsyntax-only:%(invoke_as)}}}}", 0, 0, 1},
++  {"@nds32_c",
++   /* cc1 has an integrated ISO C preprocessor.  We should invoke the
++      external preprocessor if -save-temps is given.  */
++     "%{E|M|MM:%(trad_capable_cpp) %(cpp_options) %(cpp_debug_options)}\
++      %{mace:\
++	  %{!E:%{!M:%{!MM:\
++	      %{traditional:\
++%eGNU C no longer supports -traditional without -E}\
++	  %{save-temps*|traditional-cpp|no-integrated-cpp:%(trad_capable_cpp) \
++	      %(cpp_options) -o %{save-temps*:%b.i} %{!save-temps*:%g.i} \n\
++		cs2 %{mace-s2s*} %{save-temps*:%b.i} %{!save-temps*:%g.i} \
++		    -o %{save-temps*:%b.ace.i} %{!save-temps*:%g.ace.i} --\n\
++		cc1 -fpreprocessed %{save-temps*:%b.ace.i} %{!save-temps*:%g.ace.i} \
++	      %(cc1_options)}\
++	  %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\
++	      %(trad_capable_cpp) %(cpp_options) -o %u.i\n}}}\
++	  %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\
++	      cs2 %{mace-s2s*} %U.i -o %u.ace.i --\n}}}\
++	  %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\
++	      cc1 -fpreprocessed %U.ace.i %(cc1_options)}}}\
++	  %{!fsyntax-only:%(invoke_as)}}}}}\
++      %{!mace:\
++	  %{!E:%{!M:%{!MM:\
++	      %{traditional:\
++%eGNU C no longer supports -traditional without -E}\
++	  %{save-temps*|traditional-cpp|no-integrated-cpp:%(trad_capable_cpp) \
++	      %(cpp_options) -o %{save-temps*:%b.i} %{!save-temps*:%g.i} \n\
++		cc1 -fpreprocessed %{save-temps*:%b.i} %{!save-temps*:%g.i} \
++	      %(cc1_options)}\
++	  %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\
++	      cc1 %(cpp_unique_options) %(cc1_options)}}}\
++	  %{!fsyntax-only:%(invoke_as)}}}}}", 0, 0, 1},
+   {"-",
+    "%{!E:%e-E or -x required when input is from standard input}\
+     %(trad_capable_cpp) %(cpp_options) %(cpp_debug_options)", 0, 0, 0},
+diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
+index 4d26e2f..60f934c 100644
+--- a/gcc/loop-unroll.c
++++ b/gcc/loop-unroll.c
+@@ -1132,7 +1132,9 @@ decide_unroll_stupid (struct loop *loop, int flags)
+      of mispredicts. 
+      TODO: this heuristic needs tunning; call inside the loop body
+      is also relatively good reason to not unroll.  */
+-  if (num_loop_branches (loop) > 1)
++  unsigned branch_count = PARAM_VALUE (PARAM_MAX_LOOP_UNROLL_BRANCH);
++
++  if (num_loop_branches (loop) > branch_count)
+     {
+       if (dump_file)
+ 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
+diff --git a/gcc/opt-read.awk b/gcc/opt-read.awk
+index b304ccb..2e6e8df 100644
+--- a/gcc/opt-read.awk
++++ b/gcc/opt-read.awk
+@@ -99,6 +99,7 @@ BEGIN {
+ 			val_flags = "0"
+ 			val_flags = val_flags \
+ 			  test_flag("Canonical", props, "| CL_ENUM_CANONICAL") \
++			  test_flag("Undocumented", props, "| CL_UNDOCUMENTED") \
+ 			  test_flag("DriverOnly", props, "| CL_ENUM_DRIVER_ONLY")
+ 			enum_data[enum_name] = enum_data[enum_name] \
+ 			  "  { " quote string quote ", " value ", " val_flags \
+diff --git a/gcc/opts.c b/gcc/opts.c
+index 0f9431a..da75332 100644
+--- a/gcc/opts.c
++++ b/gcc/opts.c
+@@ -1271,6 +1271,10 @@ print_filtered_help (unsigned int include_flags,
+ 	{
+ 	  unsigned int len = strlen (cl_enums[i].values[j].arg);
+ 
++	  /* Skip undocumented enum values.  */
++	  if (cl_enums[i].values[j].flags & CL_UNDOCUMENTED)
++	     continue;
++
+ 	  if (pos > 4 && pos + 1 + len <= columns)
+ 	    {
+ 	      printf (" %s", cl_enums[i].values[j].arg);
+diff --git a/gcc/params.def b/gcc/params.def
+index dbff305..44847b3 100644
+--- a/gcc/params.def
++++ b/gcc/params.def
+@@ -297,6 +297,11 @@ DEFPARAM(PARAM_MAX_UNROLL_TIMES,
+ 	"max-unroll-times",
+ 	"The maximum number of unrollings of a single loop.",
+ 	8, 0, 0)
++/* The maximum number of branches in a loop that may still be unrolled.  */
++DEFPARAM (PARAM_MAX_LOOP_UNROLL_BRANCH,
++         "max-unroll-loop-branch",
++         "Maximum number of loop branch count",
++         1, 1, 20)
+ /* The maximum number of insns of a peeled loop.  */
+ DEFPARAM(PARAM_MAX_PEELED_INSNS,
+ 	"max-peeled-insns",
+diff --git a/gcc/testsuite/g++.dg/init/array15.C b/gcc/testsuite/g++.dg/init/array15.C
+index 17160d0..280fe69 100644
+--- a/gcc/testsuite/g++.dg/init/array15.C
++++ b/gcc/testsuite/g++.dg/init/array15.C
+@@ -1,4 +1,6 @@
+ // { dg-do run }
++// { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } }
++// { dg-options "-mcmodel=large" { target nds32*-*-elf* } }
+ 
+ // Copyright (C) 2004 Free Software Foundation, Inc.
+ // Contributed by Nathan Sidwell 8 Dec 2004 <nathan@codesourcery.com>
+diff --git a/gcc/testsuite/g++.dg/init/array16.C b/gcc/testsuite/g++.dg/init/array16.C
+index 188d1a8..83c0d47 100644
+--- a/gcc/testsuite/g++.dg/init/array16.C
++++ b/gcc/testsuite/g++.dg/init/array16.C
+@@ -2,6 +2,7 @@
+ // have "compile" for some targets and "run" for others.
+ // { dg-do run { target { ! mmix-*-* } } }
+ // { dg-options "-mstructure-size-boundary=8" { target arm*-*-* } }
++// { dg-skip-if "" { nds32_gp_direct } }
+ 
+ // Copyright (C) 2004 Free Software Foundation, Inc.
+ // Contributed by Nathan Sidwell 8 Dec 2004 <nathan@codesourcery.com>
+diff --git a/gcc/testsuite/g++.dg/torture/type-generic-1.C b/gcc/testsuite/g++.dg/torture/type-generic-1.C
+index 4d82592..5ae789c 100644
+--- a/gcc/testsuite/g++.dg/torture/type-generic-1.C
++++ b/gcc/testsuite/g++.dg/torture/type-generic-1.C
+@@ -4,6 +4,7 @@
+ /* { dg-do run } */
+ /* { dg-add-options ieee } */
+ /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */
++/* { dg-skip-if "No denormalized support" { nds32_ext_fpu } } */
+ 
+ #include "../../gcc.dg/tg-tests.h"
+ 
+diff --git a/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c b/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c
+index 228c5d9..d2d3e51 100644
+--- a/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c
++++ b/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c
+@@ -1,4 +1,5 @@
+ /* { dg-skip-if "too complex for avr" { avr-*-* } { "*" } { "" } } */
++/* { dg-skip-if "lto may cause internal compiler error on cygwin with gcc-4.9" { nds32*-*-* } { "*" } { "" } } */
+ /* { dg-skip-if "ptxas times out" { nvptx-*-* } { "*" } { "" } } */
+ /* { dg-timeout-factor 4.0 } */
+ #define LIM1(x) x##0, x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9,
+diff --git a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c
+index 4eeb8c7..6cd02bc 100644
+--- a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c
++++ b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c
+@@ -1,4 +1,5 @@
+ /* { dg-skip-if "requires frame pointers" { *-*-* } "-fomit-frame-pointer" "" } */
++/* { dg-additional-options "-malways-save-lp" { target nds32*-*-* } } */
+ /* { dg-require-effective-target return_address } */
+ 
+ extern void exit (int);
+diff --git a/gcc/testsuite/gcc.c-torture/execute/920501-8.x b/gcc/testsuite/gcc.c-torture/execute/920501-8.x
+new file mode 100644
+index 0000000..96f05bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/execute/920501-8.x
+@@ -0,0 +1,11 @@
++# Please see Andes Bugzilla #11005 for the details.
++if { [istarget "nds32*-*-*"] } {
++	# The nds32 mculib toolchains require
++	# "-u_printf_float" and "-u_scanf_float" options
++	# to fully support printf and scanf functionality.
++	# These options are supposed to be harmless to newlib toolchain.
++	set additional_flags "-u_printf_float -u_scanf_float"
++}
++
++return 0
++
+diff --git a/gcc/testsuite/gcc.c-torture/execute/930513-1.x b/gcc/testsuite/gcc.c-torture/execute/930513-1.x
+new file mode 100644
+index 0000000..96f05bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/execute/930513-1.x
+@@ -0,0 +1,11 @@
++# Please see Andes Bugzilla #11005 for the details.
++if { [istarget "nds32*-*-*"] } {
++	# The nds32 mculib toolchains require
++	# "-u_printf_float" and "-u_scanf_float" options
++	# to fully support printf and scanf functionality.
++	# These options are supposed to be harmless to newlib toolchain.
++	set additional_flags "-u_printf_float -u_scanf_float"
++}
++
++return 0
++
+diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp b/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp
+index 009984e..19cfcca 100644
+--- a/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp
++++ b/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp
+@@ -30,6 +30,10 @@ load_lib c-torture.exp
+ # Disable tests on machines with no hardware support for IEEE arithmetic.
+ if { [istarget "vax-*-*"] || [ istarget "powerpc-*-*spe"] || [istarget "pdp11-*-*"] } { return }
+ 
++# Since we cannot use dg-skip-if or dg-require-effective-target for individual
++# test cases in the ieee category, we disable all ieee tests on nds32 fpu toolchains.
++if { [istarget "nds32*-*-*"] && [check_effective_target_nds32_ext_fpu] } { return }
++
+ if $tracelevel then {
+     strace $tracelevel
+ }
+diff --git a/gcc/testsuite/gcc.c-torture/execute/pr60822.c b/gcc/testsuite/gcc.c-torture/execute/pr60822.c
+index dcd2447..a305df3 100644
+--- a/gcc/testsuite/gcc.c-torture/execute/pr60822.c
++++ b/gcc/testsuite/gcc.c-torture/execute/pr60822.c
+@@ -1,4 +1,5 @@
+ /* { dg-require-effective-target int32plus } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ struct X {
+     char fill0[800000];
+     int a;
+diff --git a/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x b/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x
+new file mode 100644
+index 0000000..96f05bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x
+@@ -0,0 +1,11 @@
++# Please see Andes Bugzilla #11005 for the details.
++if { [istarget "nds32*-*-*"] } {
++	# The nds32 mculib toolchains require
++	# "-u_printf_float" and "-u_scanf_float" options
++	# to fully support printf and scanf functionality.
++	# These options are supposed to be harmless to newlib toolchain.
++	set additional_flags "-u_printf_float -u_scanf_float"
++}
++
++return 0
++
+diff --git a/gcc/testsuite/gcc.dg/constructor-1.c b/gcc/testsuite/gcc.dg/constructor-1.c
+index 73e9fc3..827987e 100644
+--- a/gcc/testsuite/gcc.dg/constructor-1.c
++++ b/gcc/testsuite/gcc.dg/constructor-1.c
+@@ -1,6 +1,7 @@
+ /* { dg-do run } */
+ /* { dg-options "-O2" } */
+ /* { dg-skip-if "" { ! global_constructor } { "*" } { "" } } */
++/* { dg-options "-O2 -mctor-dtor" { target { nds32*-*-* } } } */
+ 
+ /* The ipa-split pass pulls the body of the if(!x) block
+    into a separate function to make foo a better inlining
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-0.c b/gcc/testsuite/gcc.dg/graphite/interchange-0.c
+index d56be46..b83535c 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-0.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-0.c
+@@ -1,4 +1,5 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ 
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-1.c b/gcc/testsuite/gcc.dg/graphite/interchange-1.c
+index b65d486..2d77f0e 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-1.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-1.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ /* Formerly known as ltrans-1.c */
+ 
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-10.c b/gcc/testsuite/gcc.dg/graphite/interchange-10.c
+index a955644..2021de2 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-10.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-10.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-11.c b/gcc/testsuite/gcc.dg/graphite/interchange-11.c
+index 6102822..5abb316 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-11.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-11.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-15.c b/gcc/testsuite/gcc.dg/graphite/interchange-15.c
+index 7410f29..1f71f06 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-15.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-15.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-2.c b/gcc/testsuite/gcc.dg/graphite/interchange-2.c
+index 936ee00..0041649 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-2.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-2.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ /* Formerly known as ltrans-2.c */
+ 
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-3.c b/gcc/testsuite/gcc.dg/graphite/interchange-3.c
+index 4aec824..6635529 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-3.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-3.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ /* Formerly known as ltrans-3.c */
+ 
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-4.c b/gcc/testsuite/gcc.dg/graphite/interchange-4.c
+index 463ecb5..359f0ac 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-4.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-4.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ /* Formerly known as ltrans-4.c */
+ 
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-5.c b/gcc/testsuite/gcc.dg/graphite/interchange-5.c
+index e5aaa64..892257e 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-5.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-5.c
+@@ -1,4 +1,5 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ /* Formerly known as ltrans-5.c */
+ 
+diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
+index c6543ec..51c6ee5 100644
+--- a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
++++ b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/graphite/pr46185.c b/gcc/testsuite/gcc.dg/graphite/pr46185.c
+index 36d46a4..738c9a8 100644
+--- a/gcc/testsuite/gcc.dg/graphite/pr46185.c
++++ b/gcc/testsuite/gcc.dg/graphite/pr46185.c
+@@ -1,5 +1,7 @@
+ /* { dg-do run } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
+ /* { dg-options "-O2 -floop-interchange -ffast-math -fno-ipa-cp" } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
+index fe2669f..dd77aa3 100644
+--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
++++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
+index 211c9ab..c7defb4 100644
+--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
++++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c
+@@ -1,4 +1,6 @@
+ /* { dg-require-effective-target size32plus } */
++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */
++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */
+ 
+ #define DEBUG 0
+ #if DEBUG
+diff --git a/gcc/testsuite/gcc.dg/initpri1.c b/gcc/testsuite/gcc.dg/initpri1.c
+index 794ea2b..10b3a24 100644
+--- a/gcc/testsuite/gcc.dg/initpri1.c
++++ b/gcc/testsuite/gcc.dg/initpri1.c
+@@ -1,4 +1,5 @@
+ /* { dg-do run { target init_priority } } */
++/* { dg-options "-mctor-dtor" { target { nds32*-*-* } } } */
+ 
+ extern void abort ();
+ 
+diff --git a/gcc/testsuite/gcc.dg/initpri2.c b/gcc/testsuite/gcc.dg/initpri2.c
+index fa9fda0..1418411 100644
+--- a/gcc/testsuite/gcc.dg/initpri2.c
++++ b/gcc/testsuite/gcc.dg/initpri2.c
+@@ -1,4 +1,5 @@
+ /* { dg-do compile { target init_priority } } */
++/* { dg-options "-mctor-dtor" { target { nds32*-*-* } } } */
+ 
+ /* Priorities must be in the range [0, 65535].  */
+ void c1()
+diff --git a/gcc/testsuite/gcc.dg/initpri3.c b/gcc/testsuite/gcc.dg/initpri3.c
+index 1633da0..e1b8cf6 100644
+--- a/gcc/testsuite/gcc.dg/initpri3.c
++++ b/gcc/testsuite/gcc.dg/initpri3.c
+@@ -1,6 +1,7 @@
+ /* { dg-do run { target init_priority } } */
+ /* { dg-require-effective-target lto } */
+ /* { dg-options "-flto -O3" } */
++/* { dg-options "-flto -O3 -mctor-dtor" { target { nds32*-*-* } } } */
+ 
+ extern void abort ();
+ 
+diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c
+index 4db904b..2290d8b 100644
+--- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c
++++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c
+@@ -1,5 +1,6 @@
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details"  } */
++/* { dg-additional-options "-u_printf_float -u_scanf_float" { target nds32*-*-* } } */
+ 
+ struct bovid
+ {
+diff --git a/gcc/testsuite/gcc.dg/lower-subreg-1.c b/gcc/testsuite/gcc.dg/lower-subreg-1.c
+index 47057fe..25439b1 100644
+--- a/gcc/testsuite/gcc.dg/lower-subreg-1.c
++++ b/gcc/testsuite/gcc.dg/lower-subreg-1.c
+@@ -1,4 +1,4 @@
+-/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */
++/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* nds32*-*-* } } } } } */
+ /* { dg-options "-O -fdump-rtl-subreg1" } */
+ /* { dg-additional-options "-mno-stv" { target ia32 } } */
+ /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */
+diff --git a/gcc/testsuite/gcc.dg/pr28796-2.c b/gcc/testsuite/gcc.dg/pr28796-2.c
+index f56a5d4..fff71bc 100644
+--- a/gcc/testsuite/gcc.dg/pr28796-2.c
++++ b/gcc/testsuite/gcc.dg/pr28796-2.c
+@@ -2,6 +2,7 @@
+ /* { dg-options "-O2 -funsafe-math-optimizations -fno-finite-math-only -DUNSAFE" } */
+ /* { dg-add-options ieee } */
+ /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */
++/* { dg-skip-if "No denormalized support" { nds32_ext_fpu } } */
+ 
+ #include "tg-tests.h"
+ 
+diff --git a/gcc/testsuite/gcc.dg/sibcall-10.c b/gcc/testsuite/gcc.dg/sibcall-10.c
+index d98b43a..bb0e24c 100644
+--- a/gcc/testsuite/gcc.dg/sibcall-10.c
++++ b/gcc/testsuite/gcc.dg/sibcall-10.c
+@@ -5,7 +5,7 @@
+    Copyright (C) 2002 Free Software Foundation Inc.
+    Contributed by Hans-Peter Nilsson  <hp@bitrange.com>  */
+ 
+-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
+ /* -mlongcall disables sibcall patterns.  */
+ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+ /* { dg-options "-O2 -foptimize-sibling-calls" } */
+diff --git a/gcc/testsuite/gcc.dg/sibcall-3.c b/gcc/testsuite/gcc.dg/sibcall-3.c
+index eafe8dd..f188a18 100644
+--- a/gcc/testsuite/gcc.dg/sibcall-3.c
++++ b/gcc/testsuite/gcc.dg/sibcall-3.c
+@@ -5,7 +5,7 @@
+    Copyright (C) 2002 Free Software Foundation Inc.
+    Contributed by Hans-Peter Nilsson  <hp@bitrange.com>  */
+ 
+-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
+ /* -mlongcall disables sibcall patterns.  */
+ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+ /* { dg-options "-O2 -foptimize-sibling-calls" } */
+diff --git a/gcc/testsuite/gcc.dg/sibcall-4.c b/gcc/testsuite/gcc.dg/sibcall-4.c
+index 1e039c6..a8c844a 100644
+--- a/gcc/testsuite/gcc.dg/sibcall-4.c
++++ b/gcc/testsuite/gcc.dg/sibcall-4.c
+@@ -5,7 +5,7 @@
+    Copyright (C) 2002 Free Software Foundation Inc.
+    Contributed by Hans-Peter Nilsson  <hp@bitrange.com>  */
+ 
+-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
+ /* -mlongcall disables sibcall patterns.  */
+ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+ /* { dg-options "-O2 -foptimize-sibling-calls" } */
+diff --git a/gcc/testsuite/gcc.dg/sibcall-9.c b/gcc/testsuite/gcc.dg/sibcall-9.c
+index 34e7053..71c3251 100644
+--- a/gcc/testsuite/gcc.dg/sibcall-9.c
++++ b/gcc/testsuite/gcc.dg/sibcall-9.c
+@@ -5,7 +5,7 @@
+    Copyright (C) 2002 Free Software Foundation Inc.
+    Contributed by Hans-Peter Nilsson  <hp@bitrange.com>  */
+ 
+-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* nvptx-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nvptx-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
+ /* -mlongcall disables sibcall patterns.  */
+ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+ /* { dg-options "-O2 -foptimize-sibling-calls" } */
+diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c
+index 7864c6a..c768ca2 100644
+--- a/gcc/testsuite/gcc.dg/stack-usage-1.c
++++ b/gcc/testsuite/gcc.dg/stack-usage-1.c
+@@ -2,6 +2,7 @@
+ /* { dg-options "-fstack-usage" } */
+ /* nvptx doesn't have a reg allocator, and hence no stack usage data.  */
+ /* { dg-skip-if "" { nvptx-*-* } { "*" } { "" } } */
++/* { dg-options "-fstack-usage -fno-omit-frame-pointer" { target { nds32*-*-* } } } */
+ 
+ /* This is aimed at testing basic support for -fstack-usage in the back-ends.
+    See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c).
+diff --git a/gcc/testsuite/gcc.dg/torture/type-generic-1.c b/gcc/testsuite/gcc.dg/torture/type-generic-1.c
+index 3897818..6815e8b 100644
+--- a/gcc/testsuite/gcc.dg/torture/type-generic-1.c
++++ b/gcc/testsuite/gcc.dg/torture/type-generic-1.c
+@@ -3,6 +3,7 @@
+ 
+ /* { dg-do run } */
+ /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */
++/* { dg-skip-if "No denormalized support" { nds32_ext_fpu } } */
+ /* { dg-options "-DUNSAFE" { target tic6x*-*-* visium-*-* } } */
+ /* { dg-add-options ieee } */
+ 
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+index 1a4bfe6..78c948a 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+@@ -25,4 +25,4 @@ foo ()
+    but the loop reads only one element at a time, and DOM cannot resolve these.
+    The same happens on powerpc depending on the SIMD support available.  */
+ 
+-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* powerpc64*-*-* } || { sparc*-*-* && lp64 } } } } } */
++/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* powerpc64*-*-* nds32*-*-*} || { sparc*-*-* && lp64 } } } } } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c
+index f70b311..8a1081c 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c
+@@ -33,6 +33,6 @@ bitmap_single_bit_set_p (const_bitmap a)
+ }
+ 
+ /* Verify that VRP simplified an "if" statement.  */
+-/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp1"} } */
++/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp1" { xfail *-*-* } } } */
+ 
+ 
+diff --git a/gcc/testsuite/gcc.target/nds32/basic-main.c b/gcc/testsuite/gcc.target/nds32/basic-main.c
+index 6fdbc35..7341fb5 100644
+--- a/gcc/testsuite/gcc.target/nds32/basic-main.c
++++ b/gcc/testsuite/gcc.target/nds32/basic-main.c
+@@ -1,9 +1,10 @@
+ /* This is a basic main function test program.  */
+ 
+-/* { dg-do run }  */
+-/* { dg-options "-O0" }  */
++/* { dg-do run } */
++/* { dg-options "-O0" } */
+ 
+-int main(void)
++int
++main (void)
+ {
+   return 0;
+ }
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c
+new file mode 100644
+index 0000000..8cadcfd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c
+@@ -0,0 +1,20 @@
++/* This is a test program for abs instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int a = -4;
++  int abs = __nds32__abs (a);
++
++  if (abs != 4)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c
+new file mode 100644
+index 0000000..d2c87db
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c
+@@ -0,0 +1,21 @@
++/* This is a test program for ave instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int a = 4;
++  int b = 2;
++  int ave = __nds32__ave (a, b);
++
++  if (ave != 3)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c
+new file mode 100644
+index 0000000..0e6c1e0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c
+@@ -0,0 +1,20 @@
++/* This is a test program for bclr instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int a = 1;
++  int c = __nds32__bclr (a, 0);
++
++  if (c != 0)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c
+new file mode 100644
+index 0000000..1bd8513
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c
+@@ -0,0 +1,20 @@
++/* This is a test program for bset instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int c = 0;
++  c = __nds32__bset (c, 0);
++
++  if (c != 1)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c
+new file mode 100644
+index 0000000..a1dbc00
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c
+@@ -0,0 +1,20 @@
++/* This is a test program for btgl instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int a = 1;
++  int c = __nds32__btgl (1, 0);
++
++  if (c != 0)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c
+new file mode 100644
+index 0000000..c001f94
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c
+@@ -0,0 +1,20 @@
++/* This is a test program for btst instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int c = 1;
++  c = __nds32__btst (c, 0);
++
++  if (c != 1)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c
+new file mode 100644
+index 0000000..d63b298
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c
+@@ -0,0 +1,20 @@
++/* This is a test program for clip instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int c = 33;
++  c = __nds32__clip (c, 5);
++
++  if (c != 31)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c
+new file mode 100644
+index 0000000..3e3f663
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c
+@@ -0,0 +1,20 @@
++/* This is a test program for clips instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int input = -33;
++  int clipped = __nds32__clips (input, 5);
++
++  /* Pass only when clips (-33, 5) yields -32.  */
++  if (clipped == -32)
++    exit (0);
++  abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c
+new file mode 100644
+index 0000000..d672a33
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c
+@@ -0,0 +1,20 @@
++/* This is a test program for clo instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int input = 0xFFFF0000;
++  int count = __nds32__clo (input);
++
++  /* Pass only when clo (0xFFFF0000) yields 16.  */
++  if (count == 16)
++    exit (0);
++  abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c
+new file mode 100644
+index 0000000..17e6318
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c
+@@ -0,0 +1,20 @@
++/* This is a test program for clz instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  int input = 0x0000FFFF;
++  int count = __nds32__clz (input);
++
++  /* Pass only when clz (0x0000FFFF) yields 16.  */
++  if (count == 16)
++    exit (0);
++  abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c
+new file mode 100644
+index 0000000..c769fea
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c
+@@ -0,0 +1,28 @@
++/* This is a test program for bse instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf2 } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned int a = 0xF0F0F0F0;
++  unsigned int b = 0x00000300;
++  unsigned int r = 0;
++  /* Same starting values, kept separately for the inline-asm reference.  */
++  unsigned int verify_b = 0x00000300;
++  unsigned int verify_r = 0;
++  /* First through the builtin, then the bse instruction via inline asm.  */
++  __nds32__bse (&r, a, &b);
++  a = 0xF0F0F0F0;  /* Restates the initial input value before the asm.  */
++  asm volatile ("bse %0, %2, %1": "+&r" (verify_r), "+&r" (verify_b) : "r" (a));
++  /* Pass only when the builtin left r and b equal to the asm results.  */
++  if ((verify_b == b) && (verify_r == r))
++    exit (0);
++  else
++    abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c
+new file mode 100644
+index 0000000..d798719
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c
+@@ -0,0 +1,26 @@
++/* This is a test program for bsp instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf2 } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned int a = 0x0000000F;
++  unsigned int b = 0x00000300;
++  unsigned int r = 0;
++  unsigned int verify_b = 0x00000300;
++  unsigned int verify_r = 0;
++  /* First through the builtin, then the bsp instruction via inline asm.  */
++  __nds32__bsp (&r, a, &b);
++  asm volatile ("bsp %0, %2, %1": "+&r" (verify_r), "+&r" (verify_b) : "r" (a));
++  /* Pass only when the builtin left r and b equal to the asm results.  */
++  if ((verify_b == b) && (verify_r == r))
++    exit (0);
++  else
++    abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c
+new file mode 100644
+index 0000000..bc4fe42
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c
+@@ -0,0 +1,21 @@
++/* This is a test program for pbsad instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf2 } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned int lhs = 0x09070605;
++  unsigned int rhs = 0x04020301;
++  unsigned int sum = __nds32__pbsad (lhs, rhs);
++
++  /* Pass only when pbsad of the two words above yields 17.  */
++  if (sum == 17)
++    exit (0);
++  abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c
+new file mode 100644
+index 0000000..6ed1b08
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c
+@@ -0,0 +1,23 @@
++/* This is a test program for pbsada instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_perf2 } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned int lhs = 0x09070605;
++  unsigned int rhs = 0x04020301;
++  unsigned int acc = 1;
++
++  acc = __nds32__pbsada (acc, lhs, rhs);
++
++  /* Pass only when the accumulator, started at 1, ends up as 18.  */
++  if (acc == 18)
++    exit (0);
++  abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-add16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add16.c
+new file mode 100644
+index 0000000..0eec324
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add16.c
+@@ -0,0 +1,49 @@
++/* This is a test program for add16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int add16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__add16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_uadd16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_uadd16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sadd16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_sadd16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = add16 (0x0001f000, 0x00011000);
++  uint16x2_t v_ua = v_uadd16 ((uint16x2_t) {0xf000, 0xf000},
++			      (uint16x2_t) {0x1000, 0x2000});
++  int16x2_t v_sa = v_sadd16 ((int16x2_t) {0xf777, 0xf111},
++			     (int16x2_t) {0x1000, 0x2000});
++
++  if (a != 0x00020000)
++    abort ();
++  else if (v_ua[0] != 0x0000
++	   || v_ua[1] != 0x1000)
++    abort ();
++  else if (v_sa[0] != 0x0777
++	   || v_sa[1] != 0x1111)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-add64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add64.c
+new file mode 100644
+index 0000000..b761b7f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add64.c
+@@ -0,0 +1,36 @@
++/* This is a test program for add64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long sadd64 (long long ra, long long rb)
++{
++  return __nds32__sadd64 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++unsigned long long uadd64 (unsigned long long ra, unsigned long long rb)
++{
++  return __nds32__uadd64 (ra, rb);
++}
++
++int
++main ()
++{
++  long long sa = sadd64 (0x1122334400000000ll, 0x55667788ll);
++  unsigned long long ua = uadd64 (0xffff00000000ull, 0x55667788ull);
++
++  if (sa != 0x1122334455667788ll)
++    abort ();
++  else if (ua != 0xffff55667788ull)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-add8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add8.c
+new file mode 100644
+index 0000000..77e686c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add8.c
+@@ -0,0 +1,53 @@
++/* This is a test program for add8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int add8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__add8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_uadd8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_uadd8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int8x4_t v_sadd8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_sadd8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = add8 (0x11223344, 0x55667788);
++  uint8x4_t v_ua = v_uadd8 ((uint8x4_t) {0xff, 0xee, 0xdd, 0xcc},
++			    (uint8x4_t) {0x1, 0xee, 0xdd, 0xcc});
++  int8x4_t v_sa = v_sadd8 ((int8x4_t) {0x80, 0x7f, 0xbb, 0xaa},
++			   (int8x4_t) {0x80, 0x7f, 0xbb, 0xaa});
++  /* Lane 1 is compared by bit pattern, independent of char signedness.  */
++  if (a != 0x6688aacc)
++    abort ();
++  else if (v_ua[0] != 0
++	   || v_ua[1] != 0xdc
++	   || v_ua[2] != 0xba
++	   || v_ua[3] != 0x98)
++    abort ();
++  else if (v_sa[0] != 0
++	   || (unsigned char) v_sa[1] != 0xfe
++	   || v_sa[2] != 0x76
++	   || v_sa[3] != 0x54)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-bitrev.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bitrev.c
+new file mode 100644
+index 0000000..2c8c297
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bitrev.c
+@@ -0,0 +1,27 @@
++/* This is a test program for bitrev instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int bitrev (unsigned int ra, unsigned int rb)
++{
++  return __nds32__bitrev (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = bitrev (0xd, 1);
++
++  if (a != 0x2)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-bpick.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bpick.c
+new file mode 100644
+index 0000000..78893cb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bpick.c
+@@ -0,0 +1,27 @@
++/* This is a test program for bpick instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int bpick (unsigned int ra, unsigned int rb, unsigned int rc)
++{
++  return __nds32__bpick (ra, rb, rc);
++}
++
++int
++main ()
++{
++  unsigned int a = bpick (0x11223344, 0x11332244, 0);
++
++  if (a != 0x11332244)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq16.c
+new file mode 100644
+index 0000000..c37abf4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq16.c
+@@ -0,0 +1,49 @@
++/* This is a test program for cmpeq16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int cmpeq16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__cmpeq16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_scmpeq16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_scmpeq16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_ucmpeq16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_ucmpeq16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = cmpeq16 (0xffff0000, 0xffff0001);
++  uint16x2_t v_sa = v_scmpeq16 ((int16x2_t) {0x7fff, 0x8000},
++				(int16x2_t) {0x8000, 0x8000});
++  uint16x2_t v_ua = v_ucmpeq16 ((uint16x2_t) {0x7fff, 0x8000},
++				(uint16x2_t) {0x8000, 0x8000});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (v_sa[0] != 0
++	   || v_sa[1] != 0xffff)
++    abort ();
++  else if (v_ua[0] != 0
++	   || v_ua[1] != 0xffff)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq8.c
+new file mode 100644
+index 0000000..a692dac
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq8.c
+@@ -0,0 +1,53 @@
++/* This is a test program for cmpeq8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int cmpeq8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__cmpeq8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_scmpeq8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_scmpeq8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_ucmpeq8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_ucmpeq8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = cmpeq8 (0xffff0000, 0xffff0101);
++  uint8x4_t v_sa = v_scmpeq8 ((int8x4_t) { 0x7f, 0x7f, 0x01, 0x01},
++			      (int8x4_t) { 0x7f, 0x7f, 0x00, 0x00});
++  uint8x4_t v_ua = v_ucmpeq8 ((uint8x4_t) { 0x7f, 0x7f, 0x01, 0x01},
++			      (uint8x4_t) { 0x7f, 0x7f, 0x00, 0x00});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (v_sa[0] != 0xff
++           || v_sa[1] != 0xff
++           || v_sa[2] != 0
++	   || v_sa[3] != 0)
++    abort ();
++  else if (v_ua[0] != 0xff
++           || v_ua[1] != 0xff
++           || v_ua[2] != 0
++	   || v_ua[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-cras16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cras16.c
+new file mode 100644
+index 0000000..7d6da46
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cras16.c
+@@ -0,0 +1,58 @@
++/* This is a test program for cras16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int cras16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__cras16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_ucras16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_ucras16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_scras16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_scras16 (ra, rb);
++}
++
++int
++main ()
++{
++  /* Expected vector results, chosen by whether __NDS32_EL__ is defined.  */
++#ifdef __NDS32_EL__
++  uint16x2_t v_ua_p = {1, 0};
++  int16x2_t v_sa_p = {0x1000, 0x111};
++#else
++  uint16x2_t v_ua_p = {0x2469, 0xe000};
++  int16x2_t v_sa_p = {0x3000, 0xe111};
++#endif
++  /* Run the scalar wrapper and both vector wrappers on fixed inputs.  */
++  unsigned int a = cras16 (0x0001f000, 0x0001f000);
++  uint16x2_t v_ua = v_ucras16 ((uint16x2_t) {0x1235, 0xf000},
++			       (uint16x2_t) {0x1000, 0x1234});
++  int16x2_t v_sa = v_scras16 ((int16x2_t) {0x2000, 0xf111},
++			      (int16x2_t) {0x1000, 0x1000});
++  /* Any mismatch aborts; only a full match exits with status 0.  */
++  if (a != 0xf001efff)
++    abort ();
++  else if (v_ua[0] != v_ua_p[0]
++	   || v_ua[1] != v_ua_p[1])
++    abort ();
++  else if (v_sa[0] != v_sa_p[0]
++	   || v_sa[1] != v_sa_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-crsa16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-crsa16.c
+new file mode 100644
+index 0000000..de99c3a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-crsa16.c
+@@ -0,0 +1,57 @@
++/* This is a test program for crsa16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int crsa16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__crsa16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_ucrsa16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_ucrsa16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_scrsa16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_scrsa16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__  /* Expected vector results differ per configuration.  */
++  uint16x2_t v_ua_p = {0x2469, 0xe000};
++  int16x2_t v_sa_p = {0x3000, 0x110};
++#else
++  uint16x2_t v_ua_p = {1, 0};
++  int16x2_t v_sa_p = {0x1000, 0x112};
++#endif
++  /* Run the scalar wrapper and both vector wrappers on fixed inputs.  */
++  unsigned int a = crsa16 (0x0001f000, 0x0001f000);
++  uint16x2_t v_ua = v_ucrsa16 ((uint16x2_t) {0x1235, 0xf000},
++			       (uint16x2_t) {0x1000, 0x1234});
++  int16x2_t v_sa = v_scrsa16 ((int16x2_t) {0x2000, 0x0111},
++			      (int16x2_t) {0x0001, 0x1000});
++  /* Any mismatch aborts; only a full match exits with status 0.  */
++  if (a != 0x1001f001)
++    abort ();
++  else if (v_ua[0] != v_ua_p[0]
++	   || v_ua[1] != v_ua_p[1])
++    abort ();
++  else if (v_sa[0] != v_sa_p[0]
++	   || v_sa[1] != v_sa_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-insb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-insb.c
+new file mode 100644
+index 0000000..ebd0348
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-insb.c
+@@ -0,0 +1,27 @@
++/* This is a test program for insb instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int insb (unsigned int ra, unsigned int rb)
++{
++  return __nds32__insb (ra, rb, 1);
++}
++
++int
++main ()
++{
++  unsigned int a = insb (0x11220044, 0x33);
++
++  if (a != 0x11223344)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbb16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbb16.c
+new file mode 100644
+index 0000000..23d92e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbb16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for pkbb16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int pkbb16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__pkbb16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_pkbb16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_pkbb16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  uint16x2_t va_p = {0xcccc, 0xaaaa};
++#else
++  uint16x2_t va_p = {0xbbbb, 0xdddd};
++#endif
++
++  unsigned int a = pkbb16 (0x11223344, 0x55667788);
++  uint16x2_t va = v_pkbb16 ((uint16x2_t) {0xaaaa, 0xbbbb},
++			    (uint16x2_t) {0xcccc, 0xdddd});
++
++  if (a != 0x33447788)
++    abort ();
++  else if (va[0] != va_p[0]
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbt16.c
+new file mode 100644
+index 0000000..6c34420
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbt16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for pkbt16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int pkbt16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__pkbt16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_pkbt16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_pkbt16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  uint16x2_t va_p = {0xdddd, 0xaaaa};
++#else
++  uint16x2_t va_p = {0xbbbb, 0xcccc};
++#endif
++
++  unsigned int a = pkbt16 (0x11223344, 0x55667788);
++  uint16x2_t va = v_pkbt16 ((uint16x2_t) {0xaaaa, 0xbbbb},
++			    (uint16x2_t) {0xcccc, 0xdddd});
++
++  if (a != 0x33445566)
++    abort ();
++  else if (va[0] != va_p[0]
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktb16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktb16.c
+new file mode 100644
+index 0000000..0aab5df
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktb16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for pktb16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int pktb16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__pktb16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_pktb16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_pktb16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  uint16x2_t va_p = {0xcccc, 0xbbbb};
++#else
++  uint16x2_t va_p = {0xaaaa, 0xdddd};
++#endif
++
++  unsigned int a = pktb16 (0x11223344, 0x55667788);
++  uint16x2_t va = v_pktb16 ((uint16x2_t) {0xaaaa, 0xbbbb},
++			    (uint16x2_t) {0xcccc, 0xdddd});
++
++  if (a != 0x11227788)
++    abort ();
++  else if (va[0] != va_p[0]
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktt16.c
+new file mode 100644
+index 0000000..745cde5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktt16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for pktt16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int pktt16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__pktt16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_pktt16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_pktt16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  uint16x2_t va_p = {0xdddd, 0xbbbb};
++#else
++  uint16x2_t va_p = {0xaaaa, 0xcccc};
++#endif
++
++  unsigned int a = pktt16 (0x11223344, 0x55667788);
++  uint16x2_t va = v_pktt16 ((uint16x2_t) {0xaaaa, 0xbbbb},
++			    (uint16x2_t) {0xcccc, 0xdddd});
++
++  if (a != 0x11225566)
++    abort ();
++  else if (va[0] != va_p[0]
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd16.c
+new file mode 100644
+index 0000000..5271b41
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd16.c
+@@ -0,0 +1,38 @@
++/* This is a test program for radd16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int radd16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__radd16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_radd16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_radd16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = radd16 (0x7fff7fff, 0x7fff7fff);
++  int16x2_t va = v_radd16 ((int16x2_t) {0x8000, 0x4000},
++			   (int16x2_t) {0x8000, 0x8000});
++
++  if (a != 0x7fff7fff)
++    abort ();
++  else if (va[0] != (short) 0x8000
++	   || va[1] != (short) 0xe000)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd64.c
+new file mode 100644
+index 0000000..3e82ff5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for radd64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long radd64 (long long ra, long long rb)
++{
++  return __nds32__radd64 (ra, rb);
++}
++
++int
++main ()
++{
++  long long a = radd64 (0xf000000000000000ll, 0xf000000000000000ll);
++
++  if (a != 0xf000000000000000ll)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd8.c
+new file mode 100644
+index 0000000..10735a1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd8.c
+@@ -0,0 +1,40 @@
++/* This is a test program for radd8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int radd8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__radd8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int8x4_t v_radd8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_radd8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = radd8 (0x11223344, 0x55667788);
++  int8x4_t va = v_radd8 ((int8x4_t) {0x7f, 0x80, 0x80, 0xaa},
++			 (int8x4_t) {0x7f, 0x80, 0x40, 0xaa});
++  /* Lanes 1-3 are compared by bit pattern, independent of char signedness.  */
++  if (a != 0x334455e6)
++    abort ();
++  else if (va[0] != 0x7f
++	   || (unsigned char) va[1] != 0x80
++	   || (unsigned char) va[2] != 0xe0
++	   || (unsigned char) va[3] != 0xaa)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-raddw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-raddw.c
+new file mode 100644
+index 0000000..190a477
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-raddw.c
+@@ -0,0 +1,27 @@
++/* This is a test program for raddw instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int raddw (int ra, int rb)
++{
++  return __nds32__raddw (ra, rb);
++}
++
++int
++main ()
++{
++  int a = raddw (0x80000000, 0x80000000);
++
++  if (a != 0x80000000)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcras16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcras16.c
+new file mode 100644
+index 0000000..2a2288a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcras16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for rcras16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int rcras16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__rcras16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_rcras16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_rcras16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  int16x2_t va_p = {0x7fff, 0x8000};
++#else
++  int16x2_t va_p = {0xffff, 0};
++#endif
++
++  unsigned int a = rcras16 (0x0fff0000, 0x00000fff);
++  int16x2_t va = v_rcras16 ((int16x2_t) {0x7fff, 0x8000},
++			    (int16x2_t) {0x8000, 0x8000});
++
++  if (a != 0x0fff0000)
++    abort ();
++  else if (va[0] != va_p[0]
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcrsa16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcrsa16.c
+new file mode 100644
+index 0000000..ebcc0f6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcrsa16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for rcrsa16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int rcrsa16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__rcrsa16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_rcrsa16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_rcrsa16 (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  int16x2_t va_p = {0x8000, 0x8000};
++#else
++  int16x2_t va_p = {0, 0xffff};
++#endif
++
++  unsigned int a = rcrsa16 (0x7fff7fff, 0x7fff8000);
++  int16x2_t va = v_rcrsa16 ((int16x2_t) {0x8000, 0x8000},
++			    (int16x2_t) {0x7fff, 0x8000});
++
++  if (a != 0x7fff7fff)
++    abort ();
++  else if (va[0] != va_p [0]
++	   || va[1] != va_p [1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub16.c
+new file mode 100644
+index 0000000..f9fcc86
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub16.c
+@@ -0,0 +1,38 @@
++/* This is a test program for rsub16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int rsub16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__rsub16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_rsub16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_rsub16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = rsub16 (0x7fff7fff, 0x80008000);
++  int16x2_t va = v_rsub16 ((int16x2_t) {0x8000, 0x8000},
++			   (int16x2_t) {0x7fff, 0x4000});
++
++  if (a != 0x7fff7fff)
++    abort ();
++  else if (va[0] != (short) 0x8000
++	   || va[1] != (short) 0xa000)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub64.c
+new file mode 100644
+index 0000000..227eba7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for rsub64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long rsub64 (long long ra, long long rb)
++{
++  return __nds32__rsub64 (ra, rb);
++}
++
++int
++main ()
++{
++  long long a = rsub64 (0xe, 0xf);
++  /* The test expects -1 (all 64 bits set) from rsub64 (0xe, 0xf).  */
++  if (a != -1ll)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub8.c
+new file mode 100644
+index 0000000..0f1dddc
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub8.c
+@@ -0,0 +1,40 @@
++/* This is a test program for rsub8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int rsub8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__rsub8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int8x4_t v_rsub8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_rsub8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = rsub8 (0x55667788, 0x11223344);
++  int8x4_t va = v_rsub8 ((int8x4_t) {0x7f, 0x80, 0x80, 0xaa},
++			 (int8x4_t) {0x80, 0x7f, 0x40, 0xaa});
++  /* Lanes 1-2 are compared by bit pattern, independent of char signedness.  */
++  if (a != 0x222222a2)
++    abort ();
++  else if (va[0] != 0x7f
++	   || (unsigned char) va[1] != 0x80
++	   || (unsigned char) va[2] != 0xa0
++	   || va[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsubw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsubw.c
+new file mode 100644
+index 0000000..b70a229
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsubw.c
+@@ -0,0 +1,27 @@
++/* This is a test program for rsubw instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int rsubw (int ra, int rb)
++{
++  return __nds32__rsubw (ra, rb);
++}
++
++int
++main ()
++{
++  int a = rsubw (0x80000000, 0x7fffffff);
++
++  if (a != 0x80000000)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple16.c
+new file mode 100644
+index 0000000..95251d6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple16.c
+@@ -0,0 +1,37 @@
++/* Run-time test for the scmple16 instruction: calls __nds32__scmple16 and __nds32__v_scmple16, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int scmple16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__scmple16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_scmple16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_scmple16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = scmple16 (0xfffe0001, 0xffff0000);
++  uint16x2_t va = v_scmple16 ((int16x2_t) {0x7fff, 0x7ffe},
++			      (int16x2_t) {0x7ffe, 0x7fff});
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0
++           || va[1] != 0xffff)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple8.c
+new file mode 100644
+index 0000000..6c0033d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple8.c
+@@ -0,0 +1,40 @@
++/* Run-time test for the scmple8 instruction: calls __nds32__scmple8 and __nds32__v_scmple8, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int scmple8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__scmple8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_scmple8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_scmple8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = scmple8 (0xfefe0101, 0xffff0000);
++  uint8x4_t va = v_scmple8 ((int8x4_t) {0x7e, 0x7e, 0x01, 0x01},
++			    (int8x4_t) {0x7f, 0x7f, 0x00, 0x00});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0xff
++           || va[1] != 0xff
++           || va[2] != 0
++	   || va[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt16.c
+new file mode 100644
+index 0000000..5797711
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt16.c
+@@ -0,0 +1,38 @@
++/* Run-time test for the scmplt16 instruction: calls __nds32__scmplt16 and __nds32__v_scmplt16, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int scmplt16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__scmplt16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_scmplt16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_scmplt16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = scmplt16 (0xfffe0001, 0xffff0000);
++  uint16x2_t va = v_scmplt16 ((int16x2_t) {0x7fff, 0x7ffe},
++			      (int16x2_t) {0x7ffe, 0x7fff});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0
++           || va[1] != 0xffff)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt8.c
+new file mode 100644
+index 0000000..3e52006
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt8.c
+@@ -0,0 +1,40 @@
++/* Run-time test for the scmplt8 instruction: calls __nds32__scmplt8 and __nds32__v_scmplt8, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int scmplt8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__scmplt8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_scmplt8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_scmplt8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = scmplt8 (0xfefe0101, 0xffff0000);
++  uint8x4_t va = v_scmplt8 ((int8x4_t) {0x7e, 0x7e, 0x01, 0x01},
++			    (int8x4_t) {0x7f, 0x7f, 0x00, 0x00});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0xff
++           || va[1] != 0xff
++           || va[2] != 0
++	   || va[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sll16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sll16.c
+new file mode 100644
+index 0000000..5ab9506
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sll16.c
+@@ -0,0 +1,37 @@
++/* Run-time test for the sll16 instruction: calls __nds32__sll16 and __nds32__v_sll16, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sll16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__sll16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_sll16 (uint16x2_t ra, unsigned int rb)
++{
++  return __nds32__v_sll16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = sll16 (0x0f00f000, 4);
++  uint16x2_t va = v_sll16 ((uint16x2_t) {0x7fff, 0x8000}, 4);
++
++  if (a != 0xf0000000)
++    abort ();
++  else if (va[0] != 0xfff0
++	   || va[1] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smal.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smal.c
+new file mode 100644
+index 0000000..f7e54b7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smal.c
+@@ -0,0 +1,36 @@
++/* Run-time test for the smal instruction: calls __nds32__smal and __nds32__v_smal, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smal (long long ra, unsigned int rb)
++{
++  return __nds32__smal (ra, rb);
++}
++
++static __attribute__ ((noinline))
++long long v_smal (long long ra, int16x2_t rb)
++{
++  return __nds32__v_smal (ra, rb);
++}
++
++int
++main ()
++{
++  long long a = smal (0xfffff0000ll, 0x0001ffff);
++  long long va = v_smal (0xffffff0000ll,
++			 (int16x2_t) {0x0002, 0xffff});
++  if (a != 0xffffeffffll)
++    abort ();
++  else if (va != 0xfffffefffell)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbb.c
+new file mode 100644
+index 0000000..c39a889
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbb.c
+@@ -0,0 +1,45 @@
++/* Run-time test for the smalbb instruction: calls __nds32__smalbb and __nds32__v_smalbb, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smalbb (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smalbb (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smalbb (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smalbb (t, a, b);
++}
++
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; only va_p differs from the #else branch.  */
++  long long a_p = 0x12345679075ca9d3ll;
++  long long va_p = 0x12345679075ca9d3ll;
++#else /* !__NDS32_EL__ */
++  long long a_p = 0x12345679075ca9d3ll;
++  long long va_p = 0x12345678ffffffffll;
++#endif /* __NDS32_EL__ */
++
++  long long a = smalbb (0x12345678ffffffffll,0x00006789, 0x00001234);
++  long long va = v_smalbb (0x12345678ffffffffll, (int16x2_t) {0x6789, 0},
++						 (int16x2_t) {0x1234, 0});
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbt.c
+new file mode 100644
+index 0000000..06577fd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbt.c
+@@ -0,0 +1,45 @@
++/* Run-time test for the smalbt instruction: calls __nds32__smalbt and __nds32__v_smalbt, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smalbt (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smalbt (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smalbt (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smalbt (t, a, b);
++}
++
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; only va_p differs from the #else branch.  */
++  long long a_p = 0x12345679075ca9d3ll;
++  long long va_p = 0x12345679075ca9d3ll;
++#else /* !__NDS32_EL__ */
++  long long a_p = 0x12345679075ca9d3ll;
++  long long va_p = 0x12345678ffffffffll;
++#endif /* __NDS32_EL__ */
++
++  long long a = smalbt (0x12345678ffffffffll, 0x00006789, 0x12340000);
++  long long va = v_smalbt (0x12345678ffffffffll, (int16x2_t) {0x6789, 0},
++						 (int16x2_t) {0, 0x1234});
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalda.c
+new file mode 100644
+index 0000000..33b4b3f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalda.c
+@@ -0,0 +1,38 @@
++/* Run-time test for the smalda instruction: calls __nds32__smalda and __nds32__v_smalda, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smalda (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smalda (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smalda (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smalda (t, a, b);
++}
++
++
++int
++main ()
++{
++  long long a = smalda (0x12345678ffffffffll, 0x67890000, 0x12340000);
++  long long va = v_smalda (0x12345678ffffffffll, (int16x2_t) {0, 0x6789},
++						 (int16x2_t) {0, 0x1234});
++
++  if (a != 0x12345679075CA9D3ll)
++    abort ();
++  else if (va != 0x12345679075CA9D3ll)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaldrs.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaldrs.c
+new file mode 100644
+index 0000000..48255b1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaldrs.c
+@@ -0,0 +1,46 @@
++/* Run-time test for the smaldrs instruction: calls __nds32__smaldrs and __nds32__v_smaldrs, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smaldrs (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smaldrs (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smaldrs (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smaldrs (t, a, b);
++}
++
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; only va_p differs from the #else branch.  */
++  long long a_p = 0x12345678ffffaaaall;
++  long long va_p = 0x12345678ffffaaaall;
++#else /* !__NDS32_EL__ */
++  long long a_p = 0x12345678ffffaaaall;
++  long long va_p = 0x1234567900005554ll;
++#endif /* __NDS32_EL__ */
++
++  long long a = smaldrs (0x12345678ffffffffll, 0x67890001, 0x00011234);
++  long long va = v_smaldrs (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x6789},
++						  (int16x2_t) {0x1234, 0x0001});
++
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalds.c
+new file mode 100644
+index 0000000..5a89ea6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalds.c
+@@ -0,0 +1,46 @@
++/* Run-time test for the smalds instruction: calls __nds32__smalds and __nds32__v_smalds, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smalds (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smalds (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smalds (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smalds (t, a, b);
++}
++
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; only va_p differs from the #else branch.  */
++  long long a_p = 0x12345678ffffaaaall;
++  long long va_p = 0x12345678ffffaaaall;
++#else /* !__NDS32_EL__ */
++  long long a_p = 0x12345678ffffaaaall;
++  long long va_p = 0x1234567900005554ll;
++#endif /* __NDS32_EL__ */
++
++  long long a = smalds (0x12345678ffffffffll, 0x12340001, 0x00016789);
++  long long va = v_smalds (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x1234},
++						 (int16x2_t) {0x6789, 0x0001});
++
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaltt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaltt.c
+new file mode 100644
+index 0000000..709607a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaltt.c
+@@ -0,0 +1,46 @@
++/* Run-time test for the smaltt instruction: calls __nds32__smaltt and __nds32__v_smaltt, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smaltt (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smaltt (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smaltt (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smaltt (t, a, b);
++}
++
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; only va_p differs from the #else branch.  */
++  long long a_p = 0x12345679075ca9d3ll;
++  long long va_p = 0x12345679075ca9d3ll;
++#else /* !__NDS32_EL__ */
++  long long a_p = 0x12345679075ca9d3ll;
++  long long va_p = 0x12345678ffffffffll;
++#endif /* __NDS32_EL__ */
++
++  long long a = smaltt (0x12345678ffffffffll, 0x67890000, 0x12340000);
++  long long va = v_smaltt (0x12345678ffffffffll, (int16x2_t) {0, 0x6789},
++						 (int16x2_t) {0, 0x1234});
++
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxda.c
+new file mode 100644
+index 0000000..0f90250
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxda.c
+@@ -0,0 +1,38 @@
++/* Run-time test for the smalxda instruction: calls __nds32__smalxda and __nds32__v_smalxda, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smalxda (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smalxda (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smalxda (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smalxda (t, a, b);
++}
++
++
++int
++main ()
++{
++  long long a = smalxda (0x12345678ffffffffll, 0x67890000, 0x00001234);
++  long long va = v_smalxda (0x12345678ffffffffll, (int16x2_t) {0, 0x6789},
++						  (int16x2_t) {0x1234, 0});
++
++  if (a != 0x12345679075CA9D3)
++    abort ();
++  else if (va != 0x12345679075CA9D3)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxds.c
+new file mode 100644
+index 0000000..ee2e098
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxds.c
+@@ -0,0 +1,46 @@
++/* Run-time test for the smalxds instruction: calls __nds32__smalxds and __nds32__v_smalxds, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smalxds (long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__smalxds (t, a, b);
++}
++
++static __attribute__ ((noinline))
++long long v_smalxds (long long t, int16x2_t a, int16x2_t b)
++{
++  return __nds32__v_smalxds (t, a, b);
++}
++
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; only va_p differs from the #else branch.  */
++  long long a_p = 0x12345678ffffaaaall;
++  long long va_p = 0x12345678ffffaaaall;
++#else /* !__NDS32_EL__ */
++  long long a_p = 0x12345678ffffaaaall;
++  long long va_p = 0x1234567900005554ll;
++#endif /* __NDS32_EL__ */
++
++  long long a = smalxds (0x12345678ffffffffll, 0x12340001, 0x67890001);
++  long long va = v_smalxds (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x1234},
++						  (int16x2_t) {0x0001, 0x6789});
++
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smar64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smar64.c
+new file mode 100644
+index 0000000..59c6f1f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smar64.c
+@@ -0,0 +1,27 @@
++/* Run-time test for the smar64 instruction: calls __nds32__smar64, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smar64 (long long t, int a, int b)
++{
++  return __nds32__smar64 (t, a, b);
++}
++
++int
++main ()
++{
++  long long a = smar64 (0xf000000000000000ll, 0x12345678, 0x23);
++
++  if (a != 0xf00000027d27d268ll)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax16.c
+new file mode 100644
+index 0000000..72bf957
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax16.c
+@@ -0,0 +1,37 @@
++/* Run-time test for the smax16 instruction: calls __nds32__smax16 and __nds32__v_smax16, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int smax16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smax16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_smax16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smax16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = smax16 (0xfffe0001, 0xffff0000);
++  int16x2_t va = v_smax16 ((int16x2_t) {0x7fff, 0},
++			   (int16x2_t) {0x7ffe, 1});
++  if (a != 0xffff0001)
++    abort ();
++  else if (va[0] != 0x7fff
++           || va[1] != 1)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax8.c
+new file mode 100644
+index 0000000..128bf19
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax8.c
+@@ -0,0 +1,41 @@
++/* Run-time test for the smax8 instruction: calls __nds32__smax8 and __nds32__v_smax8, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int smax8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smax8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int8x4_t v_smax8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_smax8 (ra, rb);
++}
++
++
++int
++main ()
++{
++  unsigned int a = smax8 (0xffff0000, 0xfefe0001);
++  int8x4_t va = v_smax8 ((int8x4_t) {0x7f, 0x7f, 0x01, 0x01},
++			 (int8x4_t) {0x7e, 0x7e, 0x00, 0x00});
++
++  if (a != 0xffff0001)
++    abort ();
++  else if (va[0] != 0x7f
++           || va[1] != 0x7f
++           || va[2] != 1
++	   || va[3] != 1)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbb.c
+new file mode 100644
+index 0000000..25759bd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbb.c
+@@ -0,0 +1,44 @@
++/* Run-time test for the smbb instruction: calls __nds32__smbb and __nds32__v_smbb, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smbb (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smbb (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smbb (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smbb (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 1;
++#else /* !__NDS32_EL__ */
++  int va_p = 2;
++#endif /* __NDS32_EL__ */
++
++  int a = smbb (0x80000002, 0x80000001);
++
++  int va = v_smbb ((int16x2_t) {0xffff, 0x0002},
++		   (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 2)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbt.c
+new file mode 100644
+index 0000000..7ed2c22
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbt.c
+@@ -0,0 +1,44 @@
++/* Run-time test for the smbt instruction: calls __nds32__smbt and __nds32__v_smbt, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smbt (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smbt (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smbt (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smbt (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 0xffffffff;
++#else /* !__NDS32_EL__ */
++  int va_p = 0xfffffffe;
++#endif /* __NDS32_EL__ */
++
++  int a = smbt (0x80000002, 0x80000001);
++
++  int va = v_smbt ((int16x2_t) {0xffff, 0x0002},
++		   (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smdrs.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smdrs.c
+new file mode 100644
+index 0000000..4224b04
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smdrs.c
+@@ -0,0 +1,43 @@
++/* Run-time test for the smdrs instruction: calls __nds32__smdrs and __nds32__v_smdrs, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smdrs (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smdrs (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smdrs (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smdrs (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 0xffffffff;
++#else /* !__NDS32_EL__ */
++  int va_p = 1;
++#endif /* __NDS32_EL__ */
++
++  int a = smdrs (0x80000002, 0x80000001);
++  int va = v_smdrs ((int16x2_t) {0xffff, 0x0002},
++		    (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0xc0000002)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smds.c
+new file mode 100644
+index 0000000..9875efb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smds.c
+@@ -0,0 +1,43 @@
++/* Run-time test for the smds instruction: calls __nds32__smds and __nds32__v_smds, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smds (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smds (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smds (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smds (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 1;
++#else /* !__NDS32_EL__ */
++  int va_p = 0xffffffff;
++#endif /* __NDS32_EL__ */
++
++  int a = smds (0x80000002, 0x80000001);
++  int va = v_smds ((int16x2_t) {0xffff, 0x0002},
++		   (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0x3ffffffe)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smin16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smin16.c
+new file mode 100644
+index 0000000..60deb4b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smin16.c
+@@ -0,0 +1,37 @@
++/* Run-time test for the smin16 instruction: calls __nds32__smin16 and __nds32__v_smin16, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int smin16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smin16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_smin16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smin16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = smin16 (0xfffe0001, 0xffff0000);
++  int16x2_t v_sa = v_smin16 ((int16x2_t) {0x7fff, 0},
++			     (int16x2_t) {0x7ffe, 1});
++  if (a != 0xfffe0000)
++    abort ();
++  else if (v_sa[0] != 0x7ffe
++           || v_sa[1] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmul.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmul.c
+new file mode 100644
+index 0000000..5735efa
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmul.c
+@@ -0,0 +1,27 @@
++/* Run-time test for the smmul instruction: calls __nds32__smmul, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smmul (int ra, int rb)
++{
++  return __nds32__smmul (ra, rb);
++}
++
++int
++main ()
++{
++  int a = smmul (0x80000000, 0x80000000);
++
++  if (a != 0x40000000)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmulu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmulu.c
+new file mode 100644
+index 0000000..fbe0b15
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmulu.c
+@@ -0,0 +1,27 @@
++/* Run-time test for the smmul.u instruction: calls __nds32__smmul_u, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smmul_u (int ra, int rb)
++{
++  return __nds32__smmul_u (ra, rb);
++}
++
++int
++main ()
++{
++  int a = smmul_u (0x80000002, 0x80000001);
++
++  if (a != 0x3fffffff)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwb.c
+new file mode 100644
+index 0000000..9160b9a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwb.c
+@@ -0,0 +1,43 @@
++/* Run-time test for the smmwb instruction: calls __nds32__smmwb and __nds32__v_smmwb, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smmwb (int ra, unsigned int rb)
++{
++  return __nds32__smmwb (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smmwb (int ra, int16x2_t rb)
++{
++  return __nds32__v_smmwb (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 0;
++#else /* !__NDS32_EL__ */
++  int va_p = 0xffffffff;
++#endif /* __NDS32_EL__ */
++
++  int a = smmwb (0x80000002, 0x80000001);
++
++  int va = v_smmwb (0xffff0002, (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0xffff8000)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwbu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwbu.c
+new file mode 100644
+index 0000000..46ebed2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwbu.c
+@@ -0,0 +1,43 @@
++/* Run-time test for the smmwb.u instruction: calls __nds32__smmwb_u and __nds32__v_smmwb_u, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smmwb_u (int ra, unsigned int rb)
++{
++  return __nds32__smmwb_u (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smmwb_u (int ra, int16x2_t rb)
++{
++  return __nds32__v_smmwb_u (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 1;
++#else /* !__NDS32_EL__ */
++  int va_p = 0xffffffff;
++#endif /* __NDS32_EL__ */
++
++  int a = smmwb_u (0x80000002, 0x80000001);
++
++  int va = v_smmwb_u (0xffff0002, (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0xffff8000)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwt.c
+new file mode 100644
+index 0000000..45d4792
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwt.c
+@@ -0,0 +1,43 @@
++/* Run-time test for the smmwt instruction: calls __nds32__smmwt and __nds32__v_smmwt, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smmwt (int ra, unsigned int rb)
++{
++  return __nds32__smmwt (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smmwt (int ra, int16x2_t rb)
++{
++  return __nds32__v_smmwt (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 0xffffffff;
++#else /* !__NDS32_EL__ */
++  int va_p = 0;
++#endif /* __NDS32_EL__ */
++
++  int a = smmwt (0x80000002, 0x80000001);
++
++  int va = v_smmwt (0xffff0002, (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0x3fffffff)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwtu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwtu.c
+new file mode 100644
+index 0000000..3b4b487
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwtu.c
+@@ -0,0 +1,43 @@
++/* Run-time test for the smmwt.u instruction: calls __nds32__smmwt_u and __nds32__v_smmwt_u, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smmwt_u (int ra, unsigned int rb)
++{
++  return __nds32__smmwt_u (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smmwt_u (int ra, int16x2_t rb)
++{
++  return __nds32__v_smmwt_u (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__ /* Presumably little-endian; picks the expected vector result.  */
++  int va_p = 0xffffffff;
++#else /* !__NDS32_EL__ */
++  int va_p = 1;
++#endif /* __NDS32_EL__ */
++
++  int a = smmwt_u (0x80000002, 0x80000001);
++
++  int va = v_smmwt_u (0xffff0002, (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0x3fffffff)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslda.c
+new file mode 100644
+index 0000000..be2ac27
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslda.c
+@@ -0,0 +1,37 @@
++/* Run-time test for the smslda instruction: calls __nds32__smslda and __nds32__v_smslda, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smslda (long long rt, unsigned int ra, unsigned int rb)
++{
++  return __nds32__smslda (rt, ra, rb);
++}
++
++static __attribute__ ((noinline))
++long long v_smslda (long long rt, int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smslda (rt, ra, rb);
++}
++
++int
++main ()
++{
++  long long a = smslda (0xff0000000000ll, 0xffffffff, 0x2);
++  long long va = v_smslda (0x100000000ll,
++			   (int16x2_t) {0xf000, 0}, (int16x2_t) {0, 3});
++
++  if (a != 0xff0000000002ll)
++    abort ();
++  else if (va != 0x100000000ll)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslxda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslxda.c
+new file mode 100644
+index 0000000..f276a2e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslxda.c
+@@ -0,0 +1,37 @@
++/* Run-time test for the smslxda instruction: calls __nds32__smslxda and __nds32__v_smslxda, aborting on a wrong result.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smslxda (long long rt, unsigned int ra, unsigned int rb)
++{
++  return __nds32__smslxda (rt, ra, rb);
++}
++
++static __attribute__ ((noinline))
++long long v_smslxda (long long rt, int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smslxda (rt, ra, rb);
++}
++
++int
++main ()
++{
++  long long a = smslxda (0xff0000000000ll, 0xffffffff, 0x2);
++  long long va = v_smslxda (0x100000000ll,
++			    (int16x2_t) {0xf000, 0}, (int16x2_t) {0, 3});
++
++  if (a != 0xff0000000002ll)
++    abort ();
++  else if (va != 0x100003000ll)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* __NDS32_EXT_DSP__ undefined: skip the checks and return success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smsr64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smsr64.c
+new file mode 100644
+index 0000000..64a84e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smsr64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for smsr64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long smsr64 (long long t, int a, int b)
++{
++  return __nds32__smsr64 (t, a, b);
++}
++
++int
++main ()
++{
++  long long a = smsr64 (0x5000000300000000ll, 0x12345678, 0x23);
++
++  if (a != 0x5000000082D82D98ll)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smtt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smtt.c
+new file mode 100644
+index 0000000..bfb30f2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smtt.c
+@@ -0,0 +1,44 @@
++/* This is a test program for smtt instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smtt (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smtt (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smtt (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smtt (ra, rb);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  int va_p = 2;
++#else
++  int va_p = 1;
++#endif
++
++  int a = smtt (0x80000002, 0x80000001);
++
++  int va = v_smtt ((int16x2_t) {0xffff, 0x0002},
++		   (int16x2_t) {0xffff, 0x0001});
++
++  if (a != 0x40000000)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smul16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smul16.c
+new file mode 100644
+index 0000000..bb3fad4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smul16.c
+@@ -0,0 +1,38 @@
++/* This is a test program for smul16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned long long smul16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smul16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int32x2_t v_smul16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smul16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned long long a = smul16 (0xffff0000, 0x0001ffff);
++  int32x2_t va = v_smul16 ((int16x2_t) {0xffff, 0},
++			   (int16x2_t) {0x0001, 0xffff});
++
++  if (a != 0xffffffff00000000)
++    abort ();
++  else if (va[0] != 0xffffffff
++           || va[1] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smulx16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smulx16.c
+new file mode 100644
+index 0000000..0e65a2a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smulx16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for smulx16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned long long smulx16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smulx16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int32x2_t v_smulx16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smulx16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned long long a = smulx16 (0xffff0000, 0xffff0001);
++  int32x2_t va = v_smulx16 ((int16x2_t) {0xffff, 0xffff},
++			    (int16x2_t) {1, 0});
++  if (a != 0xffffffff00000000)
++    abort ();
++  else if (va[0] != 0
++           || va[1] != 0xffffffff)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smxds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smxds.c
+new file mode 100644
+index 0000000..e429aa3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smxds.c
+@@ -0,0 +1,45 @@
++/* This is a test program for smxds instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int smxds (unsigned int ra, unsigned int rb)
++{
++  return __nds32__smxds (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int v_smxds (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_smxds (ra, rb);
++}
++
++int
++main ()
++{
++  /* a_p does not depend on endianness; only the vector result does.  */
++  int a_p = 0x8000;
++#ifdef __NDS32_EL__
++  int va_p = -1;
++#else
++  int va_p = 1;
++#endif
++
++  int a = smxds (0x80000002, 0x80000001);
++  int va = v_smxds ((int16x2_t) {0xffff, 0x0002},
++		    (int16x2_t) {0xffff, 0x0001});
++
++  if (a != a_p)
++    abort ();
++  else if (va != va_p)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16.c
+new file mode 100644
+index 0000000..7d85032
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for sra16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sra16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__sra16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sra16 (int16x2_t ra, unsigned int rb)
++{
++  return __nds32__v_sra16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = sra16 (0x0ffff000, 4);
++  int16x2_t va = v_sra16 ((int16x2_t) {0x7fff, 0x8000}, 4);
++
++  if (a != 0x00ffff00)
++    abort ();
++  else if (va[0] != 0x7ff
++	   || va[1] != (short) 0xf800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16u.c
+new file mode 100644
+index 0000000..5bc127c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16u.c
+@@ -0,0 +1,37 @@
++/* This is a test program for sra16.u instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sra16u (unsigned int ra, unsigned int rb)
++{
++  return __nds32__sra16_u (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sra16u (int16x2_t ra, unsigned int rb)
++{
++  return __nds32__v_sra16_u (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = sra16u (0x0ffff000, 4);
++  int16x2_t va = v_sra16u ((int16x2_t) {0x7fff, 0x8000}, 4);
++
++  if (a != 0x100ff00)
++    abort ();
++  else if (va[0] != 0x800
++	   || va[1] != (short) 0xf800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16.c
+new file mode 100644
+index 0000000..f3c6e16
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16.c
+@@ -0,0 +1,39 @@
++/* This is a test program for srai16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int srai16 (unsigned int ra)
++{
++  return __nds32__sra16 (ra, 4);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_srai16 (int16x2_t ra)
++{
++  return __nds32__v_sra16 (ra, 4);
++}
++
++int
++main ()
++{
++  /* Both 16-bit lanes are shifted right by the immediate 4; the expected
++     values below require the negative lane 0x8000 to become 0xf800.  */
++  unsigned int a = srai16 (0x0ffff000);
++  int16x2_t va = v_srai16 ((int16x2_t) {0x7fff, 0x8000});
++
++  if (a != 0x00ffff00)
++    abort ();
++  else if (va[0] != 0x7ff
++	   || va[1] != (short) 0xf800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16u.c
+new file mode 100644
+index 0000000..380bd2e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16u.c
+@@ -0,0 +1,37 @@
++/* This is a test program for srai16.u instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int srai16u (unsigned int ra)
++{
++  return __nds32__sra16_u (ra, 4);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_srai16u (int16x2_t ra)
++{
++  return __nds32__v_sra16_u (ra, 4);
++}
++
++int
++main ()
++{
++  unsigned int a = srai16u (0x0ffff000);
++  int16x2_t va = v_srai16u ((int16x2_t) {0x7fff, 0x8000});
++
++  if (a != 0x100ff00)
++    abort ();
++  else if (va[0] != 0x800
++	   || va[1] != (short) 0xf800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sraiu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sraiu.c
+new file mode 100644
+index 0000000..4090762
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sraiu.c
+@@ -0,0 +1,27 @@
++/* This is a test program for srai.u instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int sraiu (int ra)
++{
++  return __nds32__sra_u (ra, 8);
++}
++
++int
++main ()
++{
++  int a = sraiu (0xf00ff);
++
++  if (a != 0xf01)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srau.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srau.c
+new file mode 100644
+index 0000000..e3a3137
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srau.c
+@@ -0,0 +1,27 @@
++/* This is a test program for sra.u instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++int srau (int ra, unsigned int rb)
++{
++  return __nds32__sra_u (ra, rb);
++}
++
++int
++main ()
++{
++  int a = srau (0xf00ff, 8);
++
++  if (a != 0xf01)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16.c
+new file mode 100644
+index 0000000..8aa9c59
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for srl16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int srl16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__srl16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_srl16 (uint16x2_t ra, unsigned int rb)
++{
++  return __nds32__v_srl16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = srl16 (0x0f00f000, 4);
++  uint16x2_t va = v_srl16 ((uint16x2_t) {0x7fff, 0x8000}, 4);
++
++  if (a != 0xf00f00)
++    abort ();
++  else if (va[0] != 0x7ff
++	   || va[1] != 0x0800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16u.c
+new file mode 100644
+index 0000000..3f4ac5b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16u.c
+@@ -0,0 +1,37 @@
++/* This is a test program for srl16.u instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int srl16_u (unsigned int ra, unsigned int rb)
++{
++  return __nds32__srl16_u (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_srl16_u (uint16x2_t ra, unsigned int rb)
++{
++  return __nds32__v_srl16_u (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = srl16_u (0x0f00f000, 4);
++  uint16x2_t va = v_srl16_u ((uint16x2_t) {0x7fff, 0x8000}, 4);
++
++  if (a != 0xf00f00)
++    abort ();
++  else if (va[0] != 0x800
++	   || va[1] != 0x800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16.c
+new file mode 100644
+index 0000000..200bf8c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for srli16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int srli16 (unsigned int ra)
++{
++  return __nds32__srl16 (ra, 4);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_srli16 (uint16x2_t ra)
++{
++  return __nds32__v_srl16 (ra, 4);
++}
++
++int
++main ()
++{
++  unsigned int a = srli16 (0x0f00f000);
++  uint16x2_t va = v_srli16 ((uint16x2_t) {0x7fff, 0x8000});
++
++  if (a != 0xf00f00)
++    abort ();
++  else if (va[0] != 0x7ff
++	   || va[1] != 0x0800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16u.c
+new file mode 100644
+index 0000000..808319b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16u.c
+@@ -0,0 +1,37 @@
++/* This is a test program for srli16.u instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int srli16_u (unsigned int ra)
++{
++  return __nds32__srl16_u (ra, 4);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_srli16_u (uint16x2_t ra)
++{
++  return __nds32__v_srl16_u (ra, 4);
++}
++
++int
++main ()
++{
++  unsigned int a = srli16_u (0x0f00f000);
++  uint16x2_t va = v_srli16_u ((uint16x2_t) {0x7fff, 0x8000});
++
++  if (a != 0xf00f00)
++    abort ();
++  else if (va[0] != 0x800
++	   || va[1] != 0x800)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub16.c
+new file mode 100644
+index 0000000..eff5f92
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub16.c
+@@ -0,0 +1,49 @@
++/* This is a test program for sub16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sub16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__sub16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_usub16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_usub16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_ssub16 (int16x2_t ra, int16x2_t rb)
++{
++  return __nds32__v_ssub16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = sub16 (0x00010000, 0x00010001);
++  uint16x2_t v_ua = v_usub16 ((uint16x2_t) {0x1000, 0x0001},
++			      (uint16x2_t) {0xf000, 0x0000});
++  int16x2_t v_sa = v_ssub16 ((int16x2_t) {0x7777, 0x2111},
++			     (int16x2_t) {0x1000, 0x2000});
++
++  if (a != 0x0000ffff)
++    abort ();
++  else if (v_ua[0] != 0x2000
++	   || v_ua[1] != 0x0001)
++    abort ();
++  else if (v_sa[0] != 0x6777
++	   || v_sa[1] != 0x0111)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub64.c
+new file mode 100644
+index 0000000..efdd879
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub64.c
+@@ -0,0 +1,36 @@
++/* This is a test program for sub64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++long long ssub64 (long long ra, long long rb)
++{
++  return __nds32__ssub64 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++unsigned long long usub64 (unsigned long long ra, unsigned long long rb)
++{
++  return __nds32__usub64 (ra, rb);
++}
++
++int
++main ()
++{
++  long long sa = ssub64 (0x100000000ll, 0xffffffffll);
++  unsigned long long ua = usub64 (0xf00000000ull, 0x1111ull);
++
++  if (sa != 1ll)
++    abort ();
++  else if (ua != 0xeffffeeefull)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub8.c
+new file mode 100644
+index 0000000..b21f8a5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub8.c
+@@ -0,0 +1,53 @@
++/* This is a test program for sub8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sub8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__sub8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_usub8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_usub8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++int8x4_t v_ssub8 (int8x4_t ra, int8x4_t rb)
++{
++  return __nds32__v_ssub8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = sub8 (0x55667788, 0x11223344);
++  uint8x4_t v_ua = v_usub8 ((uint8x4_t) {0xff, 0xee, 0xee, 0xcc},
++			    (uint8x4_t) {0x1, 0xee, 0xdd, 0xdd});
++  int8x4_t v_sa = v_ssub8 ((int8x4_t) {0x81, 0x0, 0xdd, 0xaa},
++			   (int8x4_t) {0x80, 0x1, 0xcc, 0xaa});
++
++  if (a != 0x44444444)
++    abort ();
++  else if (v_ua[0] != 0xfe
++	   || v_ua[1] != 0
++	   || v_ua[2] != 0x11
++	   || v_ua[3] != 0xef)
++    abort ();
++  else if (v_sa[0] != 1
++	   || v_sa[1] != (char) 0xff
++	   || v_sa[2] != 0x11
++	   || v_sa[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd810.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd810.c
+new file mode 100644
+index 0000000..29fff3a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd810.c
+@@ -0,0 +1,43 @@
++/* This is a test program for sunpkd810 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sunpkd810 (unsigned int a)
++{
++  return __nds32__sunpkd810 (a);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sunpkd810 (int8x4_t a)
++{
++  return __nds32__v_sunpkd810 (a);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  int16x2_t va_p = {0xfff8, 0x56};
++#else
++  int16x2_t va_p = {0, 0};
++#endif
++
++  unsigned int a = sunpkd810 (0x000056f8);
++  int16x2_t va = v_sunpkd810 ((int8x4_t) {0xf8, 0x56, 0, 0});
++
++  if (a != 0x0056fff8)
++    abort ();
++  else if (va[0] != va_p[0]
++           || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd820.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd820.c
+new file mode 100644
+index 0000000..43f969a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd820.c
+@@ -0,0 +1,43 @@
++/* This is a test program for sunpkd820 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sunpkd820 (unsigned int a)
++{
++  return __nds32__sunpkd820 (a);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sunpkd820 (int8x4_t a)
++{
++  return __nds32__v_sunpkd820 (a);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  int16x2_t va_p = {0xfff8, 0x34};
++#else
++  int16x2_t va_p = {0, 0};
++#endif
++
++  unsigned int a = sunpkd820 (0x003400f8);
++  int16x2_t va = v_sunpkd820 ((int8x4_t) {0xf8, 0, 0x34, 0});
++
++  if (a != 0x0034fff8)
++    abort ();
++  else if (va[0] != va_p[0]
++           || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd830.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd830.c
+new file mode 100644
+index 0000000..76540b5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd830.c
+@@ -0,0 +1,37 @@
++/* This is a test program for sunpkd830 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sunpkd830 (unsigned int a)
++{
++  return __nds32__sunpkd830 (a);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sunpkd830 (int8x4_t a)
++{
++  return __nds32__v_sunpkd830 (a);
++}
++
++int
++main ()
++{
++  unsigned int a = sunpkd830 (0x120000f8);
++  int16x2_t va = v_sunpkd830 ((int8x4_t) {0xf8, 0x00, 0, 0x12});
++
++  if (a != 0x0012fff8)
++    abort ();
++  else if (va[0] != (short) 0xfff8
++           || va[1] != 0x0012)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd831.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd831.c
+new file mode 100644
+index 0000000..05149e6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd831.c
+@@ -0,0 +1,43 @@
++/* This is a test program for sunpkd831 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int sunpkd831 (unsigned int a)
++{
++  return __nds32__sunpkd831 (a);
++}
++
++static __attribute__ ((noinline))
++int16x2_t v_sunpkd831 (int8x4_t a)
++{
++  return __nds32__v_sunpkd831 (a);
++}
++
++int
++main ()
++{
++#ifdef __NDS32_EL__
++  int16x2_t va_p = {0xfff8, 0x12};
++#else
++  int16x2_t va_p = {0, 0};
++#endif
++
++  unsigned int a = sunpkd831 (0x1200f800);
++  int16x2_t va = v_sunpkd831 ((int8x4_t) {0, 0xf8, 0, 0x12});
++
++  if (a != 0x0012fff8)
++    abort ();
++  else if (va[0] != va_p[0]
++           || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple16.c
+new file mode 100644
+index 0000000..17b5344
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for ucmple16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int ucmple16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__ucmple16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_ucmple16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_ucmple16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = ucmple16 (0xfffe0001, 0xffff0000);
++  uint16x2_t va = v_ucmple16 ((uint16x2_t) {0x7fff, 0x7ffe},
++			      (uint16x2_t) {0x7ffe, 0x7fff});
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0
++           || va[1] != 0xffff)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple8.c
+new file mode 100644
+index 0000000..561b500
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple8.c
+@@ -0,0 +1,40 @@
++/* This is a test program for ucmple8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int ucmple8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__ucmple8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_ucmple8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_ucmple8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = ucmple8 (0xfefe0101, 0xffff0000);
++  uint8x4_t va = v_ucmple8 ((uint8x4_t) {0x7e, 0x7e, 0x01, 0x01},
++			    (uint8x4_t) {0x7f, 0x7f, 0x00, 0x00});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0xff
++           || va[1] != 0xff
++           || va[2] != 0
++	   || va[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt16.c
+new file mode 100644
+index 0000000..820ce1e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for ucmplt16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int ucmplt16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__ucmplt16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_ucmplt16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_ucmplt16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = ucmplt16 (0xfffe0001, 0xffff0000);
++  uint16x2_t va = v_ucmplt16 ((uint16x2_t) {0x7fff, 0x7ffe},
++			      (uint16x2_t) {0x7ffe, 0x7fff});
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0
++           || va[1] != 0xffff)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt8.c
+new file mode 100644
+index 0000000..8001586
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt8.c
+@@ -0,0 +1,40 @@
++/* This is a test program for ucmplt8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int ucmplt8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__ucmplt8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_ucmplt8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_ucmplt8 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = ucmplt8 (0xfefe0101, 0xffff0000);
++  uint8x4_t va = v_ucmplt8 ((uint8x4_t) {0x7e, 0x7e, 0x01, 0x01},
++			    (uint8x4_t) {0x7f, 0x7f, 0x00, 0x00});
++
++  if (a != 0xffff0000)
++    abort ();
++  else if (va[0] != 0xff
++           || va[1] != 0xff
++           || va[2] != 0
++	   || va[3] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umar64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umar64.c
+new file mode 100644
+index 0000000..ac32ae1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umar64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for umar64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned long long umar64 (unsigned long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__umar64 (t, a, b);	/* Intrinsic under test.  */
++}
++
++int
++main ()
++{
++  unsigned long long a = umar64 (0xf000000000000000ull, 0x12345678, 0x23);
++
++  if (a != 0xf00000027d27d268ull)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax16.c
+new file mode 100644
+index 0000000..99a43d2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for umax16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int umax16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__umax16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_umax16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_umax16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = umax16 (0xfffe0001, 0xffff0000);
++  uint16x2_t va = v_umax16 ((uint16x2_t) {0xffff, 0},
++			    (uint16x2_t) {0xfffe, 1});
++  if (a != 0xffff0001)
++    abort ();
++  else if (va[0] != 0xffff
++           || va[1] != 1)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax8.c
+new file mode 100644
+index 0000000..23904b2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax8.c
+@@ -0,0 +1,41 @@
++/* This is a test program for umax8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int umax8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__umax8 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint8x4_t v_umax8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_umax8 (ra, rb);
++}
++
++
++int
++main ()
++{
++  unsigned int a = umax8 (0xffff0000, 0xfffe0001);
++  uint8x4_t va = v_umax8 ((uint8x4_t) {0xff, 0xff, 0x01, 0x01},
++			  (uint8x4_t) {0xfe, 0xfe, 0x00, 0x00});
++
++  if (a != 0xffff0001)
++    abort ();
++  else if (va[0] != 0xff
++           || va[1] != 0xff
++           || va[2] != 1
++	   || va[3] != 1)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umin16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umin16.c
+new file mode 100644
+index 0000000..eec7058
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umin16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for umin16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++static __attribute__ ((noinline))
++unsigned int umin16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__umin16 (ra, rb);
++}
++
++static __attribute__ ((noinline))
++uint16x2_t v_umin16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_umin16 (ra, rb);
++}
++
++int
++main ()
++{
++  unsigned int a = umin16 (0xfffe0001, 0xffff0000);
++  uint16x2_t va = v_umin16 ((uint16x2_t) {0x7fff, 0},
++			    (uint16x2_t) {0x7ffe, 1});
++  if (a != 0xfffe0000)
++    abort ();
++  else if (va[0] != 0x7ffe
++           || va[1] != 0)
++    abort ();
++  else
++    exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umsr64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umsr64.c
+new file mode 100644
+index 0000000..3fb20bf
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umsr64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for umsr64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned long long umsr64 (unsigned long long t, unsigned int a, unsigned int b)
++{
++  return __nds32__umsr64 (t, a, b);
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned long long a = umsr64 (0x5000000300000000ull, 0x12345678, 0x23);
++
++  if (a != 0x5000000082D82D98ull) /* Equals t - a * b for these inputs.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umul16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umul16.c
+new file mode 100644
+index 0000000..ddfb6be
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umul16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for umul16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned long long umul16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__umul16 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint32x2_t v_umul16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_umul16 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned long long a = umul16 (0xffff0000, 0x0001ffff);
++  uint32x2_t va = v_umul16 ((uint16x2_t) {0xffff, 0},
++			    (uint16x2_t) {0x0001, 0xffff});
++  if (a != 0xffff00000000) /* 0xffff * 1 above 0 * 0xffff.  */
++    abort ();
++  else if (va[0] != 0xffff /* 0xffff * 1.  */
++           || va[1] != 0) /* 0 * 0xffff.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umulx16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umulx16.c
+new file mode 100644
+index 0000000..c57d304
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umulx16.c
+@@ -0,0 +1,37 @@
++/* This is a test program for umulx16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned long long umulx16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__umulx16 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint32x2_t v_umulx16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_umulx16 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned long long a = umulx16 (0xffff0000, 0xffff0001);
++  uint32x2_t va = v_umulx16 ((uint16x2_t) {0xffff, 0xffff},
++			     (uint16x2_t) {1, 0});
++  if (a != 0xffff00000000) /* Expected scalar result.  */
++    abort ();
++  else if (va[0] != 0 /* 0xffff * 0 (lanes crossed).  */
++           || va[1] != 0xffff) /* 0xffff * 1 (lanes crossed).  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd16.c
+new file mode 100644
+index 0000000..82c7be7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd16.c
+@@ -0,0 +1,38 @@
++/* This is a test program for uradd16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int uradd16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__uradd16 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_uradd16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_uradd16 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = uradd16 (0x7fff7fff, 0x7fff7fff);
++  uint16x2_t va = v_uradd16 ((uint16x2_t) {0x8000, 0x4000},
++			     (uint16x2_t) {0x8000, 0x8000});
++
++  if (a != 0x7fff7fff) /* Halved sum of each 16-bit pair.  */
++    abort ();
++  else if (va[0] != 0x8000 /* (0x8000 + 0x8000) / 2.  */
++	   || va[1] != 0x6000) /* (0x4000 + 0x8000) / 2.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd64.c
+new file mode 100644
+index 0000000..51ee961
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for uradd64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned long long uradd64 (unsigned long long ra, unsigned long long rb)
++{
++  return __nds32__uradd64 (ra, rb);
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned long long a = uradd64 (0xf000000000000000ull, 0xf000000000000000ull);
++
++  if (a != 0xf000000000000000ull) /* Halved sum; the carry is not lost.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd8.c
+new file mode 100644
+index 0000000..d4f91d6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd8.c
+@@ -0,0 +1,40 @@
++/* This is a test program for uradd8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int uradd8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__uradd8 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint8x4_t v_uradd8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_uradd8 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = uradd8 (0x11223344, 0x55667788);
++  uint8x4_t va = v_uradd8 ((uint8x4_t) {0x7f, 0x80, 0x40, 0xaa},
++			   (uint8x4_t) {0x7f, 0x80, 0x80, 0xaa});
++
++  if (a != 0x33445566) /* Halved sum of each byte pair.  */
++    abort ();
++  else if (va[0] != 0x7f /* (0x7f + 0x7f) / 2.  */
++	   || va[1] != 0x80 /* (0x80 + 0x80) / 2.  */
++	   || va[2] != 0x60 /* (0x40 + 0x80) / 2.  */
++	   || va[3] != 0xaa) /* (0xaa + 0xaa) / 2.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uraddw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uraddw.c
+new file mode 100644
+index 0000000..9fc76b0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uraddw.c
+@@ -0,0 +1,27 @@
++/* This is a test program for uraddw instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int uraddw (unsigned int ra, unsigned int rb)
++{
++  return __nds32__uraddw (ra, rb);
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = uraddw (0x80000000, 0x80000000);
++
++  if (a != 0x80000000) /* Halved sum; the carry is not lost.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcras16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcras16.c
+new file mode 100644
+index 0000000..1330374
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcras16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for urcras16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int urcras16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__urcras16 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_urcras16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_urcras16 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++#ifdef __NDS32_EL__ /* Expected lanes differ between the two builds.  */
++  uint16x2_t va_p = {0xffff, 0x8000};
++#else /* !__NDS32_EL__ */
++  uint16x2_t va_p = {0x7fff, 0};
++#endif /* __NDS32_EL__ */
++
++  unsigned int a = urcras16 (0x7fff7fff, 0x80007fff);
++  uint16x2_t va = v_urcras16 ((uint16x2_t) {0x7fff, 0x8000},
++			      (uint16x2_t) {0x8000, 0x8000});
++
++  if (a != 0x7fffffff) /* Expected scalar result.  */
++    abort ();
++  else if (va[0] != va_p[0] /* Lane-by-lane against va_p.  */
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcrsa16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcrsa16.c
+new file mode 100644
+index 0000000..806fa7a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcrsa16.c
+@@ -0,0 +1,44 @@
++/* This is a test program for urcrsa16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int urcrsa16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__urcrsa16 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_urcrsa16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_urcrsa16 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++#ifdef __NDS32_EL__ /* Expected lanes differ between the two builds.  */
++  uint16x2_t va_p = {0x8000, 0xffff};
++#else /* !__NDS32_EL__ */
++  uint16x2_t va_p = {0, 0x7fff};
++#endif /* __NDS32_EL__ */
++
++  unsigned int a = urcrsa16 (0x7fff7fff, 0x7fff8000);
++  uint16x2_t va = v_urcrsa16 ((uint16x2_t) {0x8000, 0x7fff},
++			      (uint16x2_t) {0x8000, 0x8000});
++
++  if (a != 0xffff7fff) /* Expected scalar result.  */
++    abort ();
++  else if (va[0] != va_p[0] /* Lane-by-lane against va_p.  */
++	   || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub16.c
+new file mode 100644
+index 0000000..9e87234
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub16.c
+@@ -0,0 +1,38 @@
++/* This is a test program for ursub16 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int ursub16 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__ursub16 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_ursub16 (uint16x2_t ra, uint16x2_t rb)
++{
++  return __nds32__v_ursub16 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = ursub16 (0x7fff7fff, 0x80008000);
++  uint16x2_t va = v_ursub16 ((uint16x2_t) {0x8000, 0x8000},
++			     (uint16x2_t) {0x7fff, 0x4000});
++
++  if (a != 0xffffffff) /* Halved difference -1 / 2 in each half.  */
++    abort ();
++  else if (va[0] != 0 /* (0x8000 - 0x7fff) / 2.  */
++	   || va[1] != 0x2000) /* (0x8000 - 0x4000) / 2.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub64.c
+new file mode 100644
+index 0000000..e1f7b15
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub64.c
+@@ -0,0 +1,27 @@
++/* This is a test program for ursub64 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned long long ursub64 (unsigned long long ra, unsigned long long rb)
++{
++  return __nds32__ursub64 (ra, rb);
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned long long a = ursub64 (0xeull, 0xfull);
++
++  if (a != 0xffffffffffffffffull) /* Halved difference of -1.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub8.c
+new file mode 100644
+index 0000000..f5e3ff6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub8.c
+@@ -0,0 +1,40 @@
++/* This is a test program for ursub8 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int ursub8 (unsigned int ra, unsigned int rb)
++{
++  return __nds32__ursub8 (ra, rb);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint8x4_t v_ursub8 (uint8x4_t ra, uint8x4_t rb)
++{
++  return __nds32__v_ursub8 (ra, rb);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = ursub8 (0x55667788, 0x11223344);
++  uint8x4_t va = v_ursub8 ((uint8x4_t) {0x7f, 0x80, 0x80, 0xaa},
++			   (uint8x4_t) {0x80, 0x7f, 0x40, 0xaa});
++
++  if (a != 0x22222222) /* Halved difference of each byte pair.  */
++    abort ();
++  else if (va[0] != 0xff /* (0x7f - 0x80) / 2, kept in 8 bits.  */
++	   || va[1] != 0 /* (0x80 - 0x7f) / 2.  */
++	   || va[2] != 0x20 /* (0x80 - 0x40) / 2.  */
++	   || va[3] != 0) /* (0xaa - 0xaa) / 2.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursubw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursubw.c
+new file mode 100644
+index 0000000..b12afb0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursubw.c
+@@ -0,0 +1,27 @@
++/* This is a test program for ursubw instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int ursubw (unsigned int ra,unsigned int rb)
++{
++  return __nds32__ursubw (ra, rb);
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = ursubw (0x80000000, 0x40000000);
++
++  if (a != 0x20000000) /* (0x80000000 - 0x40000000) / 2.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-wext.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wext.c
+new file mode 100644
+index 0000000..d86fb8f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wext.c
+@@ -0,0 +1,27 @@
++/* This is a test program for wext instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int wext (long long ra, unsigned int rb)
++{
++  return __nds32__wext (ra, rb);
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = wext (0x1234ffff0000ll, 16);
++
++  if (a != 0x1234ffff) /* The 32 bits of ra starting at bit 16.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-wexti.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wexti.c
+new file mode 100644
+index 0000000..8f09423
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wexti.c
+@@ -0,0 +1,27 @@
++/* This is a test program for wexti instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int wexti (long long ra)
++{
++  return __nds32__wext (ra, 16); /* Same intrinsic, constant position.  */
++}
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = wexti (0x1234ffff0000ll);
++
++  if (a != 0x1234ffff) /* The 32 bits of ra starting at bit 16.  */
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd810.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd810.c
+new file mode 100644
+index 0000000..7b3aebb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd810.c
+@@ -0,0 +1,43 @@
++/* This is a test program for zunpkd810 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int zunpkd810 (unsigned int a)
++{
++  return __nds32__zunpkd810 (a);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_zunpkd810 (uint8x4_t a)
++{
++  return __nds32__v_zunpkd810 (a);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++#ifdef __NDS32_EL__ /* Expected lanes differ between the two builds.  */
++  int16x2_t va_p = {0xf8, 0x56};
++#else /* !__NDS32_EL__ */
++  int16x2_t va_p = {0, 0};
++#endif /* __NDS32_EL__ */
++
++  unsigned int a = zunpkd810 (0x000056f8);
++  uint16x2_t va = v_zunpkd810 ((uint8x4_t) {0xf8, 0x56, 0, 0});
++
++  if (a != 0x005600f8) /* Bytes 1 and 0, each widened to 16 bits.  */
++    abort ();
++  else if (va[0] != va_p[0] /* Lane-by-lane against va_p.  */
++           || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd820.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd820.c
+new file mode 100644
+index 0000000..dc37a3d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd820.c
+@@ -0,0 +1,43 @@
++/* This is a test program for zunpkd820 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int zunpkd820 (unsigned int a)
++{
++  return __nds32__zunpkd820 (a);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_zunpkd820 (uint8x4_t a)
++{
++  return __nds32__v_zunpkd820 (a);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++#ifdef __NDS32_EL__ /* Expected lanes differ between the two builds.  */
++  int16x2_t va_p = {0xf8, 0x34};
++#else /* !__NDS32_EL__ */
++  int16x2_t va_p = {0, 0};
++#endif /* __NDS32_EL__ */
++
++  unsigned int a = zunpkd820 (0x003400f8);
++  uint16x2_t va = v_zunpkd820 ((uint8x4_t) {0xf8, 0, 0x34, 0});
++
++  if (a != 0x003400f8) /* Bytes 2 and 0, each widened to 16 bits.  */
++    abort ();
++  else if (va[0] != va_p[0] /* Lane-by-lane against va_p.  */
++           || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd830.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd830.c
+new file mode 100644
+index 0000000..8f5a224
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd830.c
+@@ -0,0 +1,37 @@
++/* This is a test program for zunpkd830 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int zunpkd830 (unsigned int a)
++{
++  return __nds32__zunpkd830 (a);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_zunpkd830 (uint8x4_t a)
++{
++  return __nds32__v_zunpkd830 (a);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = zunpkd830 (0x120000f8);
++  uint16x2_t va = v_zunpkd830 ((uint8x4_t) { 0xf8, 0x00, 0, 0x12});
++
++  if (a != 0x001200f8) /* Bytes 3 and 0, each widened to 16 bits.  */
++    abort ();
++  else if (va[0] != 0x00f8 /* Same expectation for either endianness.  */
++           || va[1] != 0x0012)
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd831.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd831.c
+new file mode 100644
+index 0000000..6878cd3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd831.c
+@@ -0,0 +1,43 @@
++/* This is a test program for zunpkd831 instruction.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++static __attribute__ ((noinline)) /* Reach the intrinsic via a real call.  */
++unsigned int zunpkd831 (unsigned int a)
++{
++  return __nds32__zunpkd831 (a);
++}
++
++static __attribute__ ((noinline)) /* Vector-typed counterpart.  */
++uint16x2_t v_zunpkd831 (uint8x4_t a)
++{
++  return __nds32__v_zunpkd831 (a);
++}
++
++int
++main () /* abort () on any mismatch, exit (0) otherwise.  */
++{
++#ifdef __NDS32_EL__ /* Expected lanes differ between the two builds.  */
++  int16x2_t va_p = {0xf8, 0x12};
++#else /* !__NDS32_EL__ */
++  int16x2_t va_p = {0, 0};
++#endif /* __NDS32_EL__ */
++
++  unsigned int a = zunpkd831 (0x1200f800);
++  uint16x2_t va = v_zunpkd831 ((uint8x4_t) {0, 0xf8, 0, 0x12});
++
++  if (a != 0x001200f8) /* Bytes 3 and 1, each widened to 16 bits.  */
++    abort ();
++  else if (va[0] != va_p[0] /* Lane-by-lane against va_p.  */
++           || va[1] != va_p[1])
++    abort ();
++  else
++    exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c
+new file mode 100644
+index 0000000..4ee7e5e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c
+@@ -0,0 +1,21 @@
++/* This is a test program for fcpysd instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_fpu_dp } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  double da = -1.5;
++  double db = 1.3;
++  double dr = __nds32__fcpysd (da, db);
++
++  if (dr != 1.5) /* Expects da's magnitude with db's (positive) sign.  */
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c
+new file mode 100644
+index 0000000..804410b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c
+@@ -0,0 +1,21 @@
++/* This is a test program for fcpynsd instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_fpu_dp } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  double da = -1.5;
++  double db = -1.3;
++  double dr =  __nds32__fcpynsd (da, db);
++
++  if (dr != 1.5) /* Expects da's magnitude, sign opposite to db's.  */
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c
+new file mode 100644
+index 0000000..0d86734
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c
+@@ -0,0 +1,21 @@
++/* This is a test program for fcpynss instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_fpu_sp } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  float a = -1.5;
++  float b = -1.3;
++  float r = __nds32__fcpynss (a, b);
++
++  if (r != 1.5) /* Expects a's magnitude, sign opposite to b's.  */
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c
+new file mode 100644
+index 0000000..4bccf57
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c
+@@ -0,0 +1,21 @@
++/* This is a test program for fcpyss instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_fpu_sp } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  float a = -1.5;
++  float b = 1.3;
++  float r = __nds32__fcpyss (a, b);
++
++  if (r != 1.5) /* Expects a's magnitude with b's (positive) sign.  */
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c
+new file mode 100644
+index 0000000..83e65ed
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c
+@@ -0,0 +1,23 @@
++/* This is a test program for fmfcfg instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_fpu } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* exit (0) when both reads agree, abort () otherwise.  */
++{
++  unsigned int intrinsic_fmfcfg = -1; /* Distinct initial values, so an  */
++  unsigned int inline_assemble_fmfcfg = -2; /* untouched pair differs.  */
++
++  intrinsic_fmfcfg = __nds32__fmfcfg (); /* Value via the intrinsic.  */
++  __asm volatile ("fmfcfg %0" : "=r" (inline_assemble_fmfcfg)); /* Via asm.  */
++
++  if (intrinsic_fmfcfg == inline_assemble_fmfcfg)
++    exit (0);
++  else
++    abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c
+new file mode 100644
+index 0000000..787b430
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c
+@@ -0,0 +1,35 @@
++/* This is a test program for fmtcsr/fmfcsr instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_fpu } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* exit (0) if the written value reads back, abort () otherwise.  */
++{
++  unsigned int fpcsr;
++  unsigned int real_fpcsr;
++
++  /* Keep real fpcsr value.  */
++  real_fpcsr = __nds32__fmfcsr ();
++
++  /* Write fpcsr.  */
++  fpcsr = 3;
++  __nds32__fmtcsr (fpcsr);
++
++  /* Read fpcsr back and keep only its low 13 bits for the comparison.  */
++  fpcsr = 0; /* Overwritten by the read on the next line.  */
++  fpcsr = __nds32__fmfcsr ();
++  fpcsr = fpcsr & 0x00001fff;
++
++  /* Recover fpcsr value before reporting the result.  */
++  __nds32__fmtcsr (real_fpcsr);
++
++  if (fpcsr == 3)
++    exit (0);
++  else
++   abort ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c b/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c
+new file mode 100644
+index 0000000..80b4921
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c
+@@ -0,0 +1,22 @@
++/* Verify the return address with builtin function.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int main() /* abort () if the two reads differ, exit (0) otherwise.  */
++{
++  unsigned int intrinsic_lp = -1; /* Distinct initial values, so an  */
++  unsigned int inline_assemble_lp = -2; /* untouched pair differs.  */
++
++  intrinsic_lp = __nds32__return_address (); /* Value via the intrinsic.  */
++
++  __asm volatile ("mov55 %0, $lp" : "=r" (inline_assemble_lp)); /* Via asm.  */
++
++  if (intrinsic_lp != inline_assemble_lp)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isb.c b/gcc/testsuite/gcc.target/nds32/builtin-isb.c
+deleted file mode 100644
+index e65061b..0000000
+--- a/gcc/testsuite/gcc.target/nds32/builtin-isb.c
++++ /dev/null
+@@ -1,11 +0,0 @@
+-/* Verify that we generate isb instruction with builtin function.  */
+-
+-/* { dg-do compile }  */
+-/* { dg-options "-O0" }  */
+-/* { dg-final { scan-assembler "\\tisb" } }  */
+-
+-void
+-test (void)
+-{
+-  __builtin_nds32_isb ();
+-}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isync.c b/gcc/testsuite/gcc.target/nds32/builtin-isync.c
+deleted file mode 100644
+index 3160e4a..0000000
+--- a/gcc/testsuite/gcc.target/nds32/builtin-isync.c
++++ /dev/null
+@@ -1,12 +0,0 @@
+-/* Verify that we generate isync instruction with builtin function.  */
+-
+-/* { dg-do compile }  */
+-/* { dg-options "-O0" }  */
+-/* { dg-final { scan-assembler "\\tisync" } }  */
+-
+-void
+-test (void)
+-{
+-  int *addr = (int *) 0x53000000;
+-  __builtin_nds32_isync (addr);
+-}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c b/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c
+deleted file mode 100644
+index db4c558..0000000
+--- a/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c
++++ /dev/null
+@@ -1,17 +0,0 @@
+-/* Verify that we generate mfsr/mtsr instruction with builtin function.  */
+-
+-/* { dg-do compile }  */
+-/* { dg-options "-O0" }  */
+-/* { dg-final { scan-assembler "\\tmfsr" } }  */
+-/* { dg-final { scan-assembler "\\tmtsr" } }  */
+-
+-#include <nds32_intrinsic.h>
+-
+-void
+-test (void)
+-{
+-  int ipsw_value;
+-
+-  ipsw_value = __builtin_nds32_mfsr (__NDS32_REG_IPSW__);
+-  __builtin_nds32_mtsr (ipsw_value, __NDS32_REG_IPSW__);
+-}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c b/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c
+deleted file mode 100644
+index 3cfaab9..0000000
+--- a/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c
++++ /dev/null
+@@ -1,17 +0,0 @@
+-/* Verify that we generate mfusr/mtusr instruction with builtin function.  */
+-
+-/* { dg-do compile }  */
+-/* { dg-options "-O0" }  */
+-/* { dg-final { scan-assembler "\\tmfusr" } }  */
+-/* { dg-final { scan-assembler "\\tmtusr" } }  */
+-
+-#include <nds32_intrinsic.h>
+-
+-void
+-test (void)
+-{
+-  int itype_value;
+-
+-  itype_value = __builtin_nds32_mfusr (__NDS32_REG_ITYPE__);
+-  __builtin_nds32_mtusr (itype_value, __NDS32_REG_ITYPE__);
+-}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-rotr.c b/gcc/testsuite/gcc.target/nds32/builtin-rotr.c
+new file mode 100644
+index 0000000..a295cb2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-rotr.c
+@@ -0,0 +1,19 @@
++/* This is a test program for rotr instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = 1;
++  a = __nds32__rotr (a, 30);
++
++  if (a != 4) /* Bit 0 rotated right by 30 in 32 bits lands on bit 2.  */
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c
+deleted file mode 100644
+index 2dceed9..0000000
+--- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c
++++ /dev/null
+@@ -1,11 +0,0 @@
+-/* Verify that we generate setgie.d instruction with builtin function.  */
+-
+-/* { dg-do compile }  */
+-/* { dg-options "-O0" }  */
+-/* { dg-final { scan-assembler "\\tsetgie.d" } }  */
+-
+-void
+-test (void)
+-{
+-  __builtin_nds32_setgie_dis ();
+-}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c
+deleted file mode 100644
+index 8928870..0000000
+--- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c
++++ /dev/null
+@@ -1,11 +0,0 @@
+-/* Verify that we generate setgie.e instruction with builtin function.  */
+-
+-/* { dg-do compile }  */
+-/* { dg-options "-O0" }  */
+-/* { dg-final { scan-assembler "\\tsetgie.e" } }  */
+-
+-void
+-test (void)
+-{
+-  __builtin_nds32_setgie_en ();
+-}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c
+new file mode 100644
+index 0000000..b353909
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c
+@@ -0,0 +1,43 @@
++/* This is a test program for checking gie with
++   mtsr/mfsr instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () if bit 0 of PSW does not follow, exit (0) otherwise.  */
++{
++  unsigned int psw;
++  unsigned int gie;
++  unsigned int real_psw;
++
++  /* Keep PSW value.  */
++  real_psw = __nds32__mfsr (NDS32_SR_PSW);
++
++  __nds32__setgie_en ();
++  __nds32__dsb(); /* This is needed for waiting pipeline.  */
++  psw = __nds32__mfsr (NDS32_SR_PSW);
++
++  gie = psw & 0x00000001;
++
++  /* Bit 0 of the PSW value read back must be set after setgie_en.  */
++  if (gie != 1)
++    abort ();
++
++  psw = psw & 0xFFFFFFFE;
++  __nds32__mtsr (psw, NDS32_SR_PSW);
++  __nds32__dsb(); /* This is needed for waiting pipeline.  */
++  psw = __nds32__mfsr (NDS32_SR_PSW);
++  gie = psw & 0x00000001;
++
++  /* Recover PSW value.  */
++  __nds32__mtsr (real_psw, NDS32_SR_PSW);
++
++  if (gie != 0)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-sp.c b/gcc/testsuite/gcc.target/nds32/builtin-sp.c
+new file mode 100644
+index 0000000..2e5499d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-sp.c
+@@ -0,0 +1,33 @@
++/* This is a test program for sp intrinsic usage.
++   Because we want to use frame pointer to access local variable,
++   we need to use -fno-omit-frame-pointer to make sure the existence
++   of frame pointer.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0 -fno-omit-frame-pointer" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () if sp does not follow the writes, exit (0) otherwise.  */
++{
++  unsigned int old_sp, new_sp;
++
++  old_sp = __nds32__get_current_sp ();
++  new_sp = old_sp - 4; /* Move sp down by 4 and read it back.  */
++  __nds32__set_current_sp (new_sp);
++  new_sp = __nds32__get_current_sp ();
++
++  if (new_sp != (old_sp - 4))
++    abort ();
++
++  new_sp = new_sp + 4; /* Put sp back and check it equals the start.  */
++  __nds32__set_current_sp (new_sp);
++  new_sp = __nds32__get_current_sp ();
++
++  if (new_sp != old_sp)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c b/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c
+new file mode 100644
+index 0000000..cf02434
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c
+@@ -0,0 +1,28 @@
++/* This is a test program for ffb instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_string } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = 0x1b2a3d4c;
++  unsigned int b = 0x0000003d; /* Byte value searched for in a.  */
++  int r;
++
++  r =  __nds32__ffb (a, b);
++
++#ifdef __NDS32_EL__ /* Expected index differs between the two builds.  */
++  if (r != -3)
++    abort ();
++#else /* !__NDS32_EL__ */
++  if (r != -2)
++    abort ();
++#endif /* __NDS32_EL__ */
++
++  exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c b/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c
+new file mode 100644
+index 0000000..b2fb008
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c
+@@ -0,0 +1,28 @@
++/* This is a test program for ffmism instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_string } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = 0x1b2a3d4c;
++  unsigned int b = 0x112a334c; /* Differs from a in two of four bytes.  */
++  int r;
++
++  r = __nds32__ffmism (a, b);
++
++#ifdef __NDS32_EL__ /* Expected index differs between the two builds.  */
++  if (r != -3)
++    abort ();
++#else /* !__NDS32_EL__ */
++  if (r != -4)
++    abort ();
++#endif /* __NDS32_EL__ */
++
++  exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c b/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c
+new file mode 100644
+index 0000000..105fce5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c
+@@ -0,0 +1,28 @@
++/* This is a test program for flmism instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O1" } */
++/* { dg-require-effective-target nds32_ext_string } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main () /* abort () on a mismatch, exit (0) otherwise.  */
++{
++  unsigned int a = 0x1b2a3d4c;
++  unsigned int b = 0x112a334c; /* Differs from a in two of four bytes.  */
++  int r;
++
++  r = __nds32__flmism (a, b);
++
++#ifdef __NDS32_EL__ /* Expected index differs between the two builds.  */
++  if (r != -1)
++    abort ();
++#else /* !__NDS32_EL__ */
++  if (r != -2)
++    abort ();
++#endif /* __NDS32_EL__ */
++
++  exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s16x2.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s16x2.c
+new file mode 100644
+index 0000000..5a2e8b7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s16x2.c
+@@ -0,0 +1,36 @@
++/* This is a test program for unaligned s16x2 get/put intrinsics.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__ /* Test body is built only with this macro.  */
++int
++main (void) /* abort () on any mismatch, exit (0) otherwise.  */
++{
++  /* Five bytes: two 16-bit lanes starting at data + 1 need data[1..4].  */
++  char data[] = {0x55,0x66,0x77,0x88,0x99};
++  short* short_data = (short*)& data[1];
++  int16x2_t test_short = {0x1111, 0xaaaa};
++  int16x2_t vecdata =  __nds32__get_unaligned_s16x2 (short_data);
++
++#ifdef __NDS32_EL__
++  if (vecdata[0] != 0x7766)
++    abort ();
++#else
++  if (vecdata[0] != 0x6677)
++    abort ();
++#endif
++
++  /* Round trip: both lanes must read back exactly as stored.  Compare with
++     test_short itself so the check does not depend on lane signedness.  */
++  __nds32__put_unaligned_s16x2 (short_data, test_short);
++  vecdata =  __nds32__get_unaligned_s16x2 (short_data);
++  if (vecdata[0] != test_short[0] || vecdata[1] != test_short[1])
++    abort ();
++  exit (0);
++}
++#else /* !__NDS32_EXT_DSP__ */
++int main(){return 0;} /* Stub: nothing to test, report success.  */
++#endif /* __NDS32_EXT_DSP__ */
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s8x4.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s8x4.c
+new file mode 100644
+index 0000000..f6cb4c9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s8x4.c
+@@ -0,0 +1,36 @@
++/* This is a test program for unaligned s8x4 vector access.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++int
++main (void)
++{
++  /* Spare 5th byte: the four-lane access at data[1] must stay in bounds.  */
++  char data[] = {0x55,0x66,0x77,0x88,0x11};
++  char* char_data = (char*)& data[1];
++  int8x4_t test_char = {0x11, 0x22, 0xaa, 0xbb};
++  int8x4_t vecdata =  __nds32__get_unaligned_s8x4 (char_data);
++
++#ifdef __NDS32_EL__
++  if (vecdata[0] != 0x66)
++    abort ();
++#else
++  if (vecdata[0] != 0x66)
++    abort ();
++#endif
++
++  __nds32__put_unaligned_s8x4 (char_data, test_char);
++  vecdata =  __nds32__get_unaligned_s8x4 (char_data);
++
++  if (vecdata[0] != test_char[0] || vecdata[1] != test_char[1]
++      || vecdata[2] != test_char[2] || vecdata[3] != test_char[3])
++    abort ();
++  exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u16x2.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u16x2.c
+new file mode 100644
+index 0000000..63ebd40
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u16x2.c
+@@ -0,0 +1,36 @@
++/* This is a test program for unaligned u16x2 vector access.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++int
++main (void)
++{
++  /* Spare 5th byte: the two-lane access at data[1] must stay in bounds.  */
++  unsigned char data[] = {0x55,0x66,0x77,0x88,0x11};
++  unsigned short* short_data = (unsigned short*)& data[1];
++  uint16x2_t test_short = {0x1111, 0xaaaa};
++  uint16x2_t vecdata =  __nds32__get_unaligned_u16x2 (short_data);
++
++#ifdef __NDS32_EL__
++  if (vecdata[0] != 0x7766)
++    abort ();
++#else
++  if (vecdata[0] != 0x6677)
++    abort ();
++#endif
++
++  __nds32__put_unaligned_u16x2 (short_data, test_short);
++  vecdata =  __nds32__get_unaligned_u16x2 (short_data);
++
++  /* Fail if either lane differs from the vector that was stored.  */
++  if (vecdata[0] != test_short[0] || vecdata[1] != test_short[1])
++    abort ();
++  exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u8x4.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u8x4.c
+new file mode 100644
+index 0000000..7b48274
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u8x4.c
+@@ -0,0 +1,36 @@
++/* This is a test program for unaligned u8x4 vector access.  */
++
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++#ifdef __NDS32_EXT_DSP__
++int
++main (void)
++{
++  /* Spare 5th byte: the four-lane access at data[1] must stay in bounds.  */
++  unsigned char data[] = {0x55,0x66,0x77,0x88,0x11};
++  unsigned char* char_data = (unsigned char*)& data[1];
++  uint8x4_t test_char = {0x11, 0x22, 0xaa, 0xbb};
++  uint8x4_t vecdata =  __nds32__get_unaligned_u8x4 (char_data);
++
++#ifdef __NDS32_EL__
++  if (vecdata[0] != 0x66)
++    abort ();
++#else
++  if (vecdata[0] != 0x66)
++    abort ();
++#endif
++
++  __nds32__put_unaligned_u8x4 (char_data, test_char);
++  vecdata =  __nds32__get_unaligned_u8x4 (char_data);
++
++  if (vecdata[0] != test_char[0] || vecdata[1] != test_char[1]
++      || vecdata[2] != test_char[2] || vecdata[3] != test_char[3])
++    abort ();
++  exit (0);
++}
++#else
++int main(){return 0;}
++#endif
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c
+new file mode 100644
+index 0000000..42640b4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c
+@@ -0,0 +1,31 @@
++/* This is a test program for unaligned double word access.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0 -std=c99" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned char data[] = {0x55, 0x66, 0x77, 0x88, 0xAA,
++			  0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
++  unsigned long long* long_long_data = (unsigned long long*) & data[1];
++  unsigned long long test_long_long = 0x1122334455667788LL;
++
++#ifdef __NDS32_EL__
++  if (__nds32__get_unaligned_dw (long_long_data) != 0xEEDDCCBBAA887766LL)
++    abort ();
++#else
++  if (__nds32__get_unaligned_dw (long_long_data) != 0x667788AABBCCDDEELL)
++    abort ();
++#endif
++
++  __nds32__put_unaligned_dw (long_long_data, test_long_long);
++
++  if (__nds32__get_unaligned_dw (long_long_data) != 0x1122334455667788LL)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c
+new file mode 100644
+index 0000000..f9e1ceb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c
+@@ -0,0 +1,30 @@
++/* This is a test program for unaligned half word access.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned char data[] = {0x55,0x66,0x77,0x88};
++  unsigned short* short_data = (unsigned short*)& data[1];
++  unsigned short test_short = 0x5566;
++
++#ifdef __NDS32_EL__
++  if (__nds32__get_unaligned_hw (short_data) != 0x7766)
++    abort ();
++#else
++  if (__nds32__get_unaligned_hw (short_data) != 0x6677)
++    abort ();
++#endif
++
++  __nds32__put_unaligned_hw (short_data, test_short);
++
++  if (__nds32__get_unaligned_hw (short_data) != 0x5566)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c
+new file mode 100644
+index 0000000..40d8711
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c
+@@ -0,0 +1,30 @@
++/* This is a test program for unaligned word access.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0 -std=c99" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned char data[] = {0x55,0x66,0x77,0x88,0xAA,0xBB,0xCC,0xDD};
++  unsigned int* int_data = (unsigned int*)& data[1];
++  unsigned int test_int = 0x55667788;
++
++#ifdef __NDS32_EL__
++  if (__nds32__get_unaligned_w (int_data) != 0xAA887766)
++    abort ();
++#else
++  if (__nds32__get_unaligned_w (int_data) != 0x667788AA)
++    abort ();
++#endif
++
++  __nds32__put_unaligned_w (int_data, test_int);
++
++  if (__nds32__get_unaligned_w (int_data) != 0x55667788)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c b/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c
+new file mode 100644
+index 0000000..1cee2ed
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c
+@@ -0,0 +1,21 @@
++/* This is a test program for wsbh instruction.  */
++
++/* { dg-do run } */
++/* { dg-options "-O0" } */
++
++#include <nds32_intrinsic.h>
++#include <stdlib.h>
++
++int
++main ()
++{
++  unsigned int a = 0x03020100;
++  unsigned int b;
++
++  b = __nds32__wsbh (a);
++
++  if (b != 0x02030001)
++    abort ();
++  else
++    exit (0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-all-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-all-pending.c
+new file mode 100644
+index 0000000..0e57831
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-all-pending.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++int
++main (void)
++{
++  int a = __nds32__get_all_pending_int ();
++  return a;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-cctl.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-cctl.c
+new file mode 100644
+index 0000000..2af55f5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-cctl.c
+@@ -0,0 +1,29 @@
++/* Verify that we generate cache control instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "L1D_VA_INVAL" } } */
++/* { dg-final { scan-assembler "L1D_VA_INVAL" } } */
++/* { dg-final { scan-assembler "L1D_INVALALL" } } */
++/* { dg-final { scan-assembler "L1D_IX_WWD" } } */
++/* { dg-final { scan-assembler "L1D_IX_RWD" } } */
++/* { dg-final { scan-assembler "PFM_CTL" } } */
++/* { dg-final { scan-assembler "PFM_CTL" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int va = 0;
++
++  __nds32__cctlva_lck (NDS32_CCTL_L1D_VA_FILLCK, &va);
++  __nds32__cctlidx_wbinval (NDS32_CCTL_L1D_IX_WBINVAL, va);
++  __nds32__cctlva_wbinval_alvl (NDS32_CCTL_L1D_VA_INVAL, &va);
++  __nds32__cctlva_wbinval_one_lvl (NDS32_CCTL_L1D_VA_INVAL, &va);
++  __nds32__cctl_l1d_invalall ();
++  __nds32__cctlidx_write (NDS32_CCTL_L1D_IX_WWD, va, 1);
++  __nds32__cctlidx_read (NDS32_CCTL_L1D_IX_RWD, 1);
++  __nds32__mtusr (0, NDS32_USR_PFM_CTL);
++  __nds32__mfusr (NDS32_USR_PFM_CTL);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c
+new file mode 100644
+index 0000000..fce90e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++ __nds32__clr_pending_hwint (NDS32_INT_H0);
++ __nds32__clr_pending_hwint (NDS32_INT_H1);
++ __nds32__clr_pending_hwint (NDS32_INT_H2);
++
++ __nds32__clr_pending_hwint (NDS32_INT_H15);
++ __nds32__clr_pending_hwint (NDS32_INT_H16);
++ __nds32__clr_pending_hwint (NDS32_INT_H31);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c
+new file mode 100644
+index 0000000..08e1dd0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__clr_pending_swint ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c
+new file mode 100644
+index 0000000..a3a1f44
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__disable_int (NDS32_INT_H15);
++  __nds32__disable_int (NDS32_INT_H16);
++  __nds32__disable_int (NDS32_INT_H31);
++  __nds32__disable_int (NDS32_INT_SWI);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-dpref.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-dpref.c
+new file mode 100644
+index 0000000..38cf822
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-dpref.c
+@@ -0,0 +1,24 @@
++/* Verify that we generate data prefetch instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "dpref\\tSRD" } } */
++/* { dg-final { scan-assembler "dpref\\tSRD" } } */
++/* { dg-final { scan-assembler "dpref\\tSRD" } } */
++/* { dg-final { scan-assembler "dpref\\tSRD" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned char dpref_q = 0;
++  unsigned short dpref_h = 0;
++  unsigned int dpref_w = 0;
++  unsigned long long dpref_dw = 0;
++
++  __nds32__dpref_qw (&dpref_q, 0, NDS32_DPREF_SRD);
++  __nds32__dpref_hw (&dpref_h, 0, NDS32_DPREF_SRD);
++  __nds32__dpref_w (&dpref_w, 0, NDS32_DPREF_SRD);
++  __nds32__dpref_dw (&dpref_dw, 0, NDS32_DPREF_SRD);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c
+new file mode 100644
+index 0000000..e18ed7a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__enable_int (NDS32_INT_H15);
++  __nds32__enable_int (NDS32_INT_H16);
++  __nds32__enable_int (NDS32_INT_H31);
++  __nds32__enable_int (NDS32_INT_SWI);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c
+new file mode 100644
+index 0000000..4ced0a5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++int
++main (void)
++{
++  int a = __nds32__get_pending_int (NDS32_INT_H15);
++  int b = __nds32__get_pending_int (NDS32_INT_SWI);
++  int c = __nds32__get_pending_int (NDS32_INT_H16);
++
++  return a + b + c;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c
+new file mode 100644
+index 0000000..a394a60
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++int
++main (void)
++{
++  int a = __nds32__get_trig_type (NDS32_INT_H0);
++  int b = __nds32__get_trig_type (NDS32_INT_H15);
++  int c = __nds32__get_trig_type (NDS32_INT_H16);
++  int d = __nds32__get_trig_type (NDS32_INT_H31);
++  return a + b + c + d;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c
+new file mode 100644
+index 0000000..c699966
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c
+@@ -0,0 +1,13 @@
++/* Verify that we generate isb instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tisb" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  __nds32__isb ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c
+new file mode 100644
+index 0000000..0c312e4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c
+@@ -0,0 +1,14 @@
++/* Verify that we generate isync instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tisync" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int *addr = (int *) 0x53000000;
++  __nds32__isync (addr);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-load-store.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-load-store.c
+new file mode 100644
+index 0000000..fc15716
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-load-store.c
+@@ -0,0 +1,25 @@
++/* Verify that we generate llw/lwup/scw/swup instruction
++   with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-require-effective-target nds32_no_v3m } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tllw" } } */
++/* { dg-final { scan-assembler "\\tlwup" } } */
++/* { dg-final { scan-assembler "\\tscw" } } */
++/* { dg-final { scan-assembler "\\tswup" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int a = 0;
++  int b = 0;
++  unsigned int cc = 0;
++
++  __nds32__llw (&a);
++  cc = __nds32__lwup (&a);
++  __nds32__scw (&a, b);
++  __nds32__swup (&a, b);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-lto.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-lto.c
+new file mode 100644
+index 0000000..fbebcb6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-lto.c
+@@ -0,0 +1,28 @@
++/* Verify that we use -flto option to generate instructions
++   with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0 -flto" } */
++/* { dg-final { scan-assembler "\\tdsb" } } */
++/* { dg-final { scan-assembler "\\tisb" } } */
++/* { dg-final { scan-assembler "\\tmsync\\tall" } } */
++/* { dg-final { scan-assembler "\\tmsync\\tstore" } } */
++/* { dg-final { scan-assembler "\\tnop" } } */
++/* { dg-final { scan-assembler "\\tstandby\\tno_wake_grant" } } */
++/* { dg-final { scan-assembler "\\tstandby\\twake_grant" } } */
++/* { dg-final { scan-assembler "\\tstandby\\twait_done" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  __nds32__dsb ();
++  __nds32__isb ();
++  __nds32__msync_all ();
++  __nds32__msync_store ();
++  __nds32__nop ();
++  __nds32__standby_no_wake_grant ();
++  __nds32__standby_wake_grant ();
++  __nds32__standby_wait_done ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-sva.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-sva.c
+new file mode 100644
+index 0000000..f927c72
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-sva.c
+@@ -0,0 +1,16 @@
++/* Verify that we generate sva instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tsva" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int a, b;
++  char c;
++
++  c = __nds32__sva (a, b);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-svs.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-svs.c
+new file mode 100644
+index 0000000..f998491
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-svs.c
+@@ -0,0 +1,16 @@
++/* Verify that we generate svs instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tsvs" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int a, b;
++  char c;
++
++  c = __nds32__svs (a, b);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c
+new file mode 100644
+index 0000000..f069507
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c
+@@ -0,0 +1,17 @@
++/* Verify that we generate mfsr/mtsr instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tmfsr" } } */
++/* { dg-final { scan-assembler "\\tmtsr" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int ipsw_value;
++
++  ipsw_value = __nds32__mfsr (__NDS32_REG_IPSW__);
++  __nds32__mtsr (ipsw_value, __NDS32_REG_IPSW__);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c
+new file mode 100644
+index 0000000..d6d069b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c
+@@ -0,0 +1,17 @@
++/* Verify that we generate mfusr/mtusr instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tmfusr" } } */
++/* { dg-final { scan-assembler "\\tmtusr" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int itype_value;
++
++  itype_value = __nds32__mfusr (__NDS32_REG_ITYPE__);
++  __nds32__mtusr (itype_value, __NDS32_REG_ITYPE__);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-misc.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-misc.c
+new file mode 100644
+index 0000000..a11f6d9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-misc.c
+@@ -0,0 +1,39 @@
++/* Verify that we generate other instructions with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tbreak" } } */
++/* { dg-final { scan-assembler "\\tdsb" } } */
++/* { dg-final { scan-assembler "\\tisb" } } */
++/* { dg-final { scan-assembler "\\tisync" } } */
++/* { dg-final { scan-assembler "\\tmsync\\tall" } } */
++/* { dg-final { scan-assembler "\\tmsync\\tstore" } } */
++/* { dg-final { scan-assembler "\\tnop" } } */
++/* { dg-final { scan-assembler "\\tstandby\\tno_wake_grant" } } */
++/* { dg-final { scan-assembler "\\tstandby\\twake_grant" } } */
++/* { dg-final { scan-assembler "\\tstandby\\twait_done" } } */
++/* { dg-final { scan-assembler "\\tteqz" } } */
++/* { dg-final { scan-assembler "\\ttnez" } } */
++/* { dg-final { scan-assembler "\\ttrap" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int a = 0;
++
++  __nds32__break (2);
++  __nds32__dsb ();
++  __nds32__isb ();
++  __nds32__isync (&a);
++  __nds32__msync_all ();
++  __nds32__msync_store ();
++  __nds32__nop ();
++  __nds32__standby_no_wake_grant ();
++  __nds32__standby_wake_grant ();
++  __nds32__standby_wait_done ();
++  __nds32__teqz (a, 2);
++  __nds32__tnez (a, 2);
++  __nds32__trap (2);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-dsb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-dsb.c
+new file mode 100644
+index 0000000..226d627
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-dsb.c
+@@ -0,0 +1,14 @@
++/* Verify that we generate mtsr and dsb instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tmtsr" } } */
++/* { dg-final { scan-assembler "\\tdsb" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__mtsr_dsb (1, NDS32_SR_ILMB);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-isb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-isb.c
+new file mode 100644
+index 0000000..e8b1f98
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-isb.c
+@@ -0,0 +1,14 @@
++/* Verify that we generate mtsr and isb instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tmtsr" } } */
++/* { dg-final { scan-assembler "\\tisb" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__mtsr_isb (1, NDS32_SR_ILMB);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-priority.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-priority.c
+new file mode 100644
+index 0000000..c2ec6f6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-priority.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++int
++main (void)
++{
++  __nds32__set_int_priority (NDS32_INT_H0,  0);
++  __nds32__set_int_priority (NDS32_INT_H15, 3);
++  __nds32__set_int_priority (NDS32_INT_H31, 3);
++
++  int a =  __nds32__get_int_priority (NDS32_INT_H0);
++  int b =  __nds32__get_int_priority (NDS32_INT_H15);
++  int c =  __nds32__get_int_priority (NDS32_INT_H31);
++
++  return a + b + c;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c
+new file mode 100644
+index 0000000..f10b83d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++int
++main (void)
++{
++  __nds32__set_pending_swint ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c
+new file mode 100644
+index 0000000..bd8178c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__set_trig_type_edge (NDS32_INT_H0);
++  __nds32__set_trig_type_edge (NDS32_INT_H15);
++  __nds32__set_trig_type_edge (NDS32_INT_H16);
++  __nds32__set_trig_type_edge (NDS32_INT_H31);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c
+new file mode 100644
+index 0000000..1780543
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++void
++main (void)
++{
++  __nds32__set_trig_type_level (NDS32_INT_H0);
++  __nds32__set_trig_type_level (NDS32_INT_H15);
++  __nds32__set_trig_type_level (NDS32_INT_H16);
++  __nds32__set_trig_type_level (NDS32_INT_H31);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c
+new file mode 100644
+index 0000000..e143d3f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c
+@@ -0,0 +1,13 @@
++/* Verify that we generate setgie.d instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tsetgie.d" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  __nds32__setgie_dis ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c
+new file mode 100644
+index 0000000..ed95782
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c
+@@ -0,0 +1,13 @@
++/* Verify that we generate setgie.e instruction with builtin function.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O0" } */
++/* { dg-final { scan-assembler "\\tsetgie.e" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  __nds32__setgie_en ();
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add16.c
+new file mode 100644
+index 0000000..49fca46
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add16.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kadd16" } } */
++/* { dg-final { scan-assembler "kadd16" } } */
++/* { dg-final { scan-assembler "ukadd16" } } */
++/* { dg-final { scan-assembler "ukadd16" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int16x2_t vr, va, vb;
++  uint16x2_t v_ur, v_ua, v_ub;
++
++  r = __nds32__kadd16 (a, b);
++  vr = __nds32__v_kadd16 (va, vb);
++
++  r = __nds32__ukadd16 (a, b);
++  v_ur = __nds32__v_ukadd16 (v_ua, v_ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add64.c
+new file mode 100644
+index 0000000..1f33a42
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add64.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kadd64" } } */
++/* { dg-final { scan-assembler "ukadd64" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  long long r, a, b;
++  unsigned long long ur, ua, ub;
++
++  r = __nds32__kadd64 (a, b);
++  ur = __nds32__ukadd64 (ua, ub);
++
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add8.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add8.c
+new file mode 100644
+index 0000000..1f2d226
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add8.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kadd8" } } */
++/* { dg-final { scan-assembler "kadd8" } } */
++/* { dg-final { scan-assembler "ukadd8" } } */
++/* { dg-final { scan-assembler "ukadd8" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int8x4_t vr, va, vb;
++  uint8x4_t v_ur, v_ua, v_ub;
++
++  r = __nds32__kadd8 (a, b);
++  vr = __nds32__v_kadd8 (va, vb);
++
++  r = __nds32__ukadd8 (a, b);
++  v_ur = __nds32__v_ukadd8 (v_ua, v_ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-cras16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-cras16.c
+new file mode 100644
+index 0000000..89c7e6d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-cras16.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kcras16" } } */
++/* { dg-final { scan-assembler "kcras16" } } */
++/* { dg-final { scan-assembler "ukcras16" } } */
++/* { dg-final { scan-assembler "ukcras16" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int16x2_t vr, va, vb;
++  uint16x2_t v_ur, v_ua, v_ub;
++
++  r = __nds32__kcras16 (a, b);
++  vr = __nds32__v_kcras16 (va, vb);
++
++  r = __nds32__ukcras16 (a, b);
++  v_ur = __nds32__v_ukcras16 (v_ua, v_ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-crsa16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-crsa16.c
+new file mode 100644
+index 0000000..beaa69a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-crsa16.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kcrsa16" } } */
++/* { dg-final { scan-assembler "kcrsa16" } } */
++/* { dg-final { scan-assembler "ukcrsa16" } } */
++/* { dg-final { scan-assembler "ukcrsa16" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int16x2_t vr, va, vb;
++  uint16x2_t v_ur, v_ua, v_ub;
++
++  r = __nds32__kcrsa16 (a, b);
++  vr = __nds32__v_kcrsa16 (va, vb);
++
++  r = __nds32__ukcrsa16 (a, b);
++  v_ur = __nds32__v_ukcrsa16 (v_ua, v_ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kabs8.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kabs8.c
+new file mode 100644
+index 0000000..de2e3c3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kabs8.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kabs8" } } */
++/* { dg-final { scan-assembler "kabs8" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a;
++  int8x4_t vr, va;
++
++  r = __nds32__kabs8 (a);
++  vr = __nds32__v_kabs8 (va);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll.c
+new file mode 100644
+index 0000000..316b10c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "ksll" } } */
++/* { dg-final { scan-assembler "kslli" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int r, a;
++  unsigned int b;
++
++  r = __nds32__ksll (a, b);
++  r = __nds32__ksll (a, 0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll16.c
+new file mode 100644
+index 0000000..be9a08e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll16.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "ksll16" } } */
++/* { dg-final { scan-assembler "ksll16" } } */
++/* { dg-final { scan-assembler "kslli16" } } */
++/* { dg-final { scan-assembler "kslli16" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int16x2_t vr, va;
++
++  r = __nds32__ksll16 (a, b);
++  vr = __nds32__v_ksll16 (va, b);
++
++  r = __nds32__ksll16 (a, 0);
++  vr = __nds32__v_ksll16 (va, 0);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kslrawu.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kslrawu.c
+new file mode 100644
+index 0000000..4eb03e5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kslrawu.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kslraw.u" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int r, a;
++  unsigned int b;
++
++  r = __nds32__kslraw_u (a, b);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-mar64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-mar64.c
+new file mode 100644
+index 0000000..79a3eb3
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-mar64.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kmar64" } } */
++/* { dg-final { scan-assembler "ukmar64" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  long long r, a, b;
++  unsigned long long ur, ua, ub;
++
++  r = __nds32__kmar64 (r, a, b);
++  ur = __nds32__ukmar64 (ur, ua, ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-misc16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-misc16.c
+new file mode 100644
+index 0000000..272e922
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-misc16.c
+@@ -0,0 +1,36 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "sclip16" } } */
++/* { dg-final { scan-assembler "sclip16" } } */
++/* { dg-final { scan-assembler "uclip16" } } */
++/* { dg-final { scan-assembler "uclip16" } } */
++/* { dg-final { scan-assembler "khm16" } } */
++/* { dg-final { scan-assembler "khm16" } } */
++/* { dg-final { scan-assembler "khmx16" } } */
++/* { dg-final { scan-assembler "khmx16" } } */
++/* { dg-final { scan-assembler "kabs16" } } */
++/* { dg-final { scan-assembler "kabs16" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int16x2_t vr, va, vb;
++
++  r = __nds32__sclip16 (a, 0);
++  vr = __nds32__v_sclip16 (va, 0);
++
++  r = __nds32__uclip16 (a, 0);
++  vr = __nds32__v_uclip16 (va, 0);
++
++  r = __nds32__khm16 (a, b);
++  vr = __nds32__v_khm16 (va, vb);
++
++  r = __nds32__khmx16 (a, b);
++  vr = __nds32__v_khmx16 (va, vb);
++
++  r = __nds32__kabs16 (a);
++  vr = __nds32__v_kabs16 (va);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msr64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msr64.c
+new file mode 100644
+index 0000000..2ad64fa
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msr64.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kmsr64" } } */
++/* { dg-final { scan-assembler "ukmsr64" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  long long r, a, b;
++  unsigned long long ur, ua, ub;
++
++  r = __nds32__kmsr64 (r, a, b);
++  ur = __nds32__ukmsr64 (ur, ua, ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw16.c
+new file mode 100644
+index 0000000..d7ccecb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw16.c
+@@ -0,0 +1,32 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kmmawb" } } */
++/* { dg-final { scan-assembler "kmmawb" } } */
++/* { dg-final { scan-assembler "kmmawb.u" } } */
++/* { dg-final { scan-assembler "kmmawb.u" } } */
++/* { dg-final { scan-assembler "kmmawt" } } */
++/* { dg-final { scan-assembler "kmmawt" } } */
++/* { dg-final { scan-assembler "kmmawt.u" } } */
++/* { dg-final { scan-assembler "kmmawt.u" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int r, a;
++  unsigned int b;
++  int16x2_t vb;
++
++  r = __nds32__kmmawb (r, a, b);
++  r = __nds32__v_kmmawb (r, a, vb);
++
++  r = __nds32__kmmawb_u (r, a, b);
++  r = __nds32__v_kmmawb_u (r, a, vb);
++
++  r = __nds32__kmmawt (r, a, b);
++  r = __nds32__v_kmmawt (r, a, vb);
++
++  r = __nds32__kmmawt_u (r, a, b);
++  r = __nds32__v_kmmawt_u (r, a, vb);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw32.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw32.c
+new file mode 100644
+index 0000000..64d8d4a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw32.c
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kmmac" } } */
++/* { dg-final { scan-assembler "kmmac.u" } } */
++/* { dg-final { scan-assembler "kmmsb" } } */
++/* { dg-final { scan-assembler "kmmsb.u" } } */
++/* { dg-final { scan-assembler "kwmmul" } } */
++/* { dg-final { scan-assembler "kwmmul.u" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int r, a, b;
++  r = __nds32__kmmac (r, a, b);
++  r = __nds32__kmmac_u (r, a, b);
++
++  r = __nds32__kmmsb (r, a, b);
++  r = __nds32__kmmsb_u (r, a, b);
++
++  r = __nds32__kwmmul (a, b);
++  r = __nds32__kwmmul_u (a, b);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-smul16x32.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-smul16x32.c
+new file mode 100644
+index 0000000..0d2b87f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-smul16x32.c
+@@ -0,0 +1,72 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "kmda" } } */
++/* { dg-final { scan-assembler "kmda" } } */
++/* { dg-final { scan-assembler "kmxda" } } */
++/* { dg-final { scan-assembler "kmxda" } } */
++/* { dg-final { scan-assembler "kmabb" } } */
++/* { dg-final { scan-assembler "kmabb" } } */
++/* { dg-final { scan-assembler "kmabt" } } */
++/* { dg-final { scan-assembler "kmabt" } } */
++/* { dg-final { scan-assembler "kmatt" } } */
++/* { dg-final { scan-assembler "kmatt" } } */
++/* { dg-final { scan-assembler "kmada" } } */
++/* { dg-final { scan-assembler "kmada" } } */
++/* { dg-final { scan-assembler "kmaxda" } } */
++/* { dg-final { scan-assembler "kmaxda" } } */
++/* { dg-final { scan-assembler "kmads" } } */
++/* { dg-final { scan-assembler "kmads" } } */
++/* { dg-final { scan-assembler "kmadrs" } } */
++/* { dg-final { scan-assembler "kmadrs" } } */
++/* { dg-final { scan-assembler "kmaxds" } } */
++/* { dg-final { scan-assembler "kmaxds" } } */
++/* { dg-final { scan-assembler "kmsda" } } */
++/* { dg-final { scan-assembler "kmsda" } } */
++/* { dg-final { scan-assembler "kmsxda" } } */
++/* { dg-final { scan-assembler "kmsxda" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  int r;
++  unsigned int a, b;
++  int16x2_t va, vb;
++
++  r = __nds32__kmda (a, b);
++  r = __nds32__v_kmda (va, vb);
++
++  r = __nds32__kmxda (a, b);
++  r = __nds32__v_kmxda (va, vb);
++
++  r = __nds32__kmabb (r, a, b);
++  r = __nds32__v_kmabb (r, va, vb);
++
++  r = __nds32__kmabt (r, a, b);
++  r = __nds32__v_kmabt (r, va, vb);
++
++  r = __nds32__kmatt (r, a, b);
++  r = __nds32__v_kmatt (r, va, vb);
++
++  r = __nds32__kmada (r, a, b);
++  r = __nds32__v_kmada (r, va, vb);
++
++  r = __nds32__kmaxda (r, a, b);
++  r = __nds32__v_kmaxda (r, va, vb);
++
++  r = __nds32__kmads (r, a, b);
++  r = __nds32__v_kmads (r, va, vb);
++
++  r = __nds32__kmadrs (r, a, b);
++  r = __nds32__v_kmadrs (r, va, vb);
++
++  r = __nds32__kmaxds (r, a, b);
++  r = __nds32__v_kmaxds (r, va, vb);
++
++  r = __nds32__kmsda (r, a, b);
++  r = __nds32__v_kmsda (r, va, vb);
++
++  r = __nds32__kmsxda (r, a, b);
++  r = __nds32__v_kmsxda (r, va, vb);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub16.c
+new file mode 100644
+index 0000000..ecea7bb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub16.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "ksub16" } } */
++/* { dg-final { scan-assembler "ksub16" } } */
++/* { dg-final { scan-assembler "uksub16" } } */
++/* { dg-final { scan-assembler "uksub16" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int16x2_t vr, va, vb;
++  uint16x2_t v_ur, v_ua, v_ub;
++
++  r = __nds32__ksub16 (a, b);
++  vr = __nds32__v_ksub16 (va, vb);
++
++  r = __nds32__uksub16 (a, b);
++  v_ur = __nds32__v_uksub16 (v_ua, v_ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub64.c
+new file mode 100644
+index 0000000..fae30e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub64.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "ksub64" } } */
++/* { dg-final { scan-assembler "uksub64" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  long long r, a, b;
++  unsigned long long ur, ua, ub;
++
++  r = __nds32__ksub64 (a, b);
++  ur = __nds32__uksub64 (ua, ub);
++
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub8.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub8.c
+new file mode 100644
+index 0000000..5e343e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub8.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-mext-dsp" } */
++/* { dg-final { scan-assembler "ksub8" } } */
++/* { dg-final { scan-assembler "ksub8" } } */
++/* { dg-final { scan-assembler "uksub8" } } */
++/* { dg-final { scan-assembler "uksub8" } } */
++
++#include <nds32_intrinsic.h>
++
++void
++test (void)
++{
++  unsigned int r, a, b;
++  int8x4_t vr, va, vb;
++  uint8x4_t v_ur, v_ua, v_ub;
++
++  r = __nds32__ksub8 (a, b);
++  vr = __nds32__v_ksub8 (va, vb);
++
++  r = __nds32__uksub8 (a, b);
++  v_ur = __nds32__v_uksub8 (v_ua, v_ub);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-unaligned-feature.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-unaligned-feature.c
+new file mode 100644
+index 0000000..6199109
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-unaligned-feature.c
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O1" } */
++
++#include <nds32_intrinsic.h>
++
++int
++main ()
++{
++  unsigned unalign = __nds32__unaligned_feature ();
++   __nds32__enable_unaligned ();
++   __nds32__disable_unaligned ();
++  return unalign;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-add-sub.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-add-sub.c
+new file mode 100644
+index 0000000..704610e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-add-sub.c
+@@ -0,0 +1,47 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "add8" } } */
++/* { dg-final { scan-assembler "add16" } } */
++/* { dg-final { scan-assembler "add64" } } */
++/* { dg-final { scan-assembler "sub8" } } */
++/* { dg-final { scan-assembler "sub16" } } */
++/* { dg-final { scan-assembler "sub64" } } */
++
++typedef signed char v4qi __attribute__ ((vector_size (4)));
++typedef short v2hi __attribute__ ((vector_size (4)));
++
++v4qi  __attribute__ ((noinline))
++add8 (v4qi a, v4qi b)
++{
++  return a + b;
++}
++
++v4qi  __attribute__ ((noinline))
++sub8 (v4qi a, v4qi b)
++{
++  return a - b;
++}
++
++v2hi  __attribute__ ((noinline))
++add16 (v2hi a, v2hi b)
++{
++  return a + b;
++}
++
++v2hi  __attribute__ ((noinline))
++sub16 (v2hi a, v2hi b)
++{
++  return a - b;
++}
++
++long long
++add64 (long long a, long long b)
++{
++  return a + b;
++}
++
++long long
++sub64 (long long a, long long b)
++{
++  return a - b;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-bpick.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-bpick.c
+new file mode 100644
+index 0000000..5f9d7de
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-bpick.c
+@@ -0,0 +1,8 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "bpick" } } */
++
++int bpick(int a, int b, int mask)
++{
++  return (a & mask) | (b & ~mask);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-mmul.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-mmul.c
+new file mode 100644
+index 0000000..5c9cdeb
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-mmul.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "smmul" } } */
++
++typedef signed char v4qi __attribute__ ((vector_size (4)));
++typedef short v2hi __attribute__ ((vector_size (4)));
++
++int smmul(int a, int b)
++{
++  long long tmp = (long long)a * b;
++  return (int)((tmp >> 32) & 0xffffffffll);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-mulhisi.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-mulhisi.c
+new file mode 100644
+index 0000000..856530b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-mulhisi.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "smbb" } } */
++/* { dg-final { scan-assembler "smbt" } } */
++/* { dg-final { scan-assembler "smtt" } } */
++
++typedef signed char v4qi __attribute__ ((vector_size (4)));
++typedef short v2hi __attribute__ ((vector_size (4)));
++
++int smbb(v2hi a, v2hi b)
++{
++  return a[0] * b[0];
++}
++
++int smbt(v2hi a, v2hi b)
++{
++  return a[0] * b[1];
++}
++
++int smtt(v2hi a, v2hi b)
++{
++  return a[1] * b[1];
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-raddsub.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-raddsub.c
+new file mode 100644
+index 0000000..4817637
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-raddsub.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "raddw" } } */
++/* { dg-final { scan-assembler "rsubw" } } */
++/* { dg-final { scan-assembler "uraddw" } } */
++/* { dg-final { scan-assembler "ursubw" } } */
++
++int raddw(int a, int b)
++{
++  return (a + b) >> 1;
++}
++
++int rsubw(int a, int b)
++{
++  return (a - b) >> 1;
++}
++
++unsigned uraddw(unsigned a, unsigned b)
++{
++  return (a + b) >> 1;
++}
++
++unsigned ursubw(unsigned a, unsigned b)
++{
++  return (a - b) >> 1;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-smals.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-smals.c
+new file mode 100644
+index 0000000..f1dc684
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-smals.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "smalbb" } } */
++/* { dg-final { scan-assembler "smalbt" } } */
++/* { dg-final { scan-assembler "smaltt" } } */
++/* { dg-final { scan-assembler "smal" } } */
++
++typedef signed char v4qi __attribute__ ((vector_size (4)));
++typedef short v2hi __attribute__ ((vector_size (4)));
++
++
++long long smalbb(long long acc, v2hi a, v2hi b)
++{
++  return acc + a[0] * b[0];
++}
++
++long long smalbt(long long acc, v2hi a, v2hi b)
++{
++  return acc + a[1] * b[0];
++}
++
++long long smaltt(long long acc, v2hi a, v2hi b)
++{
++  return acc + a[1] * b[1];
++}
++
++long long smal(v2hi a, long long b)
++{
++  return b + (long long)(a[0] * a[1]);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-smalxda.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-smalxda.c
+new file mode 100644
+index 0000000..2fe606b
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-smalxda.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "smalxda" } } */
++/* { dg-final { scan-assembler "smalxds" } } */
++
++typedef signed char v4qi __attribute__ ((vector_size (4)));
++typedef short v2hi __attribute__ ((vector_size (4)));
++
++long long smalxda(long long acc, v2hi a, v2hi b)
++{
++  return acc + (a[0] * b[1] + a[1] * b[0]);
++}
++
++long long smalxds(long long acc, v2hi a, v2hi b)
++{
++  return acc + (a[1] * b[0] - a[0] * b[1]);
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-unpkd.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-unpkd.c
+new file mode 100644
+index 0000000..2de7107
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-unpkd.c
+@@ -0,0 +1,79 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -mext-dsp" } */
++/* { dg-final { scan-assembler "sunpkd810" } } */
++/* { dg-final { scan-assembler "sunpkd820" } } */
++/* { dg-final { scan-assembler "sunpkd830" } } */
++/* { dg-final { scan-assembler "sunpkd831" } } */
++/* { dg-final { scan-assembler "zunpkd810" } } */
++/* { dg-final { scan-assembler "zunpkd820" } } */
++/* { dg-final { scan-assembler "zunpkd830" } } */
++/* { dg-final { scan-assembler "zunpkd831" } } */
++
++typedef signed char v4qi __attribute__ ((vector_size (4)));
++typedef short v2hi __attribute__ ((vector_size (4)));
++typedef unsigned char uv4qi __attribute__ ((vector_size (4)));
++typedef unsigned short uv2hi __attribute__ ((vector_size (4)));
++
++v2hi sunpkd810(v4qi v)
++{
++  v2hi ret;
++  ret[0] = v[0];
++  ret[1] = v[1];
++  return ret;
++}
++
++v2hi sunpkd820(v4qi v)
++{
++  v2hi ret;
++  ret[0] = v[0];
++  ret[1] = v[2];
++  return ret;
++}
++
++v2hi sunpkd830(v4qi v)
++{
++  v2hi ret;
++  ret[0] = v[0];
++  ret[1] = v[3];
++  return ret;
++}
++
++v2hi sunpkd831(v4qi v)
++{
++  v2hi ret;
++  ret[0] = v[1];
++  ret[1] = v[3];
++  return ret;
++}
++
++uv2hi zunpkd810(uv4qi v)
++{
++  uv2hi ret;
++  ret[0] = v[0];
++  ret[1] = v[1];
++  return ret;
++}
++
++uv2hi zunpkd820(uv4qi v)
++{
++  uv2hi ret;
++  ret[0] = v[0];
++  ret[1] = v[2];
++  return ret;
++}
++
++uv2hi zunpkd830(uv4qi v)
++{
++  uv2hi ret;
++  ret[0] = v[0];
++  ret[1] = v[3];
++  return ret;
++}
++
++uv2hi zunpkd831(uv4qi v)
++{
++  uv2hi ret;
++  ret[0] = v[1];
++  ret[1] = v[3];
++  return ret;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-1.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-1.c
+new file mode 100644
+index 0000000..d456fa5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-1.c
+@@ -0,0 +1,21 @@
++/* Verify scalbn transform pass for normal case.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-all -lm" } */
++/* { dg-require-effective-target nds32_soft_fp } */
++
++float test_scalbnf (float x)
++{
++  return x * 128;
++}
++
++double test_scalbn (double x)
++{
++  return x * 256;
++}
++
++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbnf \\(x_\[0-9\]+\\(D\\), 7\\);\\s*_\[0-9\]+ = \\(float\\) \\1;" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbn \\(x_\[0-9\]+\\(D\\), 8\\);\\s*_\[0-9\]+ = \\(double\\) \\1;" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-not " \\* 1.28e\\+2" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-not " \\* 2.56e\\+2" "scalbn_transform" } } */
++/* { dg-final { cleanup-tree-dump "*" } } */
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-2.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-2.c
+new file mode 100644
+index 0000000..480cf23
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-2.c
+@@ -0,0 +1,14 @@
++/* Verify scalbn transform pass for negative number case.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-all" } */
++/* { dg-require-effective-target nds32_soft_fp } */
++
++double test_neg_scalbn (double x)
++{
++  return x * -8;
++}
++
++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbn \\(x_\[0-9\]+\\(D\\), 3\\);\\s*_\[0-9\]+ = -\\1;" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-not " \\* -8.0e\\+0" "scalbn_transform" } } */
++/* { dg-final { cleanup-tree-dump "*" } } */
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-3.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-3.c
+new file mode 100644
+index 0000000..256f31a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-3.c
+@@ -0,0 +1,14 @@
++/* Verify scalbn transform pass for negative-exponent case.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-all" } */
++/* { dg-require-effective-target nds32_soft_fp } */
++
++double test_neg_exp_scalbnf (double x)
++{
++  return x * 0.0625;
++}
++
++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbn \\(x_\[0-9\]+\\(D\\), -4\\);\\s*_\[0-9\]+ = \\(double\\) \\1;" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-not " \\* 6.25e\\-2" "scalbn_transform" } } */
++/* { dg-final { cleanup-tree-dump "*" } } */
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-4.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-4.c
+new file mode 100644
+index 0000000..b6ba596
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-4.c
+@@ -0,0 +1,52 @@
++/* Verify scalbn transform pass for cases that can't be optimized.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-all" } */
++/* { dg-require-effective-target nds32_soft_fp } */
++
++#include "math.h"
++
++double test_filter_condition_1 (double x)
++{
++  return x * 0;
++}
++
++double test_filter_condition_2 (double x)
++{
++  return x * -0;
++}
++
++double test_filter_condition_3 (double x)
++{
++  return x * 485;
++}
++
++double test_filter_condition_4 (double x)
++{
++  return x * -85;
++}
++
++double test_filter_condition_5 (double x)
++{
++  return x * 0.12;
++}
++
++double test_filter_condition_6 (double x)
++{
++  return x * -INFINITY;
++}
++
++double test_filter_condition_7 (double x)
++{
++  return x * NAN;
++}
++
++/* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) \\* 0.0" 2 "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump " \\* 4.85e\\+2" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump " \\* -8.5e\\+1" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump " \\* 1.19999" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump " \\*  -Inf" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump " \\*  Nan" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-not "__builtin_scalbn" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-times "No multiplication stmt is transformed" 7  "scalbn_transform" } } */
++/* { dg-final { cleanup-tree-dump "*" } } */
+diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-5.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-5.c
+new file mode 100644
+index 0000000..874170e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-5.c
+@@ -0,0 +1,20 @@
++/* Verify scalbn transform pass for bug 11424 case.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-all" } */
++/* { dg-require-effective-target nds32_soft_fp } */
++
++typedef float float32_t;
++float32_t test_case (float32_t *pIn)
++{
++  float32_t in;
++  in = *pIn++;
++  in = (in * 128);
++  in += in > 0.0f ? 0.5f : -0.5f;
++
++  return in;
++}
++
++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbnf \\(in_\[0-9\]+, 7\\);\\s*in_\[0-9\]+ = \\(float32_t\\) \\1;" "scalbn_transform" } } */
++/* { dg-final { scan-tree-dump-not "in_\[0-9\]+ = in_\[0-9\]+ \\* 1.28e\\+2" "scalbn_transform" } } */
++/* { dg-final { cleanup-tree-dump "*" } } */
+diff --git a/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c b/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c
+new file mode 100644
+index 0000000..d1c61b7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c
+@@ -0,0 +1,127 @@
++/* { dg-do run } */
++
++#include <nds32_intrinsic.h>
++
++int16x2_t packing01(int16x2_t x, int16x2_t y) __attribute__ ((noinline));
++int16x2_t packing01(int16x2_t x, int16x2_t y)
++{
++  int16x2_t ret;
++  ret[0] = x[0];
++  ret[1] = y[1];
++  return ret;
++}
++
++int16x2_t packing10(int16x2_t x, int16x2_t y) __attribute__ ((noinline));
++int16x2_t packing10(int16x2_t x, int16x2_t y)
++{
++  int16x2_t ret;
++  ret[0] = x[1];
++  ret[1] = y[0];
++  return ret;
++}
++
++int16x2_t packing00(int16x2_t x, int16x2_t y) __attribute__ ((noinline));
++int16x2_t packing00(int16x2_t x, int16x2_t y)
++{
++  int16x2_t ret;
++  ret[0] = x[0];
++  ret[1] = y[0];
++  return ret;
++}
++
++int16x2_t packing0cv0(int16x2_t x) __attribute__ ((noinline));
++int16x2_t packing0cv0(int16x2_t x)
++{
++  int16x2_t ret = {0, 0};
++  ret[0] = x[0];
++  return ret;
++}
++
++int16x2_t packingcv00(int16x2_t x) __attribute__ ((noinline));
++int16x2_t packingcv00(int16x2_t x)
++{
++  int16x2_t ret = {0, 0};
++  ret[1] = x[0];
++  return ret;
++}
++
++int16x2_t packing11(int16x2_t x, int16x2_t y) __attribute__ ((noinline));
++int16x2_t packing11(int16x2_t x, int16x2_t y)
++{
++  int16x2_t ret;
++  ret[0] = x[1];
++  ret[1] = y[1];
++  return ret;
++}
++int16x2_t packing1cv0(int16x2_t x) __attribute__ ((noinline));
++int16x2_t packing1cv0(int16x2_t x)
++{
++  int16x2_t ret = {0, 0};
++  ret[0] = x[1];
++  return ret;
++}
++
++int16x2_t packingcv01(int16x2_t x) __attribute__ ((noinline));
++int16x2_t packingcv01(int16x2_t x)
++{
++  int16x2_t ret = {0, 0};
++  ret[1] = x[1];
++  return ret;
++}
++
++int main() {
++  int16x2_t a = {0x11, 0x22};
++  int16x2_t b = {0x33, 0x44};
++
++  int16x2_t ret00, ret01, ret10, ret11;
++  int16x2_t ret0cv0, retcv00, ret1cv0, retcv01;
++  ret00 = packing00 (a, b);
++
++  if (ret00[0] != 0x11
++      || ret00[1] != 0x33)
++    return 1;
++
++  ret0cv0 = packing0cv0 (a);
++
++  if (ret0cv0[0] != 0x11
++      || ret0cv0[1] != 0)
++    return 1;
++
++  retcv00 = packingcv00 (a);
++
++  if (retcv00[0] != 0
++      || retcv00[1] != 0x11)
++    return 1;
++
++  ret11 = packing11 (a, b);
++
++  if (ret11[0] != 0x22
++      || ret11[1] != 0x44)
++    return 1;
++
++  ret1cv0 = packing1cv0 (a);
++
++  if (ret1cv0[0] != 0x22
++      || ret1cv0[1] != 0)
++    return 1;
++
++  retcv01 = packingcv01 (a);
++
++  if (retcv01[0] != 0
++      || retcv01[1] != 0x22)
++    return 1;
++
++  ret01 = packing01 (a, b);
++
++  if (ret01[0] != 0x11
++      || ret01[1] != 0x44)
++    return 1;
++
++  ret10 = packing10 (a, b);
++
++  if (ret10[0] != 0x22
++      || ret10[1] != 0x33)
++    return 1;
++
++  return 0;
++}
+diff --git a/gcc/testsuite/gcc.target/nds32/nds32.exp b/gcc/testsuite/gcc.target/nds32/nds32.exp
+index 1c245f6..2f5a150 100644
+--- a/gcc/testsuite/gcc.target/nds32/nds32.exp
++++ b/gcc/testsuite/gcc.target/nds32/nds32.exp
+@@ -38,8 +38,10 @@ if ![info exists DEFAULT_CFLAGS] then {
+ dg-init
+ 
+ # Main loop.
+-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/compile/*.\[cS\]]] \
+ 	"" $DEFAULT_CFLAGS
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
++	"" ""
+ 
+ # All done.
+ dg-finish
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index f0f5ac4..5a9b57d 100644
+--- a/gcc/testsuite/lib/target-supports.exp
++++ b/gcc/testsuite/lib/target-supports.exp
+@@ -487,6 +487,10 @@ proc check_effective_target_trampolines { } {
+ 	 || [istarget hppa64-hp-hpux11.23] } {
+ 	return 0;
+     }
++    if { [istarget nds32*-*-*]
++	 && [check_effective_target_nds32_reduced_regs] } {
++	return 0;
++    }
+     return 1
+ }
+ 
+@@ -500,7 +504,7 @@ proc check_effective_target_keeps_null_pointer_checks { } {
+     if [target_info exists keeps_null_pointer_checks] {
+       return 1
+     }
+-    if { [istarget avr-*-*] } {
++    if { [istarget avr-*-*] || [istarget nds32*-*-elf] } {
+ 	return 1;   
+     }
+     return 0
+@@ -3597,6 +3601,125 @@ proc check_effective_target_arm_prefer_ldrd_strd { } {
+     }  "-O2 -mthumb" ]
+ }
+ 
++# If board info says it only has 16M addressing space, return 0.
++# Otherwise, return 1.
++proc check_effective_target_nds32_full_addr_space { } {
++    if [board_info target exists addr16m] {
++	return 0
++    }
++    return 1;
++}
++
++# Return 1 if gp direct is enabled by default.
++proc check_effective_target_nds32_gp_direct { } {
++    return [check_no_compiler_messages gp_direct object {
++	#ifdef __NDS32_GP_DIRECT__
++	int dummy;
++	#else
++	#error no GP_DIRECT
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mext-perf.
++proc check_effective_target_nds32_ext_perf { } {
++    return [check_no_compiler_messages ext_perf object {
++	#ifdef __NDS32_EXT_PERF__
++	int dummy;
++	#else
++	#error no EXT_PERF
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mext-perf2.
++proc check_effective_target_nds32_ext_perf2 { } {
++    return [check_no_compiler_messages ext_perf2 object {
++	#ifdef __NDS32_EXT_PERF2__
++	int dummy;
++	#else
++	#error no EXT_PERF2
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mext-string.
++proc check_effective_target_nds32_ext_string { } {
++    return [check_no_compiler_messages ext_string object {
++	#ifdef __NDS32_EXT_STRING__
++	int dummy;
++	#else
++	#error no EXT_STRING
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mext-fpu-sp or -mext-fpu-dp.
++proc check_effective_target_nds32_ext_fpu { } {
++    return [check_no_compiler_messages ext_fpu object {
++	#if defined(__NDS32_EXT_FPU_SP__) || defined(__NDS32_EXT_FPU_DP__)
++	int dummy;
++	#else
++	#error no support FPU
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target not supporting -mext-fpu-sp or -mext-fpu-dp.
++proc check_effective_target_nds32_soft_fp { } {
++    return [check_no_compiler_messages soft_fp object {
++	#if defined(__NDS32_EXT_FPU_SP__) || defined(__NDS32_EXT_FPU_DP__)
++	#error Hard FP
++	#else
++	int dummy;
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mext-fpu-sp.
++proc check_effective_target_nds32_ext_fpu_sp { } {
++    return [check_no_compiler_messages ext_fpu_sp object {
++	#ifdef __NDS32_EXT_FPU_SP__
++	int dummy;
++	#else
++	#error no EXT_FPU_SP
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mext-fpu-dp.
++proc check_effective_target_nds32_ext_fpu_dp { } {
++    return [check_no_compiler_messages ext_fpu_dp object {
++	#ifdef __NDS32_EXT_FPU_DP__
++	int dummy;
++	#else
++	#error no EXT_FPU_DP
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target supporting -mreduced-regs.
++proc check_effective_target_nds32_reduced_regs { } {
++    return [check_no_compiler_messages reduced_regs object {
++	#ifdef __NDS32_REDUCED_REGS__
++	int dummy;
++	#else
++	#error no REDUCED_REGS
++	#endif
++    }]
++}
++
++# Return 1 if this is a nds32 target not supporting v3m ISA.
++proc check_effective_target_nds32_no_v3m { } {
++    return [check_no_compiler_messages no_v3m object {
++	#if !defined(__NDS32_BASELINE_V3M__)
++	int dummy;
++	#else
++	#error Support V3M ISA
++	#endif
++    }]
++}
++
+ # Return 1 if this is a PowerPC target supporting -meabi.
+ 
+ proc check_effective_target_powerpc_eabi_ok { } {
+@@ -6897,6 +7020,7 @@ proc check_effective_target_logical_op_short_circuit {} {
+ 	 || [istarget avr*-*-*]
+ 	 || [istarget crisv32-*-*] || [istarget cris-*-*]
+ 	 || [istarget mmix-*-*]
++	 || [istarget nds32*-*-*]
+ 	 || [istarget s390*-*-*]
+ 	 || [istarget powerpc*-*-*]
+ 	 || [istarget nios2*-*-*]
+diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
+index 154df21..acd1a52 100644
+--- a/gcc/tree-vrp.c
++++ b/gcc/tree-vrp.c
+@@ -9518,6 +9518,7 @@ simplify_cond_using_ranges (gcond *stmt)
+      used for the comparison directly if we just massage the constant in the
+      comparison.  */
+   if (TREE_CODE (op0) == SSA_NAME
++      && has_single_use (op0)
+       && TREE_CODE (op1) == INTEGER_CST)
+     {
+       gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
+diff --git a/libgcc/config.host b/libgcc/config.host
+index 124f2ce..107ccb1 100644
+--- a/libgcc/config.host
++++ b/libgcc/config.host
+@@ -946,6 +946,23 @@ msp430*-*-elf)
+ 	tmake_file="$tm_file t-crtstuff t-fdpbit msp430/t-msp430"
+         extra_parts="$extra_parts libmul_none.a libmul_16.a libmul_32.a libmul_f5.a"
+ 	;;
++nds32*-linux*)
++	# Basic makefile fragment and extra_parts for crt stuff.
++	# We also append c-isr library implementation.
++	tmake_file="${tmake_file} t-slibgcc-libgcc"
++	tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp"
++	# The header file of defining MD_FALLBACK_FRAME_STATE_FOR.
++	md_unwind_header=nds32/linux-unwind.h
++	# Append library definition makefile fragment according to --with-nds32-lib=X setting.
++	case "${with_nds32_lib}" in
++	"" | glibc | uclibc )
++		;;
++	*)
++		echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: glibc uclibc" 1>&2
++		exit 1
++		;;
++	esac
++	;;
+ nds32*-elf*)
+ 	# Basic makefile fragment and extra_parts for crt stuff.
+ 	# We also append c-isr library implementation.
+@@ -959,9 +976,19 @@ nds32*-elf*)
+ 		tmake_file="${tmake_file} nds32/t-nds32-newlib t-softfp-sfdf t-softfp"
+ 		;;
+ 	mculib)
+-		# Append library definition makefile fragment t-nds32-mculib.
++		case "${with_arch}" in
++		"" | v2 | v2j | v3 | v3j | v3m)
++		# Append library definition makefile fragment t-nds32-mculib-generic.
+ 		# The software floating point library is included in mculib.
+-		tmake_file="${tmake_file} nds32/t-nds32-mculib"
++			tmake_file="${tmake_file} nds32/t-nds32-mculib-generic"
++			;;
++		v3f | v3s)
++		# Append library definition makefile fragment t-nds32-mculib-softfp.
++		# mculib does not support ABI2FP_PLUS,
++		# so use the 'soft-fp' software floating point make rule fragment provided by gcc.
++			tmake_file="${tmake_file} nds32/t-nds32-mculib-softfp t-softfp-sfdf t-softfp"
++			;;
++		esac
+ 		;;
+ 	*)
+ 		echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2
+diff --git a/libgcc/config/nds32/crtzero.S b/libgcc/config/nds32/crtzero.S
+deleted file mode 100644
+index 9898525..0000000
+--- a/libgcc/config/nds32/crtzero.S
++++ /dev/null
+@@ -1,103 +0,0 @@
+-/* The startup code sample of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-!!==============================================================================
+-!!
+-!!      crtzero.S
+-!!
+-!!      This is JUST A SAMPLE of nds32 startup code !!
+-!!      You can refer this content and implement
+-!!      the actual one in newlib/mculib.
+-!!
+-!!==============================================================================
+-
+-!!------------------------------------------------------------------------------
+-!! Jump to start up code
+-!!------------------------------------------------------------------------------
+-	.section	.nds32_init, "ax"
+-	j	_start
+-
+-!!------------------------------------------------------------------------------
+-!! Startup code implementation
+-!!------------------------------------------------------------------------------
+-	.section	.text
+-	.global	_start
+-	.weak	_SDA_BASE_
+-	.weak	_FP_BASE_
+-	.align	2
+-	.func	_start
+-	.type	_start, @function
+-_start:
+-.L_fp_gp_lp_init:
+-	la	$fp, _FP_BASE_		! init $fp
+-	la	$gp, _SDA_BASE_		! init $gp for small data access
+-	movi	$lp, 0			! init $lp
+-
+-.L_stack_init:
+-	la	$sp, _stack		! init $sp
+-	movi	$r0, -8			! align $sp to 8-byte (use 0xfffffff8)
+-	and	$sp, $sp, $r0		! align $sp to 8-byte (filter out lower 3-bit)
+-
+-.L_bss_init:
+-	! clear BSS, this process can be 4 time faster if data is 4 byte aligned
+-	! if so, use swi.p instead of sbi.p
+-	! the related stuff are defined in linker script
+-	la	$r0, _edata		! get the starting addr of bss
+-	la	$r2, _end		! get ending addr of bss
+-	beq	$r0, $r2, .L_call_main	! if no bss just do nothing
+-	movi	$r1, 0			! should be cleared to 0
+-.L_clear_bss:
+-	sbi.p	$r1, [$r0], 1		! Set 0 to bss
+-	bne	$r0, $r2, .L_clear_bss	! Still bytes left to set
+-
+-!.L_stack_heap_check:
+-!	la	$r0, _end		! init heap_end
+-!	s.w	$r0, heap_end		! save it
+-
+-
+-!.L_init_argc_argv:
+-!	! argc/argv initialization if necessary; default implementation is in crt1.o
+-!	la	$r9, _arg_init		! load address of _arg_init?
+-!	beqz	$r9, .L4		! has _arg_init? no, go check main()
+-!	addi	$sp, $sp, -512		! allocate space for command line + arguments
+-!	move	$r6, $sp		! r6 = buffer addr of cmd line
+-!	move	$r0, $r6		! r0 = buffer addr of cmd line
+-!	syscall	6002			! get cmd line
+-!	move	$r0, $r6		! r0 = buffer addr of cmd line
+-!	addi	$r1, $r6, 256		! r1 = argv
+-!	jral	$r9			! init argc/argv
+-!	addi	$r1, $r6, 256		! r1 = argv
+-
+-.L_call_main:
+-	! call main() if main() is provided
+-	la	$r15, main		! load address of main
+-	jral	$r15			! call main
+-
+-.L_terminate_program:
+-	syscall	0x1			! use syscall 0x1 to terminate program
+-	.size	_start, .-_start
+-	.end
+-
+-!! ------------------------------------------------------------------------
+diff --git a/libgcc/config/nds32/initfini.c b/libgcc/config/nds32/initfini.c
+index 0aa33f5..34406f0 100644
+--- a/libgcc/config/nds32/initfini.c
++++ b/libgcc/config/nds32/initfini.c
+@@ -25,6 +25,10 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
++#include <stddef.h>
++/* Need header file for `struct object' type.  */
++#include "../libgcc/unwind-dw2-fde.h"
++
+ /*  Declare a pointer to void function type.  */
+ typedef void (*func_ptr) (void);
+ 
+@@ -42,11 +46,59 @@ typedef void (*func_ptr) (void);
+    refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__
+    symbol in crtinit.o, where they are defined.  */
+ 
+-static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors")))
+-     = { (func_ptr) (-1) };
++static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"), used))
++     = { (func_ptr) 0 };
++
++static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"), used))
++     = { (func_ptr) 0 };
++
++
++#ifdef SUPPORT_UNWINDING_DWARF2
++/* Preparation of exception handling with dwarf2 mechanism registration.  */
+ 
+-static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
+-     = { (func_ptr) (-1) };
++asm ("\n\
++	.section .eh_frame,\"aw\",@progbits\n\
++	.global __EH_FRAME_BEGIN__\n\
++	.type	__EH_FRAME_BEGIN__, @object\n\
++	.align 2\n\
++__EH_FRAME_BEGIN__:\n\
++	! Beginning location of eh_frame section\n\
++	.previous\n\
++");
++
++extern func_ptr __EH_FRAME_BEGIN__[];
++
++
++/* Note that the following two functions are going to be chained into
++   constructor and destructor list, respectively.  So these two declarations
++   must be placed after __CTOR_LIST__ and __DTOR_LIST__.  */
++extern void __nds32_register_eh(void) __attribute__((constructor, used));
++extern void __nds32_deregister_eh(void) __attribute__((destructor, used));
++
++/* Register the exception handling table as the first constructor.  */
++void
++__nds32_register_eh (void)
++{
++  static struct object object;
++  if (__register_frame_info)
++    __register_frame_info (__EH_FRAME_BEGIN__, &object);
++}
++
++/* Unregister the exception handling table as a destructor.  */
++void
++__nds32_deregister_eh (void)
++{
++  static int completed = 0;
++
++  if (completed)
++    return;
++
++  if (__deregister_frame_info)
++    __deregister_frame_info (__EH_FRAME_BEGIN__);
++
++  completed = 1;
++}
++#endif
+ 
+ /* Run all the global destructors on exit from the program.  */
+ 
+@@ -63,7 +115,7 @@ static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
+    same particular root executable or shared library file.  */
+ 
+ static void __do_global_dtors (void)
+-asm ("__do_global_dtors") __attribute__ ((section (".text")));
++asm ("__do_global_dtors") __attribute__ ((section (".text"), used));
+ 
+ static void
+ __do_global_dtors (void)
+@@ -116,23 +168,37 @@ void *__dso_handle = 0;
+    last, these words naturally end up at the very ends of the two lists
+    contained in these two sections.  */
+ 
+-static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors")))
++static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"), used))
+      = { (func_ptr) 0 };
+ 
+-static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors")))
++static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"), used))
+      = { (func_ptr) 0 };
+ 
++#ifdef SUPPORT_UNWINDING_DWARF2
++/* ZERO terminator in .eh_frame section.  */
++asm ("\n\
++	.section .eh_frame,\"aw\",@progbits\n\
++	.global __EH_FRAME_END__\n\
++	.type	__EH_FRAME_END__, @object\n\
++	.align 2\n\
++__EH_FRAME_END__:\n\
++	! End location of eh_frame section with ZERO terminator\n\
++	.word 0\n\
++	.previous\n\
++");
++#endif
++
+ /* Run all global constructors for the program.
+    Note that they are run in reverse order.  */
+ 
+ static void __do_global_ctors (void)
+-asm ("__do_global_ctors") __attribute__ ((section (".text")));
++asm ("__do_global_ctors") __attribute__ ((section (".text"), used));
+ 
+ static void
+ __do_global_ctors (void)
+ {
+   func_ptr *p;
+-  for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--)
++  for (p = __CTOR_END__ - 1; *p; p--)
+     (*p) ();
+ }
+ 
+diff --git a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc
+index 3e978b4..a519df8 100644
+--- a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc
++++ b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc
+@@ -26,13 +26,26 @@
+ .macro ADJ_INTR_LVL
+ #if defined(NDS32_NESTED) /* Nested handler.  */
+ 	mfsr	$r3, $PSW
++	/* By subtracting 1 from $PSW, we can lower PSW.INTL
++	   and enable GIE simultaneously.  */
+ 	addi	$r3, $r3, #-0x1
++  #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
++    ori   $r3, $r3, 0x2000  /* Set PSW.AEN(b'13) */
++  #endif
+ 	mtsr	$r3, $PSW
+ #elif defined(NDS32_NESTED_READY) /* Nested ready handler.  */
+ 	/* Save ipc and ipsw and lower INT level.  */
+ 	mfsr	$r3, $PSW
+ 	addi	$r3, $r3, #-0x2
++  #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
++    ori   $r3, $r3, 0x2000  /* Set PSW.AEN(b'13) */
++  #endif
+ 	mtsr	$r3, $PSW
+ #else /* Not nested handler.  */
++  #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
++    mfsr	$r3, $PSW
++    ori   $r3, $r3, 0x2000  /* Set PSW.AEN(b'13) */
++    mtsr	$r3, $PSW
++  #endif
+ #endif
+ .endm
+diff --git a/libgcc/config/nds32/isr-library/excp_isr.S b/libgcc/config/nds32/isr-library/excp_isr.S
+index 6179a98..f1a3b59 100644
+--- a/libgcc/config/nds32/isr-library/excp_isr.S
++++ b/libgcc/config/nds32/isr-library/excp_isr.S
+@@ -23,6 +23,7 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
++#include "save_usr_regs.inc"
+ #include "save_mac_regs.inc"
+ #include "save_fpu_regs.inc"
+ #include "save_fpu_regs_00.inc"
+@@ -32,35 +33,33 @@
+ #include "save_all.inc"
+ #include "save_partial.inc"
+ #include "adj_intr_lvl.inc"
+-#include "restore_mac_regs.inc"
+ #include "restore_fpu_regs_00.inc"
+ #include "restore_fpu_regs_01.inc"
+ #include "restore_fpu_regs_02.inc"
+ #include "restore_fpu_regs_03.inc"
+ #include "restore_fpu_regs.inc"
++#include "restore_mac_regs.inc"
++#include "restore_usr_regs.inc"
+ #include "restore_all.inc"
+ #include "restore_partial.inc"
++
+ 	.section .nds32_isr, "ax"       /* Put it in the section of 1st level handler. */
+ 	.align	1
+-/*
+-  First Level Handlers
+-  1. First Level Handlers are invokded in vector section via jump instruction
+-  with specific names for different configurations.
+-  2. Naming Format: _nds32_e_SR_NT for exception handlers.
+-		    _nds32_i_SR_NT for interrupt handlers.
+-  2.1 All upper case letters are replaced with specific lower case letters encodings.
+-  2.2 SR: Saved Registers
+-      sa: Save All regs (context)
+-      ps: Partial Save (all caller-saved regs)
+-  2.3 NT: Nested Type
+-      ns: nested
+-      nn: not nested
+-      nr: nested ready
+-*/
+-
+-/*
+-  This is original 16-byte vector size version.
+-*/
++
++/* First Level Handlers
++   1. First Level Handlers are invoked in vector section via jump instruction
++      with specific names for different configurations.
++   2. Naming Format: _nds32_e_SR_NT for exception handlers.
++                     _nds32_i_SR_NT for interrupt handlers.
++     2.1 All upper case letters are replaced with specific lower case letters encodings.
++     2.2 SR -- Saved Registers
++         sa: Save All regs (context)
++         ps: Partial Save (all caller-saved regs)
++     2.3 NT -- Nested Type
++         ns: nested
++         nn: not nested
++         nr: nested ready */
++
+ #ifdef NDS32_SAVE_ALL_REGS
+ #if defined(NDS32_NESTED)
+ 	.globl	_nds32_e_sa_ns
+@@ -91,21 +90,26 @@ _nds32_e_ps_nn:
+ #endif /* endif for Nest Type */
+ #endif /* not NDS32_SAVE_ALL_REGS */
+ 
+-/*
+-  This is 16-byte vector size version.
+-  The vector id was restored into $r0 in vector by compiler.
+-*/
++
++/* For 4-byte vector size version, the vector id is
++   extracted from $ITYPE and is set into $r0 by library.
++   For 16-byte vector size version, the vector id
++   is set into $r0 in vector section by compiler.  */
++
++/* Save used registers.  */
+ #ifdef NDS32_SAVE_ALL_REGS
+         SAVE_ALL
+ #else
+         SAVE_PARTIAL
+ #endif
++
+ 	/* Prepare to call 2nd level handler. */
+ 	la	$r2, _nds32_jmptbl_00
+ 	lw	$r2, [$r2 + $r0 << #2]
+ 	ADJ_INTR_LVL	/* Adjust INTR level. $r3 is clobbered.  */
+ 	jral    $r2
+-	/* Restore used registers. */
++
++/* Restore used registers.  */
+ #ifdef NDS32_SAVE_ALL_REGS
+ 	RESTORE_ALL
+ #else
+@@ -113,6 +117,7 @@ _nds32_e_ps_nn:
+ #endif
+ 	iret
+ 
++
+ #ifdef NDS32_SAVE_ALL_REGS
+ #if defined(NDS32_NESTED)
+ 	.size	_nds32_e_sa_ns, .-_nds32_e_sa_ns
+diff --git a/libgcc/config/nds32/isr-library/excp_isr_4b.S b/libgcc/config/nds32/isr-library/excp_isr_4b.S
+deleted file mode 100644
+index af70c7a..0000000
+--- a/libgcc/config/nds32/isr-library/excp_isr_4b.S
++++ /dev/null
+@@ -1,133 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-#include "save_mac_regs.inc"
+-#include "save_fpu_regs.inc"
+-#include "save_fpu_regs_00.inc"
+-#include "save_fpu_regs_01.inc"
+-#include "save_fpu_regs_02.inc"
+-#include "save_fpu_regs_03.inc"
+-#include "save_all.inc"
+-#include "save_partial.inc"
+-#include "adj_intr_lvl.inc"
+-#include "restore_mac_regs.inc"
+-#include "restore_fpu_regs_00.inc"
+-#include "restore_fpu_regs_01.inc"
+-#include "restore_fpu_regs_02.inc"
+-#include "restore_fpu_regs_03.inc"
+-#include "restore_fpu_regs.inc"
+-#include "restore_all.inc"
+-#include "restore_partial.inc"
+-	.section .nds32_isr, "ax"       /* Put it in the section of 1st level handler. */
+-	.align	1
+-/*
+-  First Level Handlers
+-  1. First Level Handlers are invokded in vector section via jump instruction
+-  with specific names for different configurations.
+-  2. Naming Format: _nds32_e_SR_NT for exception handlers.
+-		    _nds32_i_SR_NT for interrupt handlers.
+-  2.1 All upper case letters are replaced with specific lower case letters encodings.
+-  2.2 SR: Saved Registers
+-      sa: Save All regs (context)
+-      ps: Partial Save (all caller-saved regs)
+-  2.3 NT: Nested Type
+-      ns: nested
+-      nn: not nested
+-      nr: nested ready
+-*/
+-
+-/*
+-  This is 4-byte vector size version.
+-  The "_4b" postfix was added for 4-byte version symbol.
+-*/
+-#ifdef NDS32_SAVE_ALL_REGS
+-#if defined(NDS32_NESTED)
+-	.globl	_nds32_e_sa_ns_4b
+-	.type	_nds32_e_sa_ns_4b, @function
+-_nds32_e_sa_ns_4b:
+-#elif defined(NDS32_NESTED_READY)
+-	.globl	_nds32_e_sa_nr_4b
+-	.type	_nds32_e_sa_nr_4b, @function
+-_nds32_e_sa_nr_4b:
+-#else /* Not nested handler. */
+-	.globl	_nds32_e_sa_nn_4b
+-	.type	_nds32_e_sa_nn_4b, @function
+-_nds32_e_sa_nn_4b:
+-#endif /* endif for Nest Type */
+-#else /* not NDS32_SAVE_ALL_REGS */
+-#if defined(NDS32_NESTED)
+-	.globl	_nds32_e_ps_ns_4b
+-	.type	_nds32_e_ps_ns_4b, @function
+-_nds32_e_ps_ns_4b:
+-#elif defined(NDS32_NESTED_READY)
+-	.globl	_nds32_e_ps_nr_4b
+-	.type	_nds32_e_ps_nr_4b, @function
+-_nds32_e_ps_nr_4b:
+-#else /* Not nested handler. */
+-	.globl	_nds32_e_ps_nn_4b
+-	.type	_nds32_e_ps_nn_4b, @function
+-_nds32_e_ps_nn_4b:
+-#endif /* endif for Nest Type */
+-#endif /* not NDS32_SAVE_ALL_REGS */
+-
+-/*
+-  This is 4-byte vector size version.
+-  The vector id was restored into $lp in vector by compiler.
+-*/
+-#ifdef NDS32_SAVE_ALL_REGS
+-	SAVE_ALL_4B
+-#else
+-	SAVE_PARTIAL_4B
+-#endif
+-	/* Prepare to call 2nd level handler. */
+-	la	$r2, _nds32_jmptbl_00
+-	lw	$r2, [$r2 + $r0 << #2]
+-	ADJ_INTR_LVL	/* Adjust INTR level. $r3 is clobbered.  */
+-	jral    $r2
+-	/* Restore used registers. */
+-#ifdef NDS32_SAVE_ALL_REGS
+-	RESTORE_ALL
+-#else
+-	RESTORE_PARTIAL
+-#endif
+-	iret
+-
+-#ifdef NDS32_SAVE_ALL_REGS
+-#if defined(NDS32_NESTED)
+-	.size	_nds32_e_sa_ns_4b, .-_nds32_e_sa_ns_4b
+-#elif defined(NDS32_NESTED_READY)
+-	.size	_nds32_e_sa_nr_4b, .-_nds32_e_sa_nr_4b
+-#else /* Not nested handler. */
+-	.size	_nds32_e_sa_nn_4b, .-_nds32_e_sa_nn_4b
+-#endif /* endif for Nest Type */
+-#else /* not NDS32_SAVE_ALL_REGS */
+-#if defined(NDS32_NESTED)
+-	.size	_nds32_e_ps_ns_4b, .-_nds32_e_ps_ns_4b
+-#elif defined(NDS32_NESTED_READY)
+-	.size	_nds32_e_ps_nr_4b, .-_nds32_e_ps_nr_4b
+-#else /* Not nested handler. */
+-	.size	_nds32_e_ps_nn_4b, .-_nds32_e_ps_nn_4b
+-#endif /* endif for Nest Type */
+-#endif /* not NDS32_SAVE_ALL_REGS */
+diff --git a/libgcc/config/nds32/isr-library/intr_isr.S b/libgcc/config/nds32/isr-library/intr_isr.S
+index c55da1c..90c5c25 100644
+--- a/libgcc/config/nds32/isr-library/intr_isr.S
++++ b/libgcc/config/nds32/isr-library/intr_isr.S
+@@ -23,6 +23,7 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
++#include "save_usr_regs.inc"
+ #include "save_mac_regs.inc"
+ #include "save_fpu_regs.inc"
+ #include "save_fpu_regs_00.inc"
+@@ -32,35 +33,33 @@
+ #include "save_all.inc"
+ #include "save_partial.inc"
+ #include "adj_intr_lvl.inc"
+-#include "restore_mac_regs.inc"
+ #include "restore_fpu_regs_00.inc"
+ #include "restore_fpu_regs_01.inc"
+ #include "restore_fpu_regs_02.inc"
+ #include "restore_fpu_regs_03.inc"
+ #include "restore_fpu_regs.inc"
++#include "restore_mac_regs.inc"
++#include "restore_usr_regs.inc"
+ #include "restore_all.inc"
+ #include "restore_partial.inc"
++
+ 	.section .nds32_isr, "ax"       /* Put it in the section of 1st level handler. */
+ 	.align	1
+-/*
+-  First Level Handlers
+-  1. First Level Handlers are invokded in vector section via jump instruction
+-  with specific names for different configurations.
+-  2. Naming Format: _nds32_e_SR_NT for exception handlers.
+-		    _nds32_i_SR_NT for interrupt handlers.
+-  2.1 All upper case letters are replaced with specific lower case letters encodings.
+-  2.2 SR: Saved Registers
+-      sa: Save All regs (context)
+-      ps: Partial Save (all caller-saved regs)
+-  2.3 NT: Nested Type
+-      ns: nested
+-      nn: not nested
+-      nr: nested ready
+-*/
+-
+-/*
+-  This is original 16-byte vector size version.
+-*/
++
++/* First Level Handlers
++   1. First Level Handlers are invoked in vector section via jump instruction
++      with specific names for different configurations.
++   2. Naming Format: _nds32_e_SR_NT for exception handlers.
++                     _nds32_i_SR_NT for interrupt handlers.
++     2.1 All upper case letters are replaced with specific lower case letters encodings.
++     2.2 SR -- Saved Registers
++         sa: Save All regs (context)
++         ps: Partial Save (all caller-saved regs)
++     2.3 NT -- Nested Type
++         ns: nested
++         nn: not nested
++         nr: nested ready */
++
+ #ifdef NDS32_SAVE_ALL_REGS
+ #if defined(NDS32_NESTED)
+ 	.globl	_nds32_i_sa_ns
+@@ -91,21 +90,36 @@ _nds32_i_ps_nn:
+ #endif /* endif for Nest Type */
+ #endif /* not NDS32_SAVE_ALL_REGS */
+ 
+-/*
+-  This is 16-byte vector size version.
+-  The vector id was restored into $r0 in vector by compiler.
+-*/
++
++/* For 4-byte vector size version, the vector id is
++   extracted from $ITYPE and is set into $r0 by library.
++   For 16-byte vector size version, the vector id
++   is set into $r0 in vector section by compiler.  */
++
++/* Save used registers first.  */
+ #ifdef NDS32_SAVE_ALL_REGS
+         SAVE_ALL
+ #else
+         SAVE_PARTIAL
+ #endif
+-	/* Prepare to call 2nd level handler. */
++
++/* According to vector size, we need to have different implementation.  */
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* Prepare to call 2nd level handler.  */
++	la	$r2, _nds32_jmptbl_00
++	lw	$r2, [$r2 + $r0 << #2]
++	addi    $r0, $r0, #-9	/* Make interrupt vector id zero-based.  */
++	ADJ_INTR_LVL	/* Adjust INTR level.  $r3 is clobbered.  */
++	jral    $r2
++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */
++	/* Prepare to call 2nd level handler.  */
+ 	la	$r2, _nds32_jmptbl_09	/* For zero-based vcetor id.  */
+ 	lw	$r2, [$r2 + $r0 << #2]
+ 	ADJ_INTR_LVL	/* Adjust INTR level. $r3 is clobbered.  */
+ 	jral    $r2
+-	/* Restore used registers. */
++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */
++
++/* Restore used registers.  */
+ #ifdef NDS32_SAVE_ALL_REGS
+ 	RESTORE_ALL
+ #else
+@@ -113,6 +127,7 @@ _nds32_i_ps_nn:
+ #endif
+ 	iret
+ 
++
+ #ifdef NDS32_SAVE_ALL_REGS
+ #if defined(NDS32_NESTED)
+ 	.size	_nds32_i_sa_ns, .-_nds32_i_sa_ns
+diff --git a/libgcc/config/nds32/isr-library/intr_isr_4b.S b/libgcc/config/nds32/isr-library/intr_isr_4b.S
+deleted file mode 100644
+index d82c007..0000000
+--- a/libgcc/config/nds32/isr-library/intr_isr_4b.S
++++ /dev/null
+@@ -1,134 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-#include "save_mac_regs.inc"
+-#include "save_fpu_regs.inc"
+-#include "save_fpu_regs_00.inc"
+-#include "save_fpu_regs_01.inc"
+-#include "save_fpu_regs_02.inc"
+-#include "save_fpu_regs_03.inc"
+-#include "save_all.inc"
+-#include "save_partial.inc"
+-#include "adj_intr_lvl.inc"
+-#include "restore_mac_regs.inc"
+-#include "restore_fpu_regs_00.inc"
+-#include "restore_fpu_regs_01.inc"
+-#include "restore_fpu_regs_02.inc"
+-#include "restore_fpu_regs_03.inc"
+-#include "restore_fpu_regs.inc"
+-#include "restore_all.inc"
+-#include "restore_partial.inc"
+-	.section .nds32_isr, "ax"       /* Put it in the section of 1st level handler. */
+-	.align	1
+-/*
+-  First Level Handlers
+-  1. First Level Handlers are invokded in vector section via jump instruction
+-  with specific names for different configurations.
+-  2. Naming Format: _nds32_e_SR_NT for exception handlers.
+-		    _nds32_i_SR_NT for interrupt handlers.
+-  2.1 All upper case letters are replaced with specific lower case letters encodings.
+-  2.2 SR: Saved Registers
+-      sa: Save All regs (context)
+-      ps: Partial Save (all caller-saved regs)
+-  2.3 NT: Nested Type
+-      ns: nested
+-      nn: not nested
+-      nr: nested ready
+-*/
+-
+-/*
+-  This is 4-byte vector size version.
+-  The "_4b" postfix was added for 4-byte version symbol.
+-*/
+-#ifdef NDS32_SAVE_ALL_REGS
+-#if defined(NDS32_NESTED)
+-	.globl	_nds32_i_sa_ns_4b
+-	.type	_nds32_i_sa_ns_4b, @function
+-_nds32_i_sa_ns_4b:
+-#elif defined(NDS32_NESTED_READY)
+-	.globl	_nds32_i_sa_nr_4b
+-	.type	_nds32_i_sa_nr_4b, @function
+-_nds32_i_sa_nr_4b:
+-#else /* Not nested handler. */
+-	.globl	_nds32_i_sa_nn_4b
+-	.type	_nds32_i_sa_nn_4b, @function
+-_nds32_i_sa_nn_4b:
+-#endif /* endif for Nest Type */
+-#else /* not NDS32_SAVE_ALL_REGS */
+-#if defined(NDS32_NESTED)
+-	.globl	_nds32_i_ps_ns_4b
+-	.type	_nds32_i_ps_ns_4b, @function
+-_nds32_i_ps_ns_4b:
+-#elif defined(NDS32_NESTED_READY)
+-	.globl	_nds32_i_ps_nr_4b
+-	.type	_nds32_i_ps_nr_4b, @function
+-_nds32_i_ps_nr_4b:
+-#else /* Not nested handler. */
+-	.globl	_nds32_i_ps_nn_4b
+-	.type	_nds32_i_ps_nn_4b, @function
+-_nds32_i_ps_nn_4b:
+-#endif /* endif for Nest Type */
+-#endif /* not NDS32_SAVE_ALL_REGS */
+-
+-/*
+-  This is 4-byte vector size version.
+-  The vector id was restored into $lp in vector by compiler.
+-*/
+-#ifdef NDS32_SAVE_ALL_REGS
+-	SAVE_ALL_4B
+-#else
+-        SAVE_PARTIAL_4B
+-#endif
+-	/* Prepare to call 2nd level handler. */
+-	la	$r2, _nds32_jmptbl_00
+-	lw	$r2, [$r2 + $r0 << #2]
+-	addi    $r0, $r0, #-9	/* Make interrput vector id zero-based.  */
+-	ADJ_INTR_LVL	/* Adjust INTR level. $r3 is clobbered.  */
+-	jral    $r2
+-	/* Restore used registers. */
+-#ifdef NDS32_SAVE_ALL_REGS
+-	RESTORE_ALL
+-#else
+-	RESTORE_PARTIAL
+-#endif
+-	iret
+-
+-#ifdef NDS32_SAVE_ALL_REGS
+-#if defined(NDS32_NESTED)
+-	.size	_nds32_i_sa_ns_4b, .-_nds32_i_sa_ns_4b
+-#elif defined(NDS32_NESTED_READY)
+-	.size	_nds32_i_sa_nr_4b, .-_nds32_i_sa_nr_4b
+-#else /* Not nested handler. */
+-	.size	_nds32_i_sa_nn_4b, .-_nds32_i_sa_nn_4b
+-#endif /* endif for Nest Type */
+-#else /* not NDS32_SAVE_ALL_REGS */
+-#if defined(NDS32_NESTED)
+-	.size	_nds32_i_ps_ns_4b, .-_nds32_i_ps_ns_4b
+-#elif defined(NDS32_NESTED_READY)
+-	.size	_nds32_i_ps_nr_4b, .-_nds32_i_ps_nr_4b
+-#else /* Not nested handler. */
+-	.size	_nds32_i_ps_nn_4b, .-_nds32_i_ps_nn_4b
+-#endif /* endif for Nest Type */
+-#endif /* not NDS32_SAVE_ALL_REGS */
+diff --git a/libgcc/config/nds32/isr-library/reset.S b/libgcc/config/nds32/isr-library/reset.S
+index 961d731..8b9ccf5 100644
+--- a/libgcc/config/nds32/isr-library/reset.S
++++ b/libgcc/config/nds32/isr-library/reset.S
+@@ -26,22 +26,18 @@
+ 	.section .nds32_isr, "ax"	/* Put it in the section of 1st level handler.  */
+ 	.align	1
+ 	.weak	_SDA_BASE_	/* For reset handler only.  */
+-	.weak	_FP_BASE_	/* For reset handler only.  */
+ 	.weak	_nds32_init_mem	/* User defined memory initialization function.  */
+ 	.globl	_start
+ 	.globl	_nds32_reset
+ 	.type	_nds32_reset, @function
+ _nds32_reset:
+ _start:
+-#ifdef  NDS32_EXT_EX9
+-	.no_ex9_begin
+-#endif
+ 	/* Handle NMI and warm boot if any of them exists.  */
+ 	beqz	$sp, 1f		/* Reset, NMI or warm boot?  */
+ 	/* Either NMI or warm boot; save all regs.  */
+ 
+ 	/* Preserve registers for context-switching.  */
+-#ifdef __NDS32_REDUCED_REGS__
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+ 	/* For 16-reg mode.  */
+ 	smw.adm $r0, [$sp], $r10, #0x0
+ 	smw.adm $r15, [$sp], $r15, #0xf
+@@ -49,10 +45,9 @@ _start:
+ 	/* For 32-reg mode.  */
+ 	smw.adm $r0, [$sp], $r27, #0xf
+ #endif
+-#ifdef NDS32_EXT_IFC
++#if __NDS32_EXT_IFC__
+ 	mfusr   $r1, $IFC_LP
+-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
+-					   stack 8-byte alignment.  */
++	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep stack 8-byte alignment.  */
+ #endif
+ 
+ 	la	$gp, _SDA_BASE_	/* Init GP for small data access.  */
+@@ -71,12 +66,11 @@ _start:
+ 	bnez    $r0, 1f		/* If fail to resume, do cold boot.  */
+ 
+ 	/* Restore registers for context-switching.  */
+-#ifdef NDS32_EXT_IFC
+-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
+-					   stack 8-byte alignment.  */
++#if __NDS32_EXT_IFC__
++	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep stack 8-byte alignment.  */
+ 	mtusr   $r1, $IFC_LP
+ #endif
+-#ifdef __NDS32_REDUCED_REGS__
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+ 	/* For 16-reg mode.  */
+ 	lmw.bim	$r15, [$sp], $r15, #0xf
+ 	lmw.bim	$r0, [$sp], $r10, #0x0
+@@ -88,6 +82,17 @@ _start:
+ 
+ 
+ 1:	/* Cold boot.  */
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* With vector ID feature for v3 architecture, default vector size is 4-byte.  */
++	/* Set IVB.ESZ = 0 (vector table entry size = 4 bytes)  */
++	mfsr    $r0, $IVB
++	li      $r1, #0xc000
++	or      $r0, $r0, $r1
++	xor     $r0, $r0, $r1
++	mtsr    $r0, $IVB
++	dsb
++#else
++	/* There is no vector ID feature, so the vector size must be 16-byte.  */
+ 	/* Set IVB.ESZ = 1 (vector table entry size = 16 bytes)  */
+ 	mfsr    $r0, $IVB
+ 	li	$r1, #0xffff3fff
+@@ -95,36 +100,54 @@ _start:
+ 	ori	$r0, $r0, #0x4000
+ 	mtsr    $r0, $IVB
+ 	dsb
++#endif
+ 
+ 	la	$gp, _SDA_BASE_		/* Init $gp.  */
+-	la	$fp, _FP_BASE_		/* Init $fp.  */
+ 	la	$sp, _stack		/* Init $sp.  */
+-#ifdef  NDS32_EXT_EX9
+-/*
+- *	Initialize the table base of EX9 instruction
+- *	ex9 generation needs to disable before the ITB is set
+- */
+-	mfsr    $r0, $MSC_CFG	/* Check if HW support of EX9.  */
++
++#if __NDS32_EXT_EX9__
++.L_init_itb:
++	/* Initialization for Instruction Table Base (ITB).
++	   The symbol _ITB_BASE_ is determined by Linker.
++	   Set $ITB only if MSC_CFG.EIT (cr4.b'24) is set.  */
++	mfsr    $r0, $MSC_CFG
+ 	srli	$r0, $r0, 24
+ 	andi	$r0, $r0, 0x1
+-	beqz	$r0, 4f		/* Zero means HW does not support EX9.  */
+-	la      $r0, _ITB_BASE_	/* Init $ITB.  */
++	beqz	$r0, 4f		/* Skip ITB setup if MSC_CFG.EIT is clear.  */
++	la      $r0, _ITB_BASE_
+ 	mtusr   $r0, $ITB
+-	.no_ex9_end
+ 4:
+ #endif
+-	la	$r15, _nds32_init_mem	/* Call DRAM init. _nds32_init_mem
+-					  may written by C language.  */
++
++#if __NDS32_EXT_FPU_SP__ || __NDS32_EXT_FPU_DP__
++.L_init_fpu:
++	/* Initialize FPU
++	   Set FUCOP_CTL.CP0EN (fucpr.b'0).  */
++	mfsr    $r0, $FUCOP_CTL
++	ori     $r0, $r0, 0x1
++	mtsr    $r0, $FUCOP_CTL
++	dsb
++	/* According to [bugzilla #9425], set flush-to-zero mode.
++	   That is, set $FPCSR.DNZ(b'12) = 1.  */
++	FMFCSR	$r0
++	ori	$r0, $r0, 0x1000
++	FMTCSR	$r0
++	dsb
++#endif
++
++	/* Call DRAM init.  _nds32_init_mem may be written in C.  */
++	la	$r15, _nds32_init_mem
+ 	beqz	$r15, 6f
+ 	jral	$r15
+ 6:
+ 	l.w	$r15, _nds32_jmptbl_00	/* Load reset handler.  */
+ 	jral	$r15
+-/* Reset handler() should never return in a RTOS or non-OS system.
+-   In case it does return, an exception will be generated.
+-   This exception will be caught either by default break handler or by EDM.
+-   Default break handle may just do an infinite loop.
+-   EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */
++
++	/* Reset handler() should never return in an RTOS or non-OS system.
++	   In case it does return, an exception will be generated.
++	   This exception will be caught either by the default break handler or by EDM.
++	   The default break handler may just do an infinite loop.
++	   EDM will notify GDB and GDB will regain control when the ID is 0x7fff.  */
+ 5:
+ 	break    #0x7fff
+ 	.size	_nds32_reset, .-_nds32_reset
+diff --git a/libgcc/config/nds32/isr-library/reset_4b.S b/libgcc/config/nds32/isr-library/reset_4b.S
+deleted file mode 100644
+index 792e655..0000000
+--- a/libgcc/config/nds32/isr-library/reset_4b.S
++++ /dev/null
+@@ -1,131 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section .nds32_isr, "ax"	/* Put it in the section of 1st level handler.  */
+-	.align	1
+-	.weak	_SDA_BASE_	/* For reset handler only.  */
+-	.weak	_FP_BASE_	/* For reset handler only.  */
+-	.weak	_nds32_init_mem	/* User defined memory initialization function.  */
+-	.globl	_start
+-	.globl	_nds32_reset_4b
+-	.type	_nds32_reset_4b, @function
+-_nds32_reset_4b:
+-_start:
+-#ifdef  NDS32_EXT_EX9
+-	.no_ex9_begin
+-#endif
+-	/* Handle NMI and warm boot if any of them exists.  */
+-	beqz	$sp, 1f		/* Reset, NMI or warm boot?  */
+-	/* Either NMI or warm boot; save all regs.  */
+-
+-	/* Preserve registers for context-switching.  */
+-#ifdef __NDS32_REDUCED_REGS__
+-	/* For 16-reg mode.  */
+-	smw.adm $r0, [$sp], $r10, #0x0
+-	smw.adm $r15, [$sp], $r15, #0xf
+-#else
+-	/* For 32-reg mode.  */
+-	smw.adm $r0, [$sp], $r27, #0xf
+-#endif
+-#ifdef NDS32_EXT_IFC
+-	mfusr   $r1, $IFC_LP
+-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
+-					   stack 8-byte alignment.  */
+-#endif
+-
+-	la	$gp, _SDA_BASE_	/* Init GP for small data access.  */
+-	move	$r0, $sp	/* Init parameter.  */
+-	mfsr	$r1, $ITYPE	/* Check ITYPE for NMI or warm boot.  */
+-	andi	$r1, $r1, #0xf
+-	addi	$r1, $r1, #-1
+-	beqz	$r1, 2f		/* Warm boot if true.  */
+-	l.w	$r15, _nds32_nmih	/* Load NMI handler.  */
+-	j	3f
+-2:
+-	l.w	$r15, _nds32_wrh	/* Load warm boot handler.  */
+-3:
+-	beqz    $r15, 1f	/* If no handler, do cold boot.  */
+-	jral    $r15		/* Call handler.  */
+-	bnez    $r0, 1f		/* If fail to resume, do cold boot.  */
+-
+-	/* Restore registers for context-switching.  */
+-#ifdef NDS32_EXT_IFC
+-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
+-					   stack 8-byte alignment.  */
+-	mtusr   $r1, $IFC_LP
+-#endif
+-#ifdef __NDS32_REDUCED_REGS__
+-	/* For 16-reg mode.  */
+-	lmw.bim	$r15, [$sp], $r15, #0xf
+-	lmw.bim	$r0, [$sp], $r10, #0x0
+-#else
+-	/* For 32-reg mode.  */
+-	lmw.bim $r0, [$sp], $r27, #0xf
+-#endif
+-	iret	/* Resume operation.  */
+-
+-
+-1:	/* Cold boot.  */
+-	/* With vector ID feature, set default vector size to 4B.  */
+-	/* Set IVB.ESZ = 0 (vector table entry size = 4 bytes)  */
+-	mfsr    $r0, $IVB
+-	li      $r1, #0xc000
+-	or      $r0, $r0, $r1
+-	xor     $r0, $r0, $r1
+-	mtsr    $r0, $IVB
+-	dsb
+-
+-	la	$gp, _SDA_BASE_		/* Init $gp.  */
+-	la	$fp, _FP_BASE_		/* Init $fp.  */
+-	la	$sp, _stack		/* Init $sp.  */
+-#ifdef  NDS32_EXT_EX9
+-/*
+- *	Initialize the table base of EX9 instruction
+- *	ex9 generation needs to disable before the ITB is set
+- */
+-	mfsr    $r0, $MSC_CFG	/* Check if HW support of EX9.  */
+-	srli	$r0, $r0, 24
+-	andi	$r0, $r0, 0x1
+-	beqz	$r0, 4f		/* Zero means HW does not support EX9.  */
+-	la      $r0, _ITB_BASE_	/* Init $ITB.  */
+-	mtusr   $r0, $ITB
+-	.no_ex9_end
+-4:
+-#endif
+-	la	$r15, _nds32_init_mem	/* Call DRAM init. _nds32_init_mem
+-					  may written by C language.  */
+-	beqz	$r15, 6f
+-	jral	$r15
+-6:
+-	l.w	$r15, _nds32_jmptbl_00	/* Load reset handler.  */
+-	jral	$r15
+-/* Reset handler() should never return in a RTOS or non-OS system.
+-   In case it does return, an exception will be generated.
+-   This exception will be caught either by default break handler or by EDM.
+-   Default break handle may just do an infinite loop.
+-   EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */
+-5:
+-	break    #0x7fff
+-	.size	_nds32_reset_4b, .-_nds32_reset_4b
+diff --git a/libgcc/config/nds32/isr-library/restore_all.inc b/libgcc/config/nds32/isr-library/restore_all.inc
+index c25b46e..96f87ec 100644
+--- a/libgcc/config/nds32/isr-library/restore_all.inc
++++ b/libgcc/config/nds32/isr-library/restore_all.inc
+@@ -31,15 +31,11 @@
+ 	mtsr	$r2, $IPSW
+ 	RESTORE_FPU_REGS
+ 	RESTORE_MAC_REGS
+-#ifdef NDS32_EXT_IFC
+-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
+-					   stack 8-byte alignment.  */
+-	mtusr   $r1, $IFC_LP
+-#endif
+-#ifdef __NDS32_REDUCED_REGS__
++  RESTORE_USR_REGS
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+ 	lmw.bim $r0, [$sp], $r10, #0x0	/* Restore all regs.  */
+ 	lmw.bim $r15, [$sp], $r15, #0xf
+-#else /* not __NDS32_REDUCED_REGS__ */
++#else
+ 	lmw.bim $r0, [$sp], $r27, #0xf	/* Restore all regs.  */
+ #endif
+ .endm
+diff --git a/libgcc/config/nds32/isr-library/restore_mac_regs.inc b/libgcc/config/nds32/isr-library/restore_mac_regs.inc
+index 0ffc980..a15024c 100644
+--- a/libgcc/config/nds32/isr-library/restore_mac_regs.inc
++++ b/libgcc/config/nds32/isr-library/restore_mac_regs.inc
+@@ -24,7 +24,7 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ .macro RESTORE_MAC_REGS
+-#ifdef NDS32_DX_REGS
++#if __NDS32_DX_REGS__
+ 	lmw.bim	$r1, [$sp], $r4, #0x0
+ 	mtusr	$r1, $d0.lo
+ 	mtusr	$r2, $d0.hi
+diff --git a/libgcc/config/nds32/isr-library/restore_partial.inc b/libgcc/config/nds32/isr-library/restore_partial.inc
+index 70d5421..c07d30e 100644
+--- a/libgcc/config/nds32/isr-library/restore_partial.inc
++++ b/libgcc/config/nds32/isr-library/restore_partial.inc
+@@ -31,15 +31,11 @@
+ 	mtsr $r1, $IPC	/* Set IPC.  */
+ 	mtsr $r2, $IPSW	/* Set IPSW.  */
+ #endif
+-	RESTORE_FPU_REGS
+-	RESTORE_MAC_REGS
+-#ifdef NDS32_EXT_IFC
+-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
+-					   stack 8-byte alignment.  */
+-	mtusr   $r1, $IFC_LP
+-#endif
++  RESTORE_FPU_REGS
++  RESTORE_MAC_REGS
++  RESTORE_USR_REGS
+ 	lmw.bim $r0, [$sp], $r5, #0x0	/* Restore all regs.  */
+-#ifdef __NDS32_REDUCED_REGS__
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+ 	lmw.bim $r15, [$sp], $r15, #0x2
+ #else
+ 	lmw.bim $r15, [$sp], $r27, #0x2	/* Restore all regs.  */
+diff --git a/libgcc/config/nds32/isr-library/vec_vid03_4b.S b/libgcc/config/nds32/isr-library/restore_usr_regs.inc
+similarity index 72%
+rename from libgcc/config/nds32/isr-library/vec_vid03_4b.S
+rename to libgcc/config/nds32/isr-library/restore_usr_regs.inc
+index cd30906..c8f6e4a 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid03_4b.S
++++ b/libgcc/config/nds32/isr-library/restore_usr_regs.inc
+@@ -23,12 +23,20 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-	.section	.nds32_vector.03, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_03_4b
+-	.type	_nds32_vector_03_4b, @function
+-_nds32_vector_03_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_03_4b, .-_nds32_vector_03_4b
++.macro RESTORE_USR_REGS
++#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__)
++  lmw.bim $r1, [$sp], $r4, #0x0	/* Pop IFC_LP, LB, LE, LC.  */
++  mtusr   $r1, $IFC_LP
++  mtusr   $r2, $LB
++  mtusr   $r3, $LE
++  mtusr   $r4, $LC
++#elif __NDS32_EXT_IFC__
++  lmw.bim	$r1, [$sp], $r2, #0x0	/* Pop IFC_LP; $r2 is the extra word kept for 8-byte alignment.  */
++  mtusr   $r1, $IFC_LP
++#elif __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
++  lmw.bim $r1, [$sp], $r4, #0x0	/* Pop LB, LE, LC; $r4 is the extra word kept for 8-byte alignment.  */
++  mtusr   $r1, $LB
++  mtusr   $r2, $LE
++  mtusr   $r3, $LC
++#endif
++.endm
+diff --git a/libgcc/config/nds32/isr-library/save_all.inc b/libgcc/config/nds32/isr-library/save_all.inc
+index 20eb29d..c926664 100644
+--- a/libgcc/config/nds32/isr-library/save_all.inc
++++ b/libgcc/config/nds32/isr-library/save_all.inc
+@@ -23,45 +23,42 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-.macro SAVE_ALL_4B
+-#ifdef __NDS32_REDUCED_REGS__
++#if __NDS32_ISR_VECTOR_SIZE_4__
++
++/* If vector size is 4-byte, we have to save registers
++   in the macro implementation.  */
++.macro SAVE_ALL
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+ 	smw.adm $r15, [$sp], $r15, #0xf
+ 	smw.adm $r0, [$sp], $r10, #0x0
+-#else /* not __NDS32_REDUCED_REGS__ */
++#else
+ 	smw.adm $r0, [$sp], $r27, #0xf
+-#endif /* not __NDS32_REDUCED_REGS__ */
+-#ifdef NDS32_EXT_IFC
+-	mfusr   $r1, $IFC_LP
+-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
+-					   stack 8-byte alignment.  */
+ #endif
+-	SAVE_MAC_REGS
+-	SAVE_FPU_REGS
++  SAVE_USR_REGS
++  SAVE_MAC_REGS
++  SAVE_FPU_REGS
+ 	mfsr	$r1, $IPC	/* Get IPC.  */
+ 	mfsr	$r2, $IPSW	/* Get IPSW.  */
+ 	smw.adm	$r1, [$sp], $r2, #0x0	/* Push IPC, IPSW.  */
+ 	move	$r1, $sp	/* $r1 is ptr to NDS32_CONTEXT.  */
+ 	mfsr	$r0, $ITYPE	/* Get VID to $r0.  */
+ 	srli	$r0, $r0, #5
+-#ifdef __NDS32_ISA_V2__
+ 	andi	$r0, $r0, #127
+-#else
+-	fexti33	$r0, #6
+-#endif
+ .endm
+ 
++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */
++
++/* If vector size is 16-byte, some of the work is done in
++   the vector section generated by the compiler, so that we
++   can implement less in the macro.  */
+ .macro SAVE_ALL
+-/* SAVE_REG_TBL code has been moved to
+-   vector table generated by compiler.  */
+-#ifdef NDS32_EXT_IFC
+-	mfusr   $r1, $IFC_LP
+-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
+-					   stack 8-byte alignment.  */
+-#endif
+-	SAVE_MAC_REGS
+-	SAVE_FPU_REGS
++  SAVE_USR_REGS
++  SAVE_MAC_REGS
++  SAVE_FPU_REGS
+ 	mfsr	$r1, $IPC	/* Get IPC.  */
+ 	mfsr	$r2, $IPSW	/* Get IPSW.  */
+ 	smw.adm	$r1, [$sp], $r2, #0x0	/* Push IPC, IPSW.  */
+ 	move	$r1, $sp	/* $r1 is ptr to NDS32_CONTEXT.  */
+ .endm
++
++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */
+diff --git a/libgcc/config/nds32/isr-library/save_mac_regs.inc b/libgcc/config/nds32/isr-library/save_mac_regs.inc
+index ddb5e77..2d79d70 100644
+--- a/libgcc/config/nds32/isr-library/save_mac_regs.inc
++++ b/libgcc/config/nds32/isr-library/save_mac_regs.inc
+@@ -24,7 +24,7 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ .macro SAVE_MAC_REGS
+-#ifdef NDS32_DX_REGS
++#if __NDS32_DX_REGS__
+ 	mfusr	$r1, $d0.lo
+ 	mfusr	$r2, $d0.hi
+ 	mfusr	$r3, $d1.lo
+diff --git a/libgcc/config/nds32/isr-library/save_partial.inc b/libgcc/config/nds32/isr-library/save_partial.inc
+index ee514c4..0c6d481 100644
+--- a/libgcc/config/nds32/isr-library/save_partial.inc
++++ b/libgcc/config/nds32/isr-library/save_partial.inc
+@@ -23,20 +23,20 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-.macro SAVE_PARTIAL_4B
+-#ifdef __NDS32_REDUCED_REGS__
++#if __NDS32_ISR_VECTOR_SIZE_4__
++
++/* If vector size is 4-byte, we have to save registers
++   in the macro implementation.  */
++.macro SAVE_PARTIAL
++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+ 	smw.adm $r15, [$sp], $r15, #0x2
+-#else /* not __NDS32_REDUCED_REGS__ */
++#else
+ 	smw.adm $r15, [$sp], $r27, #0x2
+-#endif /* not __NDS32_REDUCED_REGS__ */
+-	smw.adm $r0, [$sp], $r5, #0x0
+-#ifdef NDS32_EXT_IFC
+-	mfusr   $r1, $IFC_LP
+-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
+-					   stack 8-byte alignment.  */
+ #endif
+-	SAVE_MAC_REGS
+-	SAVE_FPU_REGS
++	smw.adm $r0, [$sp], $r5, #0x0
++  SAVE_USR_REGS
++  SAVE_MAC_REGS
++  SAVE_FPU_REGS
+ #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY)
+        mfsr    $r1, $IPC       /* Get IPC.  */
+        mfsr    $r2, $IPSW      /* Get IPSW.  */
+@@ -44,26 +44,24 @@
+ #endif
+ 	mfsr	$r0, $ITYPE	/* Get VID to $r0.  */
+ 	srli	$r0, $r0, #5
+-#ifdef __NDS32_ISA_V2__
+ 	andi	$r0, $r0, #127
+-#else
+-	fexti33	$r0, #6
+-#endif
+ .endm
+ 
++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */
++
++/* If vector size is 16-byte, some of the work is done in
++   the vector section generated by the compiler, so that we
++   can implement less in the macro.  */
++
+ .macro SAVE_PARTIAL
+-/* SAVE_CALLER_REGS code has been moved to
+-   vector table generated by compiler.  */
+-#ifdef NDS32_EXT_IFC
+-	mfusr   $r1, $IFC_LP
+-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
+-					   stack 8-byte alignment.  */
+-#endif
+-	SAVE_MAC_REGS
+-	SAVE_FPU_REGS
++  SAVE_USR_REGS
++  SAVE_MAC_REGS
++  SAVE_FPU_REGS
+ #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY)
+        mfsr    $r1, $IPC       /* Get IPC.  */
+        mfsr    $r2, $IPSW      /* Get IPSW.  */
+        smw.adm $r1, [$sp], $r2, #0x0   /* Push IPC, IPSW.  */
+ #endif
+ .endm
++
++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */
+diff --git a/libgcc/config/nds32/isr-library/vec_vid00_4b.S b/libgcc/config/nds32/isr-library/save_usr_regs.inc
+similarity index 61%
+rename from libgcc/config/nds32/isr-library/vec_vid00_4b.S
+rename to libgcc/config/nds32/isr-library/save_usr_regs.inc
+index e1a37b4..b6807d7 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid00_4b.S
++++ b/libgcc/config/nds32/isr-library/save_usr_regs.inc
+@@ -23,12 +23,22 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-	.section	.nds32_vector.00, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_00_4b
+-	.type	_nds32_vector_00_4b, @function
+-_nds32_vector_00_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_00_4b, .-_nds32_vector_00_4b
++.macro SAVE_USR_REGS
++/* Push the user special registers (IFC_LP and/or LB, LE, LC) selected by
++   the enabled ISA extensions.  Every push must keep $sp 8-byte aligned.  */
++#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__)
++  mfusr   $r1, $IFC_LP
++  mfusr   $r2, $LB
++  mfusr   $r3, $LE
++  mfusr   $r4, $LC
++  smw.adm $r1, [$sp], $r4, #0x0 /* Four words: already 8-byte aligned.  */
++#elif __NDS32_EXT_IFC__
++  mfusr   $r1, $IFC_LP
++  smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep stack 8-byte aligned.  */
++#elif (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__)
++  mfusr   $r1, $LB
++  mfusr   $r2, $LE
++  mfusr   $r3, $LC
++  smw.adm $r1, [$sp], $r4, #0x0	/* Save extra $r4 to keep stack 8-byte aligned.  */
++#endif
++.endm
+diff --git a/libgcc/config/nds32/isr-library/vec_vid00.S b/libgcc/config/nds32/isr-library/vec_vid00.S
+index ccdbd19..f02e92c 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid00.S
++++ b/libgcc/config/nds32/isr-library/vec_vid00.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.00, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_00
+ 	.type	_nds32_vector_00, @function
+ _nds32_vector_00:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid01.S b/libgcc/config/nds32/isr-library/vec_vid01.S
+index ed5a88e..542fcf8 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid01.S
++++ b/libgcc/config/nds32/isr-library/vec_vid01.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.01, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_01
+ 	.type	_nds32_vector_01, @function
+ _nds32_vector_01:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid01_4b.S b/libgcc/config/nds32/isr-library/vec_vid01_4b.S
+deleted file mode 100644
+index 239bd75..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid01_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.01, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_01_4b
+-	.type	_nds32_vector_01_4b, @function
+-_nds32_vector_01_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_01_4b, .-_nds32_vector_01_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid02.S b/libgcc/config/nds32/isr-library/vec_vid02.S
+index 1a95a57..72b8b56 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid02.S
++++ b/libgcc/config/nds32/isr-library/vec_vid02.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.02, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_02
+ 	.type	_nds32_vector_02, @function
+ _nds32_vector_02:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid02_4b.S b/libgcc/config/nds32/isr-library/vec_vid02_4b.S
+deleted file mode 100644
+index c532e62..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid02_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.02, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_02_4b
+-	.type	_nds32_vector_02_4b, @function
+-_nds32_vector_02_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_02_4b, .-_nds32_vector_02_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid03.S b/libgcc/config/nds32/isr-library/vec_vid03.S
+index 9bc572a..b0f8a60 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid03.S
++++ b/libgcc/config/nds32/isr-library/vec_vid03.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.03, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_03
+ 	.type	_nds32_vector_03, @function
+ _nds32_vector_03:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid04.S b/libgcc/config/nds32/isr-library/vec_vid04.S
+index e8d4e10..d76ef73 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid04.S
++++ b/libgcc/config/nds32/isr-library/vec_vid04.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.04, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_04
+ 	.type	_nds32_vector_04, @function
+ _nds32_vector_04:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid04_4b.S b/libgcc/config/nds32/isr-library/vec_vid04_4b.S
+deleted file mode 100644
+index 21fc77e..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid04_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.04, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_04_4b
+-	.type	_nds32_vector_04_4b, @function
+-_nds32_vector_04_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_04_4b, .-_nds32_vector_04_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid05.S b/libgcc/config/nds32/isr-library/vec_vid05.S
+index 1621a9d..ed5a5bb 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid05.S
++++ b/libgcc/config/nds32/isr-library/vec_vid05.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.05, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_05
+ 	.type	_nds32_vector_05, @function
+ _nds32_vector_05:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid05_4b.S b/libgcc/config/nds32/isr-library/vec_vid05_4b.S
+deleted file mode 100644
+index b86fe19..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid05_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.05, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_05_4b
+-	.type	_nds32_vector_05_4b, @function
+-_nds32_vector_05_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_05_4b, .-_nds32_vector_05_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid06.S b/libgcc/config/nds32/isr-library/vec_vid06.S
+index 934f0b1..834c7de 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid06.S
++++ b/libgcc/config/nds32/isr-library/vec_vid06.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.06, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_06
+ 	.type	_nds32_vector_06, @function
+ _nds32_vector_06:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid06_4b.S b/libgcc/config/nds32/isr-library/vec_vid06_4b.S
+deleted file mode 100644
+index 3624cfd..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid06_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.06, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_06_4b
+-	.type	_nds32_vector_06_4b, @function
+-_nds32_vector_06_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_06_4b, .-_nds32_vector_06_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid07.S b/libgcc/config/nds32/isr-library/vec_vid07.S
+index 0b0484d..cb3b33a 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid07.S
++++ b/libgcc/config/nds32/isr-library/vec_vid07.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.07, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_07
+ 	.type	_nds32_vector_07, @function
+ _nds32_vector_07:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid07_4b.S b/libgcc/config/nds32/isr-library/vec_vid07_4b.S
+deleted file mode 100644
+index 997ca75..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid07_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.07, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_07_4b
+-	.type	_nds32_vector_07_4b, @function
+-_nds32_vector_07_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_07_4b, .-_nds32_vector_07_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid08.S b/libgcc/config/nds32/isr-library/vec_vid08.S
+index 2a30375..b4ae947 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid08.S
++++ b/libgcc/config/nds32/isr-library/vec_vid08.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.08, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_08
+ 	.type	_nds32_vector_08, @function
+ _nds32_vector_08:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid08_4b.S b/libgcc/config/nds32/isr-library/vec_vid08_4b.S
+deleted file mode 100644
+index 83546d1..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid08_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.08, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_08_4b
+-	.type	_nds32_vector_08_4b, @function
+-_nds32_vector_08_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_08_4b, .-_nds32_vector_08_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid09.S b/libgcc/config/nds32/isr-library/vec_vid09.S
+index 9aeaf78..47fa5c1 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid09.S
++++ b/libgcc/config/nds32/isr-library/vec_vid09.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.09, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_09
+ 	.type	_nds32_vector_09, @function
+ _nds32_vector_09:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid09_4b.S b/libgcc/config/nds32/isr-library/vec_vid09_4b.S
+deleted file mode 100644
+index 2d1944f..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid09_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.09, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_09_4b
+-	.type	_nds32_vector_09_4b, @function
+-_nds32_vector_09_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_09_4b, .-_nds32_vector_09_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid10.S b/libgcc/config/nds32/isr-library/vec_vid10.S
+index 411edd7..6bf2c7c 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid10.S
++++ b/libgcc/config/nds32/isr-library/vec_vid10.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.10, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_10
+ 	.type	_nds32_vector_10, @function
+ _nds32_vector_10:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid10_4b.S b/libgcc/config/nds32/isr-library/vec_vid10_4b.S
+deleted file mode 100644
+index 04761ab..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid10_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.10, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_10_4b
+-	.type	_nds32_vector_10_4b, @function
+-_nds32_vector_10_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_10_4b, .-_nds32_vector_10_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid11.S b/libgcc/config/nds32/isr-library/vec_vid11.S
+index 8de45a4..86975ea 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid11.S
++++ b/libgcc/config/nds32/isr-library/vec_vid11.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.11, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_11
+ 	.type	_nds32_vector_11, @function
+ _nds32_vector_11:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid11_4b.S b/libgcc/config/nds32/isr-library/vec_vid11_4b.S
+deleted file mode 100644
+index 328c1e6..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid11_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.11, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_11_4b
+-	.type	_nds32_vector_11_4b, @function
+-_nds32_vector_11_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_11_4b, .-_nds32_vector_11_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid12.S b/libgcc/config/nds32/isr-library/vec_vid12.S
+index ff5c6df..07cb7de 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid12.S
++++ b/libgcc/config/nds32/isr-library/vec_vid12.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.12, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_12
+ 	.type	_nds32_vector_12, @function
+ _nds32_vector_12:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid12_4b.S b/libgcc/config/nds32/isr-library/vec_vid12_4b.S
+deleted file mode 100644
+index 52b7d23..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid12_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.12, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_12_4b
+-	.type	_nds32_vector_12_4b, @function
+-_nds32_vector_12_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_12_4b, .-_nds32_vector_12_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid13.S b/libgcc/config/nds32/isr-library/vec_vid13.S
+index 66014c3..5ac1a83 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid13.S
++++ b/libgcc/config/nds32/isr-library/vec_vid13.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.13, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_13
+ 	.type	_nds32_vector_13, @function
+ _nds32_vector_13:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid13_4b.S b/libgcc/config/nds32/isr-library/vec_vid13_4b.S
+deleted file mode 100644
+index 59029ad..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid13_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.13, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_13_4b
+-	.type	_nds32_vector_13_4b, @function
+-_nds32_vector_13_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_13_4b, .-_nds32_vector_13_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid14.S b/libgcc/config/nds32/isr-library/vec_vid14.S
+index ca6f66f..5116f2f 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid14.S
++++ b/libgcc/config/nds32/isr-library/vec_vid14.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.14, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_14
+ 	.type	_nds32_vector_14, @function
+ _nds32_vector_14:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid14_4b.S b/libgcc/config/nds32/isr-library/vec_vid14_4b.S
+deleted file mode 100644
+index 0d2afe4..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid14_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.14, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_14_4b
+-	.type	_nds32_vector_14_4b, @function
+-_nds32_vector_14_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_14_4b, .-_nds32_vector_14_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid15.S b/libgcc/config/nds32/isr-library/vec_vid15.S
+index c94b42a..03449c0 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid15.S
++++ b/libgcc/config/nds32/isr-library/vec_vid15.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.15, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_15
+ 	.type	_nds32_vector_15, @function
+ _nds32_vector_15:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid15_4b.S b/libgcc/config/nds32/isr-library/vec_vid15_4b.S
+deleted file mode 100644
+index 60799d7..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid15_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.15, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_15_4b
+-	.type	_nds32_vector_15_4b, @function
+-_nds32_vector_15_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_15_4b, .-_nds32_vector_15_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid16.S b/libgcc/config/nds32/isr-library/vec_vid16.S
+index f19454d..b01d673 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid16.S
++++ b/libgcc/config/nds32/isr-library/vec_vid16.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.16, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_16
+ 	.type	_nds32_vector_16, @function
+ _nds32_vector_16:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid16_4b.S b/libgcc/config/nds32/isr-library/vec_vid16_4b.S
+deleted file mode 100644
+index 6791204..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid16_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.16, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_16_4b
+-	.type	_nds32_vector_16_4b, @function
+-_nds32_vector_16_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_16_4b, .-_nds32_vector_16_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid17.S b/libgcc/config/nds32/isr-library/vec_vid17.S
+index 486a0aa..c6ed785 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid17.S
++++ b/libgcc/config/nds32/isr-library/vec_vid17.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.17, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_17
+ 	.type	_nds32_vector_17, @function
+ _nds32_vector_17:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid17_4b.S b/libgcc/config/nds32/isr-library/vec_vid17_4b.S
+deleted file mode 100644
+index 04f4285..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid17_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.17, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_17_4b
+-	.type	_nds32_vector_17_4b, @function
+-_nds32_vector_17_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_17_4b, .-_nds32_vector_17_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid18.S b/libgcc/config/nds32/isr-library/vec_vid18.S
+index 137511f..e0e7b7e 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid18.S
++++ b/libgcc/config/nds32/isr-library/vec_vid18.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.18, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_18
+ 	.type	_nds32_vector_18, @function
+ _nds32_vector_18:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid18_4b.S b/libgcc/config/nds32/isr-library/vec_vid18_4b.S
+deleted file mode 100644
+index 4d80192..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid18_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.18, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_18_4b
+-	.type	_nds32_vector_18_4b, @function
+-_nds32_vector_18_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_18_4b, .-_nds32_vector_18_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid19.S b/libgcc/config/nds32/isr-library/vec_vid19.S
+index 791e135..ef7075f 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid19.S
++++ b/libgcc/config/nds32/isr-library/vec_vid19.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.19, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_19
+ 	.type	_nds32_vector_19, @function
+ _nds32_vector_19:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid19_4b.S b/libgcc/config/nds32/isr-library/vec_vid19_4b.S
+deleted file mode 100644
+index 87d4c7c..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid19_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.19, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_19_4b
+-	.type	_nds32_vector_19_4b, @function
+-_nds32_vector_19_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_19_4b, .-_nds32_vector_19_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid20.S b/libgcc/config/nds32/isr-library/vec_vid20.S
+index e7ab0e3..99bcf01 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid20.S
++++ b/libgcc/config/nds32/isr-library/vec_vid20.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.20, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_20
+ 	.type	_nds32_vector_20, @function
+ _nds32_vector_20:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid20_4b.S b/libgcc/config/nds32/isr-library/vec_vid20_4b.S
+deleted file mode 100644
+index 308385a..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid20_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.20, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_20_4b
+-	.type	_nds32_vector_20_4b, @function
+-_nds32_vector_20_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_20_4b, .-_nds32_vector_20_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid21.S b/libgcc/config/nds32/isr-library/vec_vid21.S
+index 315ae56..8c66bef 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid21.S
++++ b/libgcc/config/nds32/isr-library/vec_vid21.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.21, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_21
+ 	.type	_nds32_vector_21, @function
+ _nds32_vector_21:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid21_4b.S b/libgcc/config/nds32/isr-library/vec_vid21_4b.S
+deleted file mode 100644
+index 16cf02a..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid21_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.21, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_21_4b
+-	.type	_nds32_vector_21_4b, @function
+-_nds32_vector_21_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_21_4b, .-_nds32_vector_21_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid22.S b/libgcc/config/nds32/isr-library/vec_vid22.S
+index 6f9de85..5c442ce 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid22.S
++++ b/libgcc/config/nds32/isr-library/vec_vid22.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.22, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_22
+ 	.type	_nds32_vector_22, @function
+ _nds32_vector_22:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid22_4b.S b/libgcc/config/nds32/isr-library/vec_vid22_4b.S
+deleted file mode 100644
+index 587ee7f..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid22_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.22, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_22_4b
+-	.type	_nds32_vector_22_4b, @function
+-_nds32_vector_22_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_22_4b, .-_nds32_vector_22_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid23.S b/libgcc/config/nds32/isr-library/vec_vid23.S
+index 956b585..c5d73df 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid23.S
++++ b/libgcc/config/nds32/isr-library/vec_vid23.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.23, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_23
+ 	.type	_nds32_vector_23, @function
+ _nds32_vector_23:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid23_4b.S b/libgcc/config/nds32/isr-library/vec_vid23_4b.S
+deleted file mode 100644
+index 5e4b643..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid23_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.23, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_23_4b
+-	.type	_nds32_vector_23_4b, @function
+-_nds32_vector_23_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_23_4b, .-_nds32_vector_23_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid24.S b/libgcc/config/nds32/isr-library/vec_vid24.S
+index 57086e9..fe7dada 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid24.S
++++ b/libgcc/config/nds32/isr-library/vec_vid24.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.24, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_24
+ 	.type	_nds32_vector_24, @function
+ _nds32_vector_24:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid24_4b.S b/libgcc/config/nds32/isr-library/vec_vid24_4b.S
+deleted file mode 100644
+index 43495f9..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid24_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.24, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_24_4b
+-	.type	_nds32_vector_24_4b, @function
+-_nds32_vector_24_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_24_4b, .-_nds32_vector_24_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid25.S b/libgcc/config/nds32/isr-library/vec_vid25.S
+index 61fa526..ada24e4 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid25.S
++++ b/libgcc/config/nds32/isr-library/vec_vid25.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.25, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_25
+ 	.type	_nds32_vector_25, @function
+ _nds32_vector_25:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid25_4b.S b/libgcc/config/nds32/isr-library/vec_vid25_4b.S
+deleted file mode 100644
+index 1ce6cf3..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid25_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.25, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_25_4b
+-	.type	_nds32_vector_25_4b, @function
+-_nds32_vector_25_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_25_4b, .-_nds32_vector_25_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid26.S b/libgcc/config/nds32/isr-library/vec_vid26.S
+index 3d9191d..1f97945 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid26.S
++++ b/libgcc/config/nds32/isr-library/vec_vid26.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.26, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_26
+ 	.type	_nds32_vector_26, @function
+ _nds32_vector_26:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid26_4b.S b/libgcc/config/nds32/isr-library/vec_vid26_4b.S
+deleted file mode 100644
+index 5803247..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid26_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.26, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_26_4b
+-	.type	_nds32_vector_26_4b, @function
+-_nds32_vector_26_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_26_4b, .-_nds32_vector_26_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid27.S b/libgcc/config/nds32/isr-library/vec_vid27.S
+index ff12cfb..f440a8b 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid27.S
++++ b/libgcc/config/nds32/isr-library/vec_vid27.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.27, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_27
+ 	.type	_nds32_vector_27, @function
+ _nds32_vector_27:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid27_4b.S b/libgcc/config/nds32/isr-library/vec_vid27_4b.S
+deleted file mode 100644
+index d61e3f9..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid27_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.27, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_27_4b
+-	.type	_nds32_vector_27_4b, @function
+-_nds32_vector_27_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_27_4b, .-_nds32_vector_27_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid28.S b/libgcc/config/nds32/isr-library/vec_vid28.S
+index 6b7610e..e1621c7 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid28.S
++++ b/libgcc/config/nds32/isr-library/vec_vid28.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.28, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_28
+ 	.type	_nds32_vector_28, @function
+ _nds32_vector_28:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid28_4b.S b/libgcc/config/nds32/isr-library/vec_vid28_4b.S
+deleted file mode 100644
+index a39d015..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid28_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.28, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_28_4b
+-	.type	_nds32_vector_28_4b, @function
+-_nds32_vector_28_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_28_4b, .-_nds32_vector_28_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid29.S b/libgcc/config/nds32/isr-library/vec_vid29.S
+index b995841..4fa29c1 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid29.S
++++ b/libgcc/config/nds32/isr-library/vec_vid29.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.29, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_29
+ 	.type	_nds32_vector_29, @function
+ _nds32_vector_29:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid29_4b.S b/libgcc/config/nds32/isr-library/vec_vid29_4b.S
+deleted file mode 100644
+index 803f323..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid29_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.29, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_29_4b
+-	.type	_nds32_vector_29_4b, @function
+-_nds32_vector_29_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_29_4b, .-_nds32_vector_29_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid30.S b/libgcc/config/nds32/isr-library/vec_vid30.S
+index 57d1507..214e67b 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid30.S
++++ b/libgcc/config/nds32/isr-library/vec_vid30.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.30, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_30
+ 	.type	_nds32_vector_30, @function
+ _nds32_vector_30:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid30_4b.S b/libgcc/config/nds32/isr-library/vec_vid30_4b.S
+deleted file mode 100644
+index a2a1e3e..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid30_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.30, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_30_4b
+-	.type	_nds32_vector_30_4b, @function
+-_nds32_vector_30_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_30_4b, .-_nds32_vector_30_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid31.S b/libgcc/config/nds32/isr-library/vec_vid31.S
+index f9aee4e..b758b8c 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid31.S
++++ b/libgcc/config/nds32/isr-library/vec_vid31.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.31, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_31
+ 	.type	_nds32_vector_31, @function
+ _nds32_vector_31:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid31_4b.S b/libgcc/config/nds32/isr-library/vec_vid31_4b.S
+deleted file mode 100644
+index 989645f..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid31_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.31, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_31_4b
+-	.type	_nds32_vector_31_4b, @function
+-_nds32_vector_31_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_31_4b, .-_nds32_vector_31_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid32.S b/libgcc/config/nds32/isr-library/vec_vid32.S
+index fc26cad..58234d5 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid32.S
++++ b/libgcc/config/nds32/isr-library/vec_vid32.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.32, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_32
+ 	.type	_nds32_vector_32, @function
+ _nds32_vector_32:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid32_4b.S b/libgcc/config/nds32/isr-library/vec_vid32_4b.S
+deleted file mode 100644
+index 1ac7e31..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid32_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.32, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_32_4b
+-	.type	_nds32_vector_32_4b, @function
+-_nds32_vector_32_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_32_4b, .-_nds32_vector_32_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid33.S b/libgcc/config/nds32/isr-library/vec_vid33.S
+index dd655e6..d920352 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid33.S
++++ b/libgcc/config/nds32/isr-library/vec_vid33.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.33, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_33
+ 	.type	_nds32_vector_33, @function
+ _nds32_vector_33:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid33_4b.S b/libgcc/config/nds32/isr-library/vec_vid33_4b.S
+deleted file mode 100644
+index 3c99412..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid33_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.33, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_33_4b
+-	.type	_nds32_vector_33_4b, @function
+-_nds32_vector_33_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_33_4b, .-_nds32_vector_33_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid34.S b/libgcc/config/nds32/isr-library/vec_vid34.S
+index a6b8517..01999b4 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid34.S
++++ b/libgcc/config/nds32/isr-library/vec_vid34.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.34, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_34
+ 	.type	_nds32_vector_34, @function
+ _nds32_vector_34:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid34_4b.S b/libgcc/config/nds32/isr-library/vec_vid34_4b.S
+deleted file mode 100644
+index 77c07b9..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid34_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.34, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_34_4b
+-	.type	_nds32_vector_34_4b, @function
+-_nds32_vector_34_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_34_4b, .-_nds32_vector_34_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid35.S b/libgcc/config/nds32/isr-library/vec_vid35.S
+index 65ceeab..7ab0536 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid35.S
++++ b/libgcc/config/nds32/isr-library/vec_vid35.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.35, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_35
+ 	.type	_nds32_vector_35, @function
+ _nds32_vector_35:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid35_4b.S b/libgcc/config/nds32/isr-library/vec_vid35_4b.S
+deleted file mode 100644
+index 432873a..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid35_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.35, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_35_4b
+-	.type	_nds32_vector_35_4b, @function
+-_nds32_vector_35_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_35_4b, .-_nds32_vector_35_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid36.S b/libgcc/config/nds32/isr-library/vec_vid36.S
+index 688dbb9..5da079d 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid36.S
++++ b/libgcc/config/nds32/isr-library/vec_vid36.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.36, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_36
+ 	.type	_nds32_vector_36, @function
+ _nds32_vector_36:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid36_4b.S b/libgcc/config/nds32/isr-library/vec_vid36_4b.S
+deleted file mode 100644
+index dadd381..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid36_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.36, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_36_4b
+-	.type	_nds32_vector_36_4b, @function
+-_nds32_vector_36_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_36_4b, .-_nds32_vector_36_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid37.S b/libgcc/config/nds32/isr-library/vec_vid37.S
+index 712bbe8..704d6b8 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid37.S
++++ b/libgcc/config/nds32/isr-library/vec_vid37.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.37, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_37
+ 	.type	_nds32_vector_37, @function
+ _nds32_vector_37:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid37_4b.S b/libgcc/config/nds32/isr-library/vec_vid37_4b.S
+deleted file mode 100644
+index ec845e1..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid37_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.37, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_37_4b
+-	.type	_nds32_vector_37_4b, @function
+-_nds32_vector_37_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_37_4b, .-_nds32_vector_37_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid38.S b/libgcc/config/nds32/isr-library/vec_vid38.S
+index b6e4979..fdfc4a9 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid38.S
++++ b/libgcc/config/nds32/isr-library/vec_vid38.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.38, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_38
+ 	.type	_nds32_vector_38, @function
+ _nds32_vector_38:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid38_4b.S b/libgcc/config/nds32/isr-library/vec_vid38_4b.S
+deleted file mode 100644
+index 84919ed..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid38_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.38, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_38_4b
+-	.type	_nds32_vector_38_4b, @function
+-_nds32_vector_38_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_38_4b, .-_nds32_vector_38_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid39.S b/libgcc/config/nds32/isr-library/vec_vid39.S
+index 2dee269..00dd245 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid39.S
++++ b/libgcc/config/nds32/isr-library/vec_vid39.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.39, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_39
+ 	.type	_nds32_vector_39, @function
+ _nds32_vector_39:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid39_4b.S b/libgcc/config/nds32/isr-library/vec_vid39_4b.S
+deleted file mode 100644
+index 8f2f634..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid39_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.39, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_39_4b
+-	.type	_nds32_vector_39_4b, @function
+-_nds32_vector_39_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_39_4b, .-_nds32_vector_39_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid40.S b/libgcc/config/nds32/isr-library/vec_vid40.S
+index fe7508c..82b579f 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid40.S
++++ b/libgcc/config/nds32/isr-library/vec_vid40.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.40, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_40
+ 	.type	_nds32_vector_40, @function
+ _nds32_vector_40:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid40_4b.S b/libgcc/config/nds32/isr-library/vec_vid40_4b.S
+deleted file mode 100644
+index 0aab8f4..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid40_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.40, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_40_4b
+-	.type	_nds32_vector_40_4b, @function
+-_nds32_vector_40_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_40_4b, .-_nds32_vector_40_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid41.S b/libgcc/config/nds32/isr-library/vec_vid41.S
+index 711fcd5..721c735 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid41.S
++++ b/libgcc/config/nds32/isr-library/vec_vid41.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.41, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_41
+ 	.type	_nds32_vector_41, @function
+ _nds32_vector_41:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid41_4b.S b/libgcc/config/nds32/isr-library/vec_vid41_4b.S
+deleted file mode 100644
+index e8a8527..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid41_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.41, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_41_4b
+-	.type	_nds32_vector_41_4b, @function
+-_nds32_vector_41_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_41_4b, .-_nds32_vector_41_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid42.S b/libgcc/config/nds32/isr-library/vec_vid42.S
+index 0c6a849..307b51d 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid42.S
++++ b/libgcc/config/nds32/isr-library/vec_vid42.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.42, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_42
+ 	.type	_nds32_vector_42, @function
+ _nds32_vector_42:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid42_4b.S b/libgcc/config/nds32/isr-library/vec_vid42_4b.S
+deleted file mode 100644
+index cfe184c..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid42_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.42, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_42_4b
+-	.type	_nds32_vector_42_4b, @function
+-_nds32_vector_42_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_42_4b, .-_nds32_vector_42_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid43.S b/libgcc/config/nds32/isr-library/vec_vid43.S
+index 2b4681a..c0ce02d 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid43.S
++++ b/libgcc/config/nds32/isr-library/vec_vid43.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.43, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_43
+ 	.type	_nds32_vector_43, @function
+ _nds32_vector_43:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid43_4b.S b/libgcc/config/nds32/isr-library/vec_vid43_4b.S
+deleted file mode 100644
+index 3edd606..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid43_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.43, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_43_4b
+-	.type	_nds32_vector_43_4b, @function
+-_nds32_vector_43_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_43_4b, .-_nds32_vector_43_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid44.S b/libgcc/config/nds32/isr-library/vec_vid44.S
+index 232ef41..c2a384c 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid44.S
++++ b/libgcc/config/nds32/isr-library/vec_vid44.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.44, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_44
+ 	.type	_nds32_vector_44, @function
+ _nds32_vector_44:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid44_4b.S b/libgcc/config/nds32/isr-library/vec_vid44_4b.S
+deleted file mode 100644
+index 0f2b8a3..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid44_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.44, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_44_4b
+-	.type	_nds32_vector_44_4b, @function
+-_nds32_vector_44_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_44_4b, .-_nds32_vector_44_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid45.S b/libgcc/config/nds32/isr-library/vec_vid45.S
+index e2f9863..e13c52b 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid45.S
++++ b/libgcc/config/nds32/isr-library/vec_vid45.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.45, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_45
+ 	.type	_nds32_vector_45, @function
+ _nds32_vector_45:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid45_4b.S b/libgcc/config/nds32/isr-library/vec_vid45_4b.S
+deleted file mode 100644
+index 7358ec1..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid45_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.45, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_45_4b
+-	.type	_nds32_vector_45_4b, @function
+-_nds32_vector_45_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_45_4b, .-_nds32_vector_45_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid46.S b/libgcc/config/nds32/isr-library/vec_vid46.S
+index f3b93aa..71bfb53 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid46.S
++++ b/libgcc/config/nds32/isr-library/vec_vid46.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.46, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_46
+ 	.type	_nds32_vector_46, @function
+ _nds32_vector_46:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid46_4b.S b/libgcc/config/nds32/isr-library/vec_vid46_4b.S
+deleted file mode 100644
+index 2782e86..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid46_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.46, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_46_4b
+-	.type	_nds32_vector_46_4b, @function
+-_nds32_vector_46_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_46_4b, .-_nds32_vector_46_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid47.S b/libgcc/config/nds32/isr-library/vec_vid47.S
+index 130c8d7..d1f2131 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid47.S
++++ b/libgcc/config/nds32/isr-library/vec_vid47.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.47, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_47
+ 	.type	_nds32_vector_47, @function
+ _nds32_vector_47:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid47_4b.S b/libgcc/config/nds32/isr-library/vec_vid47_4b.S
+deleted file mode 100644
+index f237577..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid47_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.47, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_47_4b
+-	.type	_nds32_vector_47_4b, @function
+-_nds32_vector_47_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_47_4b, .-_nds32_vector_47_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid48.S b/libgcc/config/nds32/isr-library/vec_vid48.S
+index f3bca05..4ba5eb9 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid48.S
++++ b/libgcc/config/nds32/isr-library/vec_vid48.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.48, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_48
+ 	.type	_nds32_vector_48, @function
+ _nds32_vector_48:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid48_4b.S b/libgcc/config/nds32/isr-library/vec_vid48_4b.S
+deleted file mode 100644
+index 3e35f68..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid48_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.48, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_48_4b
+-	.type	_nds32_vector_48_4b, @function
+-_nds32_vector_48_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_48_4b, .-_nds32_vector_48_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid49.S b/libgcc/config/nds32/isr-library/vec_vid49.S
+index 0b32691..dd3d35e 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid49.S
++++ b/libgcc/config/nds32/isr-library/vec_vid49.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.49, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_49
+ 	.type	_nds32_vector_49, @function
+ _nds32_vector_49:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid49_4b.S b/libgcc/config/nds32/isr-library/vec_vid49_4b.S
+deleted file mode 100644
+index a510bbb..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid49_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.49, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_49_4b
+-	.type	_nds32_vector_49_4b, @function
+-_nds32_vector_49_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_49_4b, .-_nds32_vector_49_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid50.S b/libgcc/config/nds32/isr-library/vec_vid50.S
+index 48334feb..8f801ec 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid50.S
++++ b/libgcc/config/nds32/isr-library/vec_vid50.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.50, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_50
+ 	.type	_nds32_vector_50, @function
+ _nds32_vector_50:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid50_4b.S b/libgcc/config/nds32/isr-library/vec_vid50_4b.S
+deleted file mode 100644
+index 1f42b73..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid50_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.50, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_50_4b
+-	.type	_nds32_vector_50_4b, @function
+-_nds32_vector_50_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_50_4b, .-_nds32_vector_50_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid51.S b/libgcc/config/nds32/isr-library/vec_vid51.S
+index 4c27f27..445abf9 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid51.S
++++ b/libgcc/config/nds32/isr-library/vec_vid51.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.51, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_51
+ 	.type	_nds32_vector_51, @function
+ _nds32_vector_51:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid51_4b.S b/libgcc/config/nds32/isr-library/vec_vid51_4b.S
+deleted file mode 100644
+index 7bb8abe..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid51_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.51, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_51_4b
+-	.type	_nds32_vector_51_4b, @function
+-_nds32_vector_51_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_51_4b, .-_nds32_vector_51_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid52.S b/libgcc/config/nds32/isr-library/vec_vid52.S
+index 4c44811..7283975 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid52.S
++++ b/libgcc/config/nds32/isr-library/vec_vid52.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.52, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_52
+ 	.type	_nds32_vector_52, @function
+ _nds32_vector_52:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid52_4b.S b/libgcc/config/nds32/isr-library/vec_vid52_4b.S
+deleted file mode 100644
+index 4cb89f6..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid52_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.52, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_52_4b
+-	.type	_nds32_vector_52_4b, @function
+-_nds32_vector_52_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_52_4b, .-_nds32_vector_52_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid53.S b/libgcc/config/nds32/isr-library/vec_vid53.S
+index 2882583..299c645 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid53.S
++++ b/libgcc/config/nds32/isr-library/vec_vid53.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.53, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_53
+ 	.type	_nds32_vector_53, @function
+ _nds32_vector_53:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid53_4b.S b/libgcc/config/nds32/isr-library/vec_vid53_4b.S
+deleted file mode 100644
+index 9abc839..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid53_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.53, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_53_4b
+-	.type	_nds32_vector_53_4b, @function
+-_nds32_vector_53_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_53_4b, .-_nds32_vector_53_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid54.S b/libgcc/config/nds32/isr-library/vec_vid54.S
+index a014c72..ae99390 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid54.S
++++ b/libgcc/config/nds32/isr-library/vec_vid54.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.54, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_54
+ 	.type	_nds32_vector_54, @function
+ _nds32_vector_54:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid54_4b.S b/libgcc/config/nds32/isr-library/vec_vid54_4b.S
+deleted file mode 100644
+index f736ba8..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid54_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.54, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_54_4b
+-	.type	_nds32_vector_54_4b, @function
+-_nds32_vector_54_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_54_4b, .-_nds32_vector_54_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid55.S b/libgcc/config/nds32/isr-library/vec_vid55.S
+index 44d820c..e75d24a 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid55.S
++++ b/libgcc/config/nds32/isr-library/vec_vid55.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.55, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_55
+ 	.type	_nds32_vector_55, @function
+ _nds32_vector_55:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid55_4b.S b/libgcc/config/nds32/isr-library/vec_vid55_4b.S
+deleted file mode 100644
+index d09c665..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid55_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.55, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_55_4b
+-	.type	_nds32_vector_55_4b, @function
+-_nds32_vector_55_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_55_4b, .-_nds32_vector_55_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid56.S b/libgcc/config/nds32/isr-library/vec_vid56.S
+index d5cb362..cc4904e 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid56.S
++++ b/libgcc/config/nds32/isr-library/vec_vid56.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.56, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_56
+ 	.type	_nds32_vector_56, @function
+ _nds32_vector_56:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid56_4b.S b/libgcc/config/nds32/isr-library/vec_vid56_4b.S
+deleted file mode 100644
+index 86b4103..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid56_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.56, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_56_4b
+-	.type	_nds32_vector_56_4b, @function
+-_nds32_vector_56_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_56_4b, .-_nds32_vector_56_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid57.S b/libgcc/config/nds32/isr-library/vec_vid57.S
+index 5fb3ce9..a17ed45 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid57.S
++++ b/libgcc/config/nds32/isr-library/vec_vid57.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.57, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_57
+ 	.type	_nds32_vector_57, @function
+ _nds32_vector_57:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid57_4b.S b/libgcc/config/nds32/isr-library/vec_vid57_4b.S
+deleted file mode 100644
+index 45c5d29..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid57_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.57, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_57_4b
+-	.type	_nds32_vector_57_4b, @function
+-_nds32_vector_57_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_57_4b, .-_nds32_vector_57_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid58.S b/libgcc/config/nds32/isr-library/vec_vid58.S
+index d420d68..629bf1a 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid58.S
++++ b/libgcc/config/nds32/isr-library/vec_vid58.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.58, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_58
+ 	.type	_nds32_vector_58, @function
+ _nds32_vector_58:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid58_4b.S b/libgcc/config/nds32/isr-library/vec_vid58_4b.S
+deleted file mode 100644
+index 812470c..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid58_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.58, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_58_4b
+-	.type	_nds32_vector_58_4b, @function
+-_nds32_vector_58_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_58_4b, .-_nds32_vector_58_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid59.S b/libgcc/config/nds32/isr-library/vec_vid59.S
+index 78a1885..540e02e 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid59.S
++++ b/libgcc/config/nds32/isr-library/vec_vid59.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.59, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_59
+ 	.type	_nds32_vector_59, @function
+ _nds32_vector_59:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid59_4b.S b/libgcc/config/nds32/isr-library/vec_vid59_4b.S
+deleted file mode 100644
+index fa3a467..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid59_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.59, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_59_4b
+-	.type	_nds32_vector_59_4b, @function
+-_nds32_vector_59_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_59_4b, .-_nds32_vector_59_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid60.S b/libgcc/config/nds32/isr-library/vec_vid60.S
+index a6f704d..8658249 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid60.S
++++ b/libgcc/config/nds32/isr-library/vec_vid60.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.60, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_60
+ 	.type	_nds32_vector_60, @function
+ _nds32_vector_60:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid60_4b.S b/libgcc/config/nds32/isr-library/vec_vid60_4b.S
+deleted file mode 100644
+index 505da2a..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid60_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.60, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_60_4b
+-	.type	_nds32_vector_60_4b, @function
+-_nds32_vector_60_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_60_4b, .-_nds32_vector_60_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid61.S b/libgcc/config/nds32/isr-library/vec_vid61.S
+index 4e79bde..376acb9 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid61.S
++++ b/libgcc/config/nds32/isr-library/vec_vid61.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.61, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_61
+ 	.type	_nds32_vector_61, @function
+ _nds32_vector_61:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid61_4b.S b/libgcc/config/nds32/isr-library/vec_vid61_4b.S
+deleted file mode 100644
+index 9a0cce5..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid61_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.61, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_61_4b
+-	.type	_nds32_vector_61_4b, @function
+-_nds32_vector_61_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_61_4b, .-_nds32_vector_61_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid62.S b/libgcc/config/nds32/isr-library/vec_vid62.S
+index 5eef0a6..5ab06a8 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid62.S
++++ b/libgcc/config/nds32/isr-library/vec_vid62.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.62, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_62
+ 	.type	_nds32_vector_62, @function
+ _nds32_vector_62:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid62_4b.S b/libgcc/config/nds32/isr-library/vec_vid62_4b.S
+deleted file mode 100644
+index da8ba28..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid62_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.62, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_62_4b
+-	.type	_nds32_vector_62_4b, @function
+-_nds32_vector_62_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_62_4b, .-_nds32_vector_62_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid63.S b/libgcc/config/nds32/isr-library/vec_vid63.S
+index 0a8c0ad..6646bcc 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid63.S
++++ b/libgcc/config/nds32/isr-library/vec_vid63.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.63, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_63
+ 	.type	_nds32_vector_63, @function
+ _nds32_vector_63:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid63_4b.S b/libgcc/config/nds32/isr-library/vec_vid63_4b.S
+deleted file mode 100644
+index 8f1045e..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid63_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.63, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_63_4b
+-	.type	_nds32_vector_63_4b, @function
+-_nds32_vector_63_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_63_4b, .-_nds32_vector_63_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid64.S b/libgcc/config/nds32/isr-library/vec_vid64.S
+index b3f034b..f892aec 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid64.S
++++ b/libgcc/config/nds32/isr-library/vec_vid64.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.64, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_64
+ 	.type	_nds32_vector_64, @function
+ _nds32_vector_64:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid64_4b.S b/libgcc/config/nds32/isr-library/vec_vid64_4b.S
+deleted file mode 100644
+index 81d9679..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid64_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.64, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_64_4b
+-	.type	_nds32_vector_64_4b, @function
+-_nds32_vector_64_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_64_4b, .-_nds32_vector_64_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid65.S b/libgcc/config/nds32/isr-library/vec_vid65.S
+index 72db454..03f79a5 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid65.S
++++ b/libgcc/config/nds32/isr-library/vec_vid65.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.65, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_65
+ 	.type	_nds32_vector_65, @function
+ _nds32_vector_65:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid65_4b.S b/libgcc/config/nds32/isr-library/vec_vid65_4b.S
+deleted file mode 100644
+index aa9ad2b..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid65_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.65, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_65_4b
+-	.type	_nds32_vector_65_4b, @function
+-_nds32_vector_65_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_65_4b, .-_nds32_vector_65_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid66.S b/libgcc/config/nds32/isr-library/vec_vid66.S
+index 75469e7..ff805bd 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid66.S
++++ b/libgcc/config/nds32/isr-library/vec_vid66.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.66, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_66
+ 	.type	_nds32_vector_66, @function
+ _nds32_vector_66:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid66_4b.S b/libgcc/config/nds32/isr-library/vec_vid66_4b.S
+deleted file mode 100644
+index 9830fe2..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid66_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.66, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_66_4b
+-	.type	_nds32_vector_66_4b, @function
+-_nds32_vector_66_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_66_4b, .-_nds32_vector_66_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid67.S b/libgcc/config/nds32/isr-library/vec_vid67.S
+index 4b076cd..f592aba 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid67.S
++++ b/libgcc/config/nds32/isr-library/vec_vid67.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.67, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_67
+ 	.type	_nds32_vector_67, @function
+ _nds32_vector_67:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid67_4b.S b/libgcc/config/nds32/isr-library/vec_vid67_4b.S
+deleted file mode 100644
+index c7e31dd..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid67_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.67, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_67_4b
+-	.type	_nds32_vector_67_4b, @function
+-_nds32_vector_67_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_67_4b, .-_nds32_vector_67_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid68.S b/libgcc/config/nds32/isr-library/vec_vid68.S
+index 7df1cdd..ee2702a 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid68.S
++++ b/libgcc/config/nds32/isr-library/vec_vid68.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.68, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_68
+ 	.type	_nds32_vector_68, @function
+ _nds32_vector_68:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid68_4b.S b/libgcc/config/nds32/isr-library/vec_vid68_4b.S
+deleted file mode 100644
+index 0d6fcb5..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid68_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.68, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_68_4b
+-	.type	_nds32_vector_68_4b, @function
+-_nds32_vector_68_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_68_4b, .-_nds32_vector_68_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid69.S b/libgcc/config/nds32/isr-library/vec_vid69.S
+index e30e5bf..c152015 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid69.S
++++ b/libgcc/config/nds32/isr-library/vec_vid69.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.69, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_69
+ 	.type	_nds32_vector_69, @function
+ _nds32_vector_69:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid69_4b.S b/libgcc/config/nds32/isr-library/vec_vid69_4b.S
+deleted file mode 100644
+index 3508162..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid69_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.69, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_69_4b
+-	.type	_nds32_vector_69_4b, @function
+-_nds32_vector_69_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_69_4b, .-_nds32_vector_69_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid70.S b/libgcc/config/nds32/isr-library/vec_vid70.S
+index d436ac5..a3578d6 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid70.S
++++ b/libgcc/config/nds32/isr-library/vec_vid70.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.70, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_70
+ 	.type	_nds32_vector_70, @function
+ _nds32_vector_70:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid70_4b.S b/libgcc/config/nds32/isr-library/vec_vid70_4b.S
+deleted file mode 100644
+index f3f0dd6..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid70_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.70, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_70_4b
+-	.type	_nds32_vector_70_4b, @function
+-_nds32_vector_70_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_70_4b, .-_nds32_vector_70_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid71.S b/libgcc/config/nds32/isr-library/vec_vid71.S
+index d7d7ab3..6790888 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid71.S
++++ b/libgcc/config/nds32/isr-library/vec_vid71.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.71, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_71
+ 	.type	_nds32_vector_71, @function
+ _nds32_vector_71:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid71_4b.S b/libgcc/config/nds32/isr-library/vec_vid71_4b.S
+deleted file mode 100644
+index 505c79e..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid71_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.71, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_71_4b
+-	.type	_nds32_vector_71_4b, @function
+-_nds32_vector_71_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_71_4b, .-_nds32_vector_71_4b
+diff --git a/libgcc/config/nds32/isr-library/vec_vid72.S b/libgcc/config/nds32/isr-library/vec_vid72.S
+index 08652d2..32984a0 100644
+--- a/libgcc/config/nds32/isr-library/vec_vid72.S
++++ b/libgcc/config/nds32/isr-library/vec_vid72.S
+@@ -24,8 +24,15 @@
+    <http://www.gnu.org/licenses/>.  */
+ 
+ 	.section	.nds32_vector.72, "ax"
++#if __NDS32_ISR_VECTOR_SIZE_4__
++	/* The vector size is default 4-byte for v3 architecture.  */
++	.vec_size	4
++	.align	2
++#else
++	/* The vector size is default 16-byte for other architectures.  */
+ 	.vec_size	16
+ 	.align	4
++#endif
+ 	.weak	_nds32_vector_72
+ 	.type	_nds32_vector_72, @function
+ _nds32_vector_72:
+diff --git a/libgcc/config/nds32/isr-library/vec_vid72_4b.S b/libgcc/config/nds32/isr-library/vec_vid72_4b.S
+deleted file mode 100644
+index 1083c03..0000000
+--- a/libgcc/config/nds32/isr-library/vec_vid72_4b.S
++++ /dev/null
+@@ -1,34 +0,0 @@
+-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.nds32_vector.72, "ax"
+-	.vec_size	4
+-	.align	2
+-	.weak	_nds32_vector_72_4b
+-	.type	_nds32_vector_72_4b, @function
+-_nds32_vector_72_4b:
+-1:
+-	j	1b
+-	.size	_nds32_vector_72_4b, .-_nds32_vector_72_4b
+diff --git a/libgcc/config/nds32/lib1asmsrc-mculib.S b/libgcc/config/nds32/lib1asmsrc-mculib.S
+deleted file mode 100644
+index bdbcd74..0000000
+--- a/libgcc/config/nds32/lib1asmsrc-mculib.S
++++ /dev/null
+@@ -1,5213 +0,0 @@
+-/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-	.section	.mdebug.abi_nds32
+-	.previous
+-
+-
+-/* ------------------------------------------- */
+-/* FPBIT floating point operations for libgcc  */
+-/* ------------------------------------------- */
+-
+-#ifdef L_addsub_sf
+-
+-	.text
+-	.align	2
+-	.global	__subsf3
+-	.type	__subsf3, @function
+-__subsf3:
+-	push    $lp
+-	pushm   $r6, $r9
+-
+-	move    $r2, #0x80000000
+-	xor     $r1, $r1, $r2
+-
+-	j       .Lsfpadd
+-
+-	.global	__addsf3
+-	.type	__addsf3, @function
+-__addsf3:
+-	push    $lp
+-	pushm   $r6, $r9
+-.Lsfpadd:
+-	srli    $r5, $r0, #23
+-	andi    $r5, $r5, #0xff
+-	srli    $r7, $r1, #23
+-	andi    $r7, $r7, #0xff
+-	move    $r3, #0x80000000
+-	slli    $r4, $r0, #8
+-	or      $r4, $r4, $r3
+-	slli    $r6, $r1, #8
+-	or      $r6, $r6, $r3
+-
+-	addi    $r9, $r5, #-1
+-	slti    $r15, $r9, #0xfe
+-	beqzs8  .LEspecA
+-
+-.LElab1:
+-	addi    $r9, $r7, #-1
+-	slti    $r15, $r9, #0xfe
+-	beqzs8  .LEspecB
+-
+-.LElab2:
+-	sub     $r8, $r5, $r7
+-	sltsi   $r15, $r8, #0
+-	bnezs8  .Li1
+-	sltsi   $r15, $r8, #0x20
+-	bnezs8  .Li2
+-	move    $r6, #2
+-	j       .Le1
+-.Li2:
+-	move    $r2, $r6
+-	srl     $r6, $r6, $r8
+-	sll     $r9, $r6, $r8
+-	beq     $r9, $r2, .Le1
+-	ori     $r6, $r6, #2
+-	j       .Le1
+-.Li1:
+-	move    $r5, $r7
+-	subri   $r8, $r8, #0
+-	sltsi   $r15, $r8, #0x20
+-	bnezs8  .Li4
+-	move    $r4, #2
+-	j       .Le1
+-.Li4:
+-	move    $r2, $r4
+-	srl     $r4, $r4, $r8
+-	sll     $r9, $r4, $r8
+-	beq     $r9, $r2, .Le1
+-	ori     $r4, $r4, #2
+-
+-.Le1:
+-	and     $r8, $r0, $r3
+-	xor     $r9, $r8, $r1
+-	sltsi   $r15, $r9, #0
+-	bnezs8  .LEsub1
+-
+-	#ADD($r4, $r6)
+-	add     $r4, $r4, $r6
+-	slt     $r15, $r4, $r6
+-	beqzs8  .LEres
+-	andi    $r9, $r4, #1
+-	beqz    $r9, .Li7
+-	ori     $r4, $r4, #2
+-.Li7:
+-	srli    $r4, $r4, #1
+-	addi    $r5, $r5, #1
+-	subri   $r15, $r5, #0xff
+-	bnezs8  .LEres
+-	move    $r4, #0
+-	j       .LEres
+-
+-.LEsub1:
+-	#SUB($r4, $r6)
+-	move    $r15, $r4
+-	sub     $r4, $r4, $r6
+-	slt     $r15, $r15, $r4
+-	beqzs8  .Li9
+-	subri   $r4, $r4, #0
+-	xor     $r8, $r8, $r3
+-	j       .Le9
+-.Li9:
+-	beqz    $r4, .LEzer
+-.Le9:
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r2, $r4
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r3, $r5
+-	move	$r0, $r4
+-	bal	__clzsi2
+-	move	$r2, $r0
+-	popm	$r3, $r5
+-	popm	$r0, $r1
+-#endif
+-	sub     $r5, $r5, $r2
+-	sll     $r4, $r4, $r2
+-
+-.LEres:
+-	blez    $r5, .LEund
+-
+-.LElab12:
+-	#ADD($r4, $0x80)
+-	move    $r15, #0x80
+-	add     $r4, $r4, $r15
+-	slt     $r15, $r4, $r15
+-
+-	#ADDC($r5, $0x0)
+-	add     $r5, $r5, $r15
+-	srli    $r9, $r4, #8
+-	andi    $r9, $r9, #1
+-	sub     $r4, $r4, $r9
+-	slli    $r4, $r4, #1
+-	srli    $r4, $r4, #9
+-	slli    $r9, $r5, #23
+-	or      $r4, $r4, $r9
+-	or      $r0, $r4, $r8
+-
+-.LE999:
+-	popm    $r6, $r9
+-	pop     $lp
+-	ret5    $lp
+-
+-.LEund:
+-	subri   $r2, $r5, #1
+-	slti    $r15, $r2, #0x20
+-	beqzs8  .LEzer
+-	move    $r9, #0x80000000
+-	or      $r4, $r4, $r9
+-	subri   $r9, $r2, #0x20
+-	sll     $r5, $r4, $r9
+-	srl     $r4, $r4, $r2
+-	beqz    $r5, .Li10
+-	ori     $r4, $r4, #1
+-.Li10:
+-	move    $r5, #0
+-	addi    $r9, $r4, #0x80
+-	sltsi   $r15, $r9, #0
+-	beqzs8  .LElab12
+-	move    $r5, #1
+-	j       .LElab12
+-
+-.LEspecA:
+-	bnez    $r5, .Li12
+-	add     $r4, $r4, $r4
+-	beqz    $r4, .Li13
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r8, $r4
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r4
+-	bal	__clzsi2
+-	move	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	sub     $r5, $r5, $r8
+-	sll     $r4, $r4, $r8
+-	j       .LElab1
+-.Li13:
+-	subri   $r15, $r7, #0xff
+-	beqzs8  .LEspecB
+-	move    $r9, #0x80000000
+-	bne     $r1, $r9, .LEretB
+-.Li12:
+-	add     $r9, $r4, $r4
+-	bnez    $r9, .LEnan
+-	subri   $r15, $r7, #0xff
+-	bnezs8  .LEretA
+-	xor     $r9, $r0, $r1
+-	sltsi   $r15, $r9, #0
+-	bnezs8  .LEnan
+-	j       .LEretB
+-
+-.LEspecB:
+-	bnez    $r7, .Li15
+-	add     $r6, $r6, $r6
+-	beqz    $r6, .LEretA
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r8, $r6
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r6
+-	bal	__clzsi2
+-	move	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	sub     $r7, $r7, $r8
+-	sll     $r6, $r6, $r8
+-	j       .LElab2
+-.Li15:
+-	add     $r9, $r6, $r6
+-	bnez    $r9, .LEnan
+-
+-.LEretB:
+-	move    $r0, $r1
+-	j       .LE999
+-
+-.LEretA:
+-	j       .LE999
+-
+-.LEzer:
+-	move    $r0, #0
+-	j       .LE999
+-
+-.LEnan:
+-	move    $r0, #0xffc00000
+-	j       .LE999
+-	.size	__subsf3, .-__subsf3
+-	.size	__addsf3, .-__addsf3
+-#endif /* L_addsub_sf */
+-
+-
+-
+-#ifdef L_sf_to_si
+-
+-	.text
+-	.align	2
+-	.global	__fixsfsi
+-	.type	__fixsfsi, @function
+-__fixsfsi:
+-	push    $lp
+-
+-	slli    $r1, $r0, #8
+-	move    $r3, #0x80000000
+-	or      $r1, $r1, $r3
+-	srli    $r3, $r0, #23
+-	andi    $r3, $r3, #0xff
+-	subri   $r2, $r3, #0x9e
+-	blez    $r2, .LJspec
+-	sltsi   $r15, $r2, #0x20
+-	bnezs8  .Li42
+-	move    $r0, #0
+-	j       .LJ999
+-.Li42:
+-	srl     $r1, $r1, $r2
+-	sltsi   $r15, $r0, #0
+-	beqzs8  .Li43
+-	subri   $r1, $r1, #0
+-.Li43:
+-	move    $r0, $r1
+-
+-.LJ999:
+-	pop     $lp
+-	ret5    $lp
+-
+-.LJspec:
+-	move    $r3, #0x7f800000
+-	slt     $r15, $r3, $r0
+-	beqzs8  .Li44
+-	move    $r0, #0x80000000
+-	j       .LJ999
+-.Li44:
+-	move    $r0, #0x7fffffff
+-	j       .LJ999
+-	.size	__fixsfsi, .-__fixsfsi
+-#endif /* L_sf_to_si */
+-
+-
+-
+-#ifdef L_divsi3
+-
+-	.text
+-	.align	2
+-	.globl	__divsi3
+-	.type	__divsi3, @function
+-__divsi3:
+-	! ---------------------------------------------------------------------
+-	! neg = 0;
+-	! if (a < 0)
+-	! {   a = -a;
+-	!     neg = !neg;
+-	! }
+-	! ---------------------------------------------------------------------
+-	sltsi	$r5, $r0, 0			! $r5  <- neg = (a < 0) ? 1 : 0
+-	subri	$r4, $r0, 0			! $r4  <- a = -a
+-	cmovn	$r0, $r4, $r5			! $r0  <- a = neg ? -a : a
+-.L2:
+-	! ---------------------------------------------------------------------
+-	! if (b < 0)
+-	! ---------------------------------------------------------------------
+-	bgez	$r1, .L3			! if b >= 0, skip
+-	! ---------------------------------------------------------------------
+-	! {   b=-b;
+-	!     neg=!neg;
+-	! }
+-	! ---------------------------------------------------------------------
+-	subri	$r1, $r1, 0			! $r1  <- b = -b
+-	subri	$r5, $r5, 1			! $r5  <- neg = !neg
+-.L3:
+-	! ---------------------------------------------------------------------
+-	!!res = udivmodsi4 (a, b, 1);
+-	! res = 0;
+-	! if (den != 0)
+-	! ---------------------------------------------------------------------
+-	movi	$r2, 0				! $r2  <- res = 0
+-	beqz	$r1, .L1			! if den == 0, skip
+-	! ---------------------------------------------------------------------
+-	! bit = 1;
+-	! ---------------------------------------------------------------------
+-	movi	$r4, 1				! $r4  <- bit = 1
+-#ifndef __OPTIMIZE_SIZE__
+-.L6:
+-#endif
+-	! ---------------------------------------------------------------------
+-	! while (den < num && bit && !(den & (1L << 31)))
+-	! ---------------------------------------------------------------------
+-	slt	$ta, $r1, $r0			! $ta  <- den < num ?
+-	beqz	$ta, .L5			! if no, skip
+-	! ---------------------------------------------------------------------
+-	! {   den << = 1;
+-	!     bit << = 1;
+-	! }
+-	! ---------------------------------------------------------------------
+-#if defined (__OPTIMIZE_SIZE__) && !defined (__NDS32_ISA_V3M__)
+-	clz	$r3, $r1			! $r3  <- leading zero count for den
+-	clz	$ta, $r0			! $ta  <- leading zero count for num
+-	sub	$r3, $r3, $ta			! $r3  <- number of bits to shift
+-	sll	$r1, $r1, $r3			! $r1  <- den
+-	sll	$r4, $r4, $r3			! $r2  <- bit
+-#else
+-	slli	$r1, $r1, 1			! $r1  <- den << = 1
+-	slli	$r4, $r4, 1			! $r4  <- bit << = 1
+-	b	.L6				! continue loop
+-#endif
+-.L5:
+-	! ---------------------------------------------------------------------
+-	! while (bit)
+-	! {   if (num >= den)
+-	! ---------------------------------------------------------------------
+-	slt	$ta, $r0, $r1			! $ta  <- num < den ?
+-	bnez	$ta, .L9			! if yes, skip
+-	! ---------------------------------------------------------------------
+-	!     {   num -= den;
+-	!         res |= bit;
+-	!     }
+-	! ---------------------------------------------------------------------
+-	sub	$r0, $r0, $r1			! $r0  <- num -= den
+-	or	$r2, $r2, $r4			! $r2  <- res |= bit
+-.L9:
+-	! ---------------------------------------------------------------------
+-	!     bit >> = 1;
+-	!     den >> = 1;
+-	! }
+-	!!if (modwanted)
+-	!!    return num;
+-	!!return res;
+-	! ---------------------------------------------------------------------
+-	srli	$r4, $r4, 1			! $r4  <- bit >> = 1
+-	srli	$r1, $r1, 1			! $r1  <- den >> = 1
+-	bnez	$r4, .L5			! if bit != 0, continue loop
+-.L1:
+-	! ---------------------------------------------------------------------
+-	! if (neg)
+-	!     res = -res;
+-	! return res;
+-	! ---------------------------------------------------------------------
+-	subri	$r0, $r2, 0			! $r0  <- -res
+-	cmovz	$r0, $r2, $r5			! $r0  <- neg ? -res : res
+-	! ---------------------------------------------------------------------
+-	ret
+-	.size	__divsi3, .-__divsi3
+-#endif /* L_divsi3 */
+-
+-
+-
+-#ifdef L_divdi3
+-
+-	!--------------------------------------
+-	#ifdef __big_endian__
+-		#define  V1H  $r0
+-		#define  V1L  $r1
+-		#define  V2H  $r2
+-		#define  V2L  $r3
+-	#else
+-		#define  V1H  $r1
+-		#define  V1L  $r0
+-		#define  V2H  $r3
+-		#define  V2L  $r2
+-	#endif
+-	!--------------------------------------
+-	.text
+-	.align	2
+-	.globl	__divdi3
+-	.type	__divdi3, @function
+-__divdi3:
+-	! prologue
+-#ifdef __NDS32_ISA_V3M__
+-	push25	$r10, 0
+-#else
+-	smw.adm	$r6, [$sp], $r10, 2
+-#endif
+-	! end of prologue
+-	move	$r8, V1L
+-	move	$r9, V1H
+-	move	$r6, V2L
+-	move	$r7, V2H
+-	movi	$r10, 0
+-	bgez	V1H, .L80
+-	bal	__negdi2
+-	move	$r8, V1L
+-	move	$r9, V1H
+-	movi	$r10, -1
+-.L80:
+-	bgez	$r7, .L81
+-	move	V1L, $r6
+-	move	V1H, $r7
+-	bal	__negdi2
+-	move	$r6, V1L
+-	move	$r7, V1H
+-	nor	$r10, $r10, $r10
+-.L81:
+-	move	V2L, $r6
+-	move	V2H, $r7
+-	move	V1L, $r8
+-	move	V1H, $r9
+-	movi	$r4, 0
+-	bal	__udivmoddi4
+-	beqz	$r10, .L82
+-	bal	__negdi2
+-.L82:
+-	! epilogue
+-#ifdef __NDS32_ISA_V3M__
+-	pop25	$r10, 0
+-#else
+-	lmw.bim	$r6, [$sp], $r10, 2
+-	ret
+-#endif
+-	.size	__divdi3, .-__divdi3
+-#endif /* L_divdi3 */
+-
+-
+-
+-#ifdef L_modsi3
+-
+-	.text
+-	.align	2
+-	.globl	__modsi3
+-	.type	__modsi3, @function
+-__modsi3:
+-	! ---------------------------------------------------------------------
+-	! neg=0;
+-	! if (a<0)
+-	! {   a=-a;
+-	!     neg=1;
+-	! }
+-	! ---------------------------------------------------------------------
+-	sltsi	$r5, $r0, 0			! $r5  <- neg < 0 ? 1 : 0
+-	subri	$r4, $r0, 0			! $r4  <- -a
+-	cmovn	$r0, $r4, $r5			! $r0  <- |a|
+-	! ---------------------------------------------------------------------
+-	! if (b < 0)
+-#ifndef __NDS32_PERF_EXT__
+-	! ---------------------------------------------------------------------
+-	bgez	$r1, .L3			! if b >= 0, skip
+-	! ---------------------------------------------------------------------
+-	!     b = -b;
+-	! ---------------------------------------------------------------------
+-	subri	$r1, $r1, 0			! $r1  <- |b|
+-.L3:
+-	! ---------------------------------------------------------------------
+-	!!res = udivmodsi4 (a, b, 1);
+-	! if (den != 0)
+-	! ---------------------------------------------------------------------
+-#else /* __NDS32_PERF_EXT__ */
+-	!     b = -b;
+-	!!res = udivmodsi4 (a, b, 1);
+-	! if (den != 0)
+-	! ---------------------------------------------------------------------
+-	abs	$r1, $r1			! $r1  <- |b|
+-#endif /* __NDS32_PERF_EXT__ */
+-	beqz	$r1, .L1			! if den == 0, skip
+-	! ---------------------------------------------------------------------
+-	! {   bit = 1;
+-	!     res = 0;
+-	! ---------------------------------------------------------------------
+-	movi	$r4, 1				! $r4  <- bit = 1
+-#ifndef __OPTIMIZE_SIZE__
+-.L6:
+-#endif
+-	! ---------------------------------------------------------------------
+-	!     while (den < num&&bit && !(den & (1L << 31)))
+-	! ---------------------------------------------------------------------
+-	slt	$ta, $r1, $r0			! $ta  <- den < num ?
+-	beqz	$ta, .L5			! if no, skip
+-	! ---------------------------------------------------------------------
+-	!     {   den << = 1;
+-	!         bit << = 1;
+-	!     }
+-	! ---------------------------------------------------------------------
+-#if defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__)
+-	clz	$r3, $r1			! $r3  <- leading zero count for den
+-	clz	$ta, $r0			! $ta  <- leading zero count for num
+-	sub	$r3, $r3, $ta			! $r3  <- number of bits to shift
+-	sll	$r1, $r1, $r3			! $r1  <- den
+-	sll	$r4, $r4, $r3			! $r2  <- bit
+-#else
+-	slli	$r1, $r1, 1			! $r1  <- den << = 1
+-	slli	$r4, $r4, 1			! $r4  <- bit << = 1
+-	b	.L6				! continue loop
+-#endif
+-.L5:
+-	! ---------------------------------------------------------------------
+-	!     while (bit)
+-	!     {   if (num >= den)
+-	!         {   num -= den;
+-	!             res |= bit;
+-	!         }
+-	!         bit >> = 1;
+-	!         den >> = 1;
+-	!     }
+-	! }
+-	!!if (modwanted)
+-	!!    return num;
+-	!!return res;
+-	! ---------------------------------------------------------------------
+-	sub	$r2, $r0, $r1			! $r2  <- num - den
+-	slt	$ta, $r0, $r1			! $ta  <- num < den ?
+-	srli	$r4, $r4, 1			! $r4  <- bit >> = 1
+-	cmovz	$r0, $r2, $ta			! $r0  <- num = (num < den) ? num : num - den
+-	srli	$r1, $r1, 1			! $r1  <- den >> = 1
+-	bnez	$r4, .L5			! if bit != 0, continue loop
+-.L1:
+-	! ---------------------------------------------------------------------
+-	! if (neg)
+-	!     res = -res;
+-	! return res;
+-	! ---------------------------------------------------------------------
+-	subri	$r3, $r0, 0			! $r3  <- -res
+-	cmovn	$r0, $r3, $r5			! $r0  <- neg ? -res : res
+-	! ---------------------------------------------------------------------
+-	ret
+-	.size	__modsi3, .-__modsi3
+-#endif /* L_modsi3 */
+-
+-
+-
+-#ifdef L_moddi3
+-
+-	!--------------------------------------
+-	#ifdef __big_endian__
+-		#define  V1H  $r0
+-		#define  V1L  $r1
+-		#define  V2H  $r2
+-		#define  V2L  $r3
+-	#else
+-		#define  V1H  $r1
+-		#define  V1L  $r0
+-		#define  V2H  $r3
+-		#define  V2L  $r2
+-	#endif
+-	!--------------------------------------
+-	.text
+-	.align	2
+-	.globl	__moddi3
+-	.type	__moddi3, @function
+-__moddi3:
+-	! =====================================================================
+-	! stack allocation:
+-	! sp+32 +-----------------------+
+-	!       | $lp                   |
+-	! sp+28 +-----------------------+
+-	!       | $r6 - $r10            |
+-	! sp+8  +-----------------------+
+-	!       |                       |
+-	! sp+4  +-----------------------+
+-	!       |                       |
+-	! sp    +-----------------------+
+-	! =====================================================================
+-	! prologue
+-#ifdef __NDS32_ISA_V3M__
+-	push25	$r10, 8
+-#else
+-	smw.adm	$r6, [$sp], $r10, 2
+-	addi	$sp, $sp, -8
+-#endif
+-	! end of prologue
+-	!------------------------------------------
+-	! 	__moddi3 (DWtype u, DWtype v)
+-	!		{
+-	!			word_type c = 0;
+-	!			DWunion uu = {.ll = u};
+-	!			DWunion vv = {.ll = v};
+-	!			DWtype w;
+-	!		if (uu.s.high < 0)
+-	!  		  c = ~c,
+-	!		  uu.ll = -uu.ll;
+-	!---------------------------------------------
+-	move	$r8, V1L
+-	move	$r9, V1H
+-	move	$r6, V2L
+-	move	$r7, V2H
+-	movi	$r10, 0        ! r10 = c = 0
+-	bgez	V1H, .L80      ! if u > 0 , go L80
+-	bal	__negdi2
+-	move	$r8, V1L
+-	move	$r9, V1H
+-	movi	$r10, -1       ! r10 = c = ~c
+-	!------------------------------------------------
+-	!	 	if (vv.s.high < 0)
+-	!		  vv.ll = -vv.ll;
+-	!----------------------------------------------
+-.L80:
+-	bgez	$r7, .L81     !  if v > 0 , go L81
+-	move	V1L, $r6
+-	move	V1H, $r7
+-	bal	__negdi2
+-	move	$r6, V1L
+-	move	$r7, V1H
+-	!------------------------------------------
+-	!		(void) __udivmoddi4 (uu.ll, vv.ll, &w);
+-	!		if (c)
+-	!		  w = -w;
+-	!		return w;
+-	!-----------------------------------------
+-.L81:
+-	move	V2L, $r6
+-	move	V2H, $r7
+-	move	V1L, $r8
+-	move	V1H, $r9
+-	addi	$r4, $sp, 0
+-	bal	__udivmoddi4
+-	lwi	$r0, [$sp+(0)]    ! le: sp + 0 is low, be: sp + 0 is high
+-	lwi	$r1, [$sp+(4)]    ! le: sp + 4 is low, be: sp + 4 is high
+-	beqz	$r10, .L82
+-	bal	__negdi2
+-.L82:
+-	! epilogue
+-#ifdef __NDS32_ISA_V3M__
+-	pop25	$r10, 8
+-#else
+-	addi	$sp, $sp, 8
+-	lmw.bim	$r6, [$sp], $r10, 2
+-	ret
+-#endif
+-	.size	__moddi3, .-__moddi3
+-#endif /* L_moddi3 */
+-
+-
+-
+-#ifdef L_mulsi3
+-
+-	.text
+-	.align	2
+-	.globl	__mulsi3
+-	.type	__mulsi3, @function
+-__mulsi3:
+-	! ---------------------------------------------------------------------
+-	! r = 0;
+-	! while (a)
+-	! $r0:       r
+-	! $r1:       b
+-	! $r2:       a
+-	! ---------------------------------------------------------------------
+-	beqz	$r0, .L7			! if a == 0, done
+-	move	$r2, $r0			! $r2  <- a
+-	movi	$r0, 0				! $r0  <- r <- 0
+-.L8:
+-	! ---------------------------------------------------------------------
+-	! {   if (a & 1)
+-	!         r += b;
+-	!     a >> = 1;
+-	!     b << = 1;
+-	! }
+-	! $r0:       r
+-	! $r1:       b
+-	! $r2:       a
+-	! $r3:       scratch
+-	! $r4:       scratch
+-	! ---------------------------------------------------------------------
+-	andi	$r3, $r2, 1			! $r3  <- a & 1
+-	add	$r4, $r0, $r1			! $r4  <- r += b
+-	cmovn	$r0, $r4, $r3			! $r0  <- r
+-	srli	$r2, $r2, 1			! $r2  <- a >> = 1
+-	slli	$r1, $r1, 1			! $r1  <- b << = 1
+-	bnez	$r2, .L8			! if a != 0, continue loop
+-.L7:
+-	! ---------------------------------------------------------------------
+-	! $r0:       return code
+-	! ---------------------------------------------------------------------
+-	ret
+-	.size	__mulsi3, .-__mulsi3
+-#endif /* L_mulsi3 */
+-
+-
+-
+-#ifdef L_udivsi3
+-
+-	.text
+-	.align	2
+-	.globl	__udivsi3
+-	.type	__udivsi3, @function
+-__udivsi3:
+-	! ---------------------------------------------------------------------
+-	!!res=udivmodsi4(a,b,0);
+-	! res=0;
+-	! if (den!=0)
+-	! ---------------------------------------------------------------------
+-	movi	$r2, 0				! $r2  <- res=0
+-	beqz	$r1, .L1			! if den==0, skip
+-	! ---------------------------------------------------------------------
+-	! {   bit=1;
+-	! ---------------------------------------------------------------------
+-	movi	$r4, 1				! $r4  <- bit=1
+-#ifndef __OPTIMIZE_SIZE__
+-.L6:
+-#endif
+-	! ---------------------------------------------------------------------
+-	!     while (den<num
+-	! ---------------------------------------------------------------------
+-	slt	$ta, $r1, $r0			! $ta  <- den<num?
+-	beqz	$ta, .L5			! if no, skip
+-	! ---------------------------------------------------------------------
+-	!          &&bit&&!(den&(1L<<31)))
+-	! ---------------------------------------------------------------------
+-	bltz	$r1, .L5			! if den<0, skip
+-	! ---------------------------------------------------------------------
+-	!     {   den<<=1;
+-	!         bit<<=1;
+-	!     }
+-	! ---------------------------------------------------------------------
+-#if defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__)
+-	clz	$r3, $r1			! $r3  <- leading zero count for den
+-	clz	$ta, $r0			! $ta  <- leading zero count for num
+-	sub	$r3, $r3, $ta			! $r3  <- number of bits to shift
+-	sll	$r1, $r1, $r3			! $r1  <- den
+-	sll	$r2, $r2, $r3			! $r2  <- bit
+-#else
+-	slli	$r1, $r1, 1			! $r1  <- den<<=1
+-	slli	$r4, $r4, 1			! $r4  <- bit<<=1
+-	b	.L6				! continue loop
+-#endif
+-.L5:
+-	! ---------------------------------------------------------------------
+-	!     while (bit)
+-	!     {   if (num>=den)
+-	! ---------------------------------------------------------------------
+-	slt	$ta, $r0, $r1			! $ta  <- num<den?
+-	bnez	$ta, .L9			! if yes, skip
+-	! ---------------------------------------------------------------------
+-	!         {   num-=den;
+-	!             res|=bit;
+-	!         }
+-	! ---------------------------------------------------------------------
+-	sub	$r0, $r0, $r1			! $r0  <- num-=den
+-	or	$r2, $r2, $r4			! $r2  <- res|=bit
+-.L9:
+-	! ---------------------------------------------------------------------
+-	!         bit>>=1;
+-	!         den>>=1;
+-	!     }
+-	! }
+-	!!if (modwanted)
+-	!!    return num;
+-	!!return res;
+-	! ---------------------------------------------------------------------
+-	srli	$r4, $r4, 1			! $r4  <- bit>>=1
+-	srli	$r1, $r1, 1			! $r1  <- den>>=1
+-	bnez	$r4, .L5			! if bit!=0, continue loop
+-.L1:
+-	! ---------------------------------------------------------------------
+-	! return res;
+-	! ---------------------------------------------------------------------
+-	move	$r0, $r2			! $r0  <- return value
+-	! ---------------------------------------------------------------------
+-	! ---------------------------------------------------------------------
+-	ret
+-	.size	__udivsi3, .-__udivsi3
+-#endif /* L_udivsi3 */
+-
+-
+-
+-#ifdef L_udivdi3
+-
+-	!--------------------------------------
+-	#ifdef __big_endian__
+-		#define  V1H  $r0
+-		#define  V1L  $r1
+-		#define  V2H  $r2
+-		#define  V2L  $r3
+-	#else
+-		#define  V1H  $r1
+-		#define  V1L  $r0
+-		#define  V2H  $r3
+-		#define  V2L  $r2
+-	#endif
+-	!--------------------------------------
+-
+-	.text
+-	.align	2
+-	.globl	__udivdi3
+-	.type	__udivdi3, @function
+-__udivdi3:
+-	! prologue
+-#ifdef __NDS32_ISA_V3M__
+-	push25	$r8, 0
+-#else
+-	smw.adm	$r6, [$sp], $r8, 2
+-#endif
+-	! end of prologue
+-	movi	$r4, 0
+-	bal	__udivmoddi4
+-	! epilogue
+-#ifdef __NDS32_ISA_V3M__
+-	pop25	$r8, 0
+-#else
+-	lmw.bim	$r6, [$sp], $r8, 2
+-	ret
+-#endif
+-	.size	__udivdi3, .-__udivdi3
+-#endif /* L_udivdi3 */
+-
+-
+-
+-#ifdef L_udivmoddi4
+-
+-	.text
+-	.align	2
+-	.globl	fudiv_qrnnd
+-	.type	fudiv_qrnnd, @function
+-	#ifdef __big_endian__
+-		#define P1H     $r0
+-		#define P1L     $r1
+-		#define P2H     $r2
+-		#define P2L     $r3
+-		#define W6H     $r4
+-		#define W6L     $r5
+-		#define OFFSET_L 4
+-		#define OFFSET_H 0
+-	#else
+-		#define P1H     $r1
+-		#define P1L     $r0
+-		#define P2H     $r3
+-		#define P2L     $r2
+-		#define W6H     $r5
+-		#define W6L     $r4
+-		#define OFFSET_L 0
+-		#define OFFSET_H 4
+-	#endif
+-fudiv_qrnnd:
+-	!------------------------------------------------------
+-	! function:  fudiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator)
+-	!            divides a UDWtype, composed by the UWtype integers,HIGH_NUMERATOR (from $r4)
+-	!            and LOW_NUMERATOR(from $r5) by DENOMINATOR(from $r6), and places the quotient
+-	!            in $r7 and the remainder in $r8.
+-	!------------------------------------------------------
+-	!  in reg:$r4(n1), $r5(n0), $r6(d0)
+-	!  __d1 = ((USItype) (d) >> ((4 * 8) / 2));
+-	!  __d0 = ((USItype) (d) & (((USItype) 1 << ((4 * 8) / 2)) - 1));
+-	!  __r1 = (n1) % __d1;
+-	!  __q1 = (n1) / __d1;
+-	!  __m = (USItype) __q1 * __d0;
+-	!  __r1 = __r1 * ((USItype) 1 << ((4 * 8) / 2)) | ((USItype) (n0) >> ((4 * 8) / 2));
+-	!   if (__r1 < __m)
+-	!    {
+-	!------------------------------------------------------
+-	smw.adm $r0, [$sp], $r4, 2				! store $lp, when use BASELINE_V1,and must store $r0-$r3
+-	srli	$r7, $r6, 16					! $r7 = d1 =__ll_highpart (d)
+-	movi	$ta, 65535
+-	and	  $r8, $r6, $ta       				! $r8 = d0 = __ll_lowpart (d)
+-
+-	divr	$r9, $r10, $r4, $r7				! $r9 = q1, $r10 = r1
+-	and	  $r4, $r5, $ta       				! $r4 = __ll_lowpart (n0)
+-	slli	$r10, $r10, 16      				! $r10 = r1 << 16
+-	srli	$ta, $r5, 16        				! $ta = __ll_highpart (n0)
+-
+-	or	$r10, $r10, $ta					! $r10 <- $r0|$r3=__r1
+-	mul	$r5, $r9, $r8					! $r5 = m =  __q1*__d0
+-	slt	$ta, $r10, $r5					! $ta <- __r1<__m
+-	beqz	$ta, .L2					!if yes,skip
+-	!------------------------------------------------------
+-	!    __q1--, __r1 += (d);
+-	!    if (__r1 >= (d))
+-	!     {
+-	!------------------------------------------------------
+-
+-	add	$r10, $r10, $r6					!$r10 <- __r1+d=__r1
+-	addi	$r9, $r9, -1					!$r9 <- __q1--=__q1
+-	slt	$ta, $r10, $r6					!$ta <- __r1<d
+-	bnez	$ta, .L2					!if yes,skip
+-	!------------------------------------------------------
+-	!       if (__r1 < __m)
+-	!        {
+-	!------------------------------------------------------
+-
+-	slt	$ta, $r10, $r5					!$ta <- __r1<__m
+-	beqz	$ta, .L2					!if yes,skip
+-	!------------------------------------------------------
+-	!           __q1--, __r1 += (d);
+-	!        }
+-	!     }
+-	!  }
+-	!------------------------------------------------------
+-
+-	addi	$r9, $r9, -1					!$r9 <- __q1--=__q1
+-	add	$r10, $r10, $r6					!$r2 <- __r1+d=__r1
+-.L2:
+-	!------------------------------------------------------
+-	!  __r1 -= __m;
+-	!  __r0 = __r1 % __d1;
+-	!  __q0 = __r1 / __d1;
+-	!  __m = (USItype) __q0 * __d0;
+-	!  __r0 = __r0 * ((USItype) 1 << ((4 * 8) / 2)) \
+-	!        | ((USItype) (n0) & (((USItype) 1 << ((4 * 8) / 2)) - 1));
+-	!  if (__r0 < __m)
+-	!   {
+-	!------------------------------------------------------
+-	sub  $r10, $r10, $r5					!$r10 <- __r1-__m=__r1
+-	divr	$r7, $r10, $r10, $r7				!$r7 <- r1/__d1=__q0,$r10 <- r1%__d1=__r0
+-	slli	$r10, $r10, 16					!$r10 <- __r0<<16
+-	mul	$r5, $r8, $r7					!$r5 <- __q0*__d0=__m
+-	or	$r10, $r4, $r10					!$r3 <- $r0|__ll_lowpart (n0) =__r0
+-	slt	$ta, $r10, $r5					!$ta <- __r0<__m
+-	beqz	$ta, .L5					!if yes,skip
+-	!------------------------------------------------------
+-	!      __q0--, __r0 += (d);
+-	!      if (__r0 >= (d))
+-	!       {
+-	!------------------------------------------------------
+-
+-	add	$r10, $r10, $r6					!$r10 <- __r0+d=__r0
+-	addi	$r7, $r7, -1					!$r7 <- __q0--=__q0
+-	slt	$ta, $r10, $r6					!$ta <- __r0<d
+-	bnez	$ta, .L5					!if yes,skip
+-	!------------------------------------------------------
+-	!         if (__r0 < __m)
+-	!          {
+-	!------------------------------------------------------
+-
+-	slt	$ta, $r10, $r5					!$ta <- __r0<__m
+-	beqz	$ta, .L5					!if yes,skip
+-	!------------------------------------------------------
+-	!             __q0--, __r0 += (d);
+-	!          }
+-	!       }
+-	!   }
+-	!------------------------------------------------------
+-
+-	add	  $r10, $r10, $r6				!$r3 <- __r0+d=__r0
+-	addi	$r7, $r7, -1					!$r2 <- __q0--=__q0
+-.L5:
+-	!------------------------------------------------------
+-	!   __r0 -= __m;
+-	!   *q = (USItype) __q1 * ((USItype) 1 << ((4 * 8) / 2)) | __q0;
+-	!   *r = __r0;
+-	!}
+-	!------------------------------------------------------
+-
+-	sub		$r8, $r10, $r5				!$r8 = r = r0 = __r0-__m
+-	slli	$r9, $r9, 16					!$r9 <- __q1<<16
+-	or	$r7, $r9, $r7					!$r7 = q = $r9|__q0
+-	lmw.bim $r0, [$sp], $r4, 2
+-	ret
+-	.size	fudiv_qrnnd, .-fudiv_qrnnd
+-
+-	.align	2
+-	.globl	__udivmoddi4
+-	.type	__udivmoddi4, @function
+-__udivmoddi4:
+-	! =====================================================================
+-	! stack allocation:
+-	! sp+40 +------------------+
+-	!       | q1               |
+-	! sp+36 +------------------+
+-	!       | q0               |
+-	! sp+32 +------------------+
+-	!       | bm               |
+-	! sp+28 +------------------+
+-	!       | $lp              |
+-	! sp+24 +------------------+
+-	!       | $fp              |
+-	! sp+20 +------------------+
+-	!       | $r6 - $r10       |
+-	! sp    +------------------+
+-	! =====================================================================
+-
+-	addi	$sp, $sp, -40
+-	smw.bi	$r6, [$sp], $r10, 10
+-	!------------------------------------------------------
+-	!  d0 = dd.s.low;
+-	!  d1 = dd.s.high;
+-	!  n0 = nn.s.low;
+-	!  n1 = nn.s.high;
+-	!  if (d1 == 0)
+-	!   {
+-	!------------------------------------------------------
+-
+-	move	$fp, $r4					!$fp <- rp
+-	bnez	P2H, .L9					!if yes,skip
+-	!------------------------------------------------------
+-	!     if (d0 > n1)
+-	!      {
+-	!------------------------------------------------------
+-
+-	slt	$ta, P1H, P2L					!$ta <- n1<d0
+-	beqz	$ta, .L10					!if yes,skip
+-#ifndef __NDS32_PERF_EXT__
+-	smw.adm $r0, [$sp], $r5, 0
+-	move    $r0, P2L
+-	bal __clzsi2
+-	move	$r7, $r0
+-	lmw.bim $r0, [$sp], $r5, 0
+-#else
+-	clz  $r7, P2L
+-#endif
+-	swi     $r7,  [$sp+(28)]
+-	beqz	$r7, .L18					!if yes,skip
+-	!------------------------------------------------------
+-	!         d0 = d0 << bm;
+-	!         n1 = (n1 << bm) | (n0 >> ((4 * 8) - bm));
+-	!         n0 = n0 << bm;
+-	!      }
+-	!------------------------------------------------------
+-
+-	subri	$r5, $r7, 32					!$r5 <- 32-bm
+-	srl	$r5, P1L, $r5					!$r5 <- n0>>$r5
+-	sll	$r6, P1H, $r7					!$r6 <- n1<<bm
+-	or	P1H, $r6, $r5					!P2h <- $r5|$r6=n1
+-	sll	P1L, P1L, $r7					!P1H <- n0<<bm=n0
+-	sll	P2L, P2L, $r7					!P2L <- d0<<bm=d0
+-.L18:
+-	!------------------------------------------------------
+-	!    fudiv_qrnnd (&q0, &n0, n1, n0, d0);
+-	!    q1 = 0;
+-	!  } #if (d0 > n1)
+-	!------------------------------------------------------
+-
+-	move 	$r4,P1H						! give fudiv_qrnnd args
+-	move 	$r5,P1L						!
+-	move 	$r6,P2L						!
+-	bal	fudiv_qrnnd					!calcaulte q0 n0
+-	movi	$r6, 0						!P1L <- 0
+-	swi     $r7,[$sp+32]                                    !q0
+-	swi     $r6,[$sp+36]                                    !q1
+-	move    P1L,$r8						!n0
+-	b	.L19
+-.L10:
+-	!------------------------------------------------------
+-	!  else #if (d0 > n1)
+-	!   {
+-	!     if(d0 == 0)
+-	!------------------------------------------------------
+-
+-	bnez	P2L, .L20					!if yes,skip
+-	!------------------------------------------------------
+-	!      d0 = 1 / d0;
+-	!------------------------------------------------------
+-
+-	movi	$r4, 1						!P1L <- 1
+-	divr	P2L, $r4, $r4, P2L				!$r9=1/d0,P1L=1%d0
+-.L20:
+-
+-#ifndef __NDS32_PERF_EXT__
+-	smw.adm $r0, [$sp], $r5, 0
+-	move    $r0, P2L
+-	bal __clzsi2
+-	move    $r7, $r0
+-	lmw.bim $r0, [$sp], $r5, 0
+-#else
+-	clz  $r7, P2L
+-#endif
+-	swi     $r7,[$sp+(28)]      ! store bm
+-	beqz	$r7, .L28					! if yes,skip
+-	!------------------------------------------------------
+-	!         b = (4 * 8) - bm;
+-	!         d0 = d0 << bm;
+-	!         n2 = n1 >> b;
+-	!         n1 = (n1 << bm) | (n0 >> b);
+-	!         n0 = n0 << bm;
+-	!         fudiv_qrnnd (&q1, &n1, n2, n1, d0);
+-	!    }
+-	!------------------------------------------------------
+-
+-	subri	$r10, $r7, 32					!$r10 <- 32-bm=b
+-	srl	$r4, P1L, $r10					!$r4 <- n0>>b
+-	sll	$r5, P1H, $r7					!$r5 <- n1<<bm
+-	or	$r5, $r5, $r4					!$r5 <- $r5|$r4=n1  !for fun
+-	sll	P2L, P2L, $r7					!P2L <- d0<<bm=d0   !for fun
+-	sll	P1L, P1L, $r7					!P1L <- n0<<bm=n0
+-	srl	$r4, P1H, $r10					!$r4 <- n1>>b=n2    !for fun
+-
+-	move    $r6,P2L                     			!for fun
+-	bal	fudiv_qrnnd					!caculate q1, n1
+-
+-	swi  $r7,[$sp+(36)]          ! q1 store
+-	move P1H,$r8                 ! n1 store
+-
+-	move $r4,$r8	             ! prepare for next fudiv_qrnnd()
+-	move $r5,P1L
+-	move $r6,P2L
+-	b	.L29
+-.L28:
+-	!------------------------------------------------------
+-	!    else // bm != 0
+-	!     {
+-	!        n1 -= d0;
+-	!        q1 = 1;
+-	!
+-	!------------------------------------------------------
+-
+-	sub	P1H, P1H, P2L					!P1L <- n1-d0=n1
+-	movi	$ta, 1						!
+-	swi	$ta, [$sp+(36)]	                                !1 -> [$sp+(36)]
+-
+-	move $r4,P1H						! give fudiv_qrnnd args
+-	move $r5,P1L
+-	move $r6,P2L
+-.L29:
+-	!------------------------------------------------------
+-	!    fudiv_qrnnd (&q0, &n0, n1, n0, d0);
+-	!------------------------------------------------------
+-
+-	bal	fudiv_qrnnd					!calcuate  q0, n0
+-	swi     $r7,[$sp+(32)]  !q0 store
+-	move    P1L,$r8		!n0
+-.L19:
+-	!------------------------------------------------------
+-	!    if (rp != 0)
+-	!     {
+-	!------------------------------------------------------
+-
+-	beqz	$fp, .L31					!if yes,skip
+-	!------------------------------------------------------
+-	!         rr.s.low = n0 >> bm;
+-	!         rr.s.high = 0;
+-	!         *rp = rr.ll;
+-	!     }
+-	!------------------------------------------------------
+-
+-	movi    $r5, 0							!$r5 <- 0
+-	lwi     $r7,[$sp+(28)]    					!load bm
+-	srl	$r4, P1L, $r7     	     				!$r4 <- n0>>bm
+-        swi	$r4, [$fp+OFFSET_L]	  !r0				!$r4 -> [$sp+(48)]
+-	swi	$r5, [$fp+OFFSET_H]	  !r1				!0 -> [$sp+(52)]
+-	b .L31
+-.L9:
+-	!------------------------------------------------------
+-	! else # d1 == 0
+-	!  {
+-	!     if(d1 > n1)
+-	!      {
+-	!------------------------------------------------------
+-
+-	slt	$ta, P1H, P2H					!$ta <- n1<d1
+-	beqz	$ta, .L32					!if yes,skip
+-	!------------------------------------------------------
+-	!         q0 = 0;
+-	!	  q1 = 0;
+-	!         if (rp != 0)
+-	!          {
+-	!------------------------------------------------------
+-
+-	movi	$r5, 0						!$r5 <- 0
+-	swi	$r5, [$sp+(32)]	   !q0				!0 -> [$sp+(40)]=q1
+-	swi	$r5, [$sp+(36)]    !q1				!0 -> [$sp+(32)]=q0
+-	beqz	$fp, .L31					!if yes,skip
+-	!------------------------------------------------------
+-	!             rr.s.low = n0;
+-	!	      rr.s.high = n1;
+-	!             *rp = rr.ll;
+-	!          }
+-	!------------------------------------------------------
+-
+-	swi	P1L, [$fp+OFFSET_L]					!P1L -> [rp]
+-	swi	P1H, [$fp+OFFSET_H]					!P1H -> [rp+4]
+-	b	.L31
+-.L32:
+-#ifndef __NDS32_PERF_EXT__
+-	smw.adm $r0, [$sp], $r5, 0
+-	move    $r0, P2H
+-	bal __clzsi2
+-	move    $r7, $r0
+-	lmw.bim $r0, [$sp], $r5, 0
+-#else
+-	clz  $r7,P2H
+-#endif
+-        swi     $r7,[$sp+(28)] 	                                !$r7=bm  store
+-	beqz	$r7, .L42					!if yes,skip
+-	!------------------------------------------------------
+-	!        USItype m1, m0;
+-	!        b = (4 * 8) - bm;
+-	!        d1 = (d0 >> b) | (d1 << bm);
+-	!        d0 = d0 << bm;
+-	!        n2 = n1 >> b;
+-	!        n1 = (n0 >> b) | (n1 << bm);
+-	!        n0 = n0 << bm;
+-	!        fudiv_qrnnd (&q0, &n1, n2, n1, d1);
+-	!------------------------------------------------------
+-
+-	subri	$r10, $r7, 32					!$r10 <- 32-bm=b
+-	srl	$r5, P2L, $r10					!$r5 <- d0>>b
+-	sll	$r6, P2H, $r7					!$r6 <- d1<<bm
+-	or      $r6, $r5, $r6                                   !$r6 <- $r5|$r6=d1  !! func
+-	move	P2H, $r6 					!P2H <- d1
+-	srl     $r4, P1H, $r10                                  !$r4 <- n1>>b=n2    !!! func
+-	srl	$r8, P1L, $r10					!$r8 <- n0>>b       !!$r8
+-	sll     $r9, P1H, $r7                                   !$r9 <- n1<<bm
+-	or	$r5, $r8, $r9					!$r5 <- $r8|$r9=n1  !func
+-	sll     P2L, P2L, $r7                                   !P2L <- d0<<bm=d0
+-	sll	P1L, P1L, $r7					!P1L <- n0<<bm=n0
+-
+-	bal	fudiv_qrnnd					! cal  q0,n1
+-	swi     $r7,[$sp+(32)]
+-	move    P1H,$r8            ! fudiv_qrnnd (&q0, &n1, n2, n1, d1);
+-        move    $r6, $r7           ! from func
+-
+-	!----------------------------------------------------
+-	!       #umul_ppmm (m1, m0, q0, d0);
+-	!        do
+-	!         {     USItype __x0, __x1, __x2, __x3;
+-	!               USItype __ul, __vl, __uh, __vh;
+-	!               __ul = ((USItype) (q0) & (((USItype) 1 << ((4 * 8) / 2)) - 1));
+-	!               __uh = ((USItype) (q0) >> ((4 * 8) / 2));
+-	!               __vl = ((USItype) (d0) & (((USItype) 1 << ((4 * 8) / 2)) - 1));
+-	!               __vh = ((USItype) (d0) >> ((4 * 8) / 2));
+-	!               __x0 = (USItype) __ul * __vl;
+-	!               __x1 = (USItype) __ul * __vh;
+-	!               __x2 = (USItype) __uh * __vl;
+-	!               __x3 = (USItype) __uh * __vh;
+-	!               __x1 += ((USItype) (__x0) >> ((4 * 8) / 2));
+-	!               __x1 += __x2;
+-	!               if (__x1 < __x2)
+-	!                  __x3 += ((USItype) 1 << ((4 * 8) / 2));
+-	!               (m1) = __x3 + ((USItype) (__x1) >> ((4 * 8) / 2));
+-	!               (m0) = (USItype)(q0*d0);
+-	!        }
+-	!        if (m1 > n1)
+-	!---------------------------------------------------
+-#ifdef __NDS32_ISA_V3M__
+-        !mulr64  $r4, P2L, $r6
+-	smw.adm $r0, [$sp], $r3, 0
+-	move	P1L, P2L
+-	move	P2L, $r6
+-	movi	P1H, 0
+-	movi	P2H, 0
+-	bal	__muldi3
+-	movd44	$r4, $r0
+-	lmw.bim $r0, [$sp], $r3, 0
+-        move    $r8, W6H
+-        move    $r5, W6L
+-#else
+-        mulr64  $r4, P2L, $r6
+-        move    $r8, W6H
+-        move    $r5, W6L
+-#endif
+-	slt	$ta, P1H, $r8					!$ta <- n1<m1
+-	bnez	$ta, .L46					!if yes,skip
+-	!------------------------------------------------------
+-	!   if(m1 == n1)
+-	!------------------------------------------------------
+-
+-	bne	$r8, P1H, .L45					!if yes,skip
+-	!------------------------------------------------------
+-	!   if(m0 > n0)
+-	!------------------------------------------------------
+-
+-	slt	$ta, P1L, $r5					!$ta <- n0<m0
+-	beqz	$ta, .L45					!if yes,skip
+-.L46:
+-	!------------------------------------------------------
+-	!    {
+-	!       q0--;
+-	!       # sub_ddmmss (m1, m0, m1, m0, d1, d0);
+-	!       do
+-	!        {   USItype __x;
+-	!            __x = (m0) - (d0);
+-	!            (m1) = (m1) - (d1) - (__x > (m0));
+-	!            (m0) = __x;
+-	!        }
+-	!    }
+-	!------------------------------------------------------
+-
+-	sub	$r4, $r5, P2L					!$r4 <- m0-d0=__x
+-	addi	$r6, $r6, -1					!$r6 <- q0--=q0
+-	sub	$r8, $r8, P2H					!$r8 <- m1-d1
+-	swi	$r6, [$sp+(32)]	      ! q0			!$r6->[$sp+(32)]
+-	slt	$ta, $r5, $r4					!$ta <- m0<__x
+-	sub	$r8, $r8, $ta					!$r8 <- P1H-P1L=m1
+-	move	$r5, $r4					!$r5 <- __x=m0
+-.L45:
+-	!------------------------------------------------------
+-	!    q1 = 0;
+-	!    if (rp != 0)
+-	!     {
+-	!------------------------------------------------------
+-
+-	movi	$r4, 0						!$r4 <- 0
+-	swi	$r4, [$sp+(36)]					!0 -> [$sp+(40)]=q1
+-	beqz	$fp, .L31					!if yes,skip
+-	!------------------------------------------------------
+-	!      # sub_ddmmss (n1, n0, n1, n0, m1, m0);
+-	!      do
+-	!       {   USItype __x;
+-	!           __x = (n0) - (m0);
+-	!           (n1) = (n1) - (m1) - (__x > (n0));
+-	!           (n0) = __x;
+-	!       }
+-	!       rr.s.low = (n1 << b) | (n0 >> bm);
+-	!       rr.s.high = n1 >> bm;
+-	!       *rp = rr.ll;
+-	!------------------------------------------------------
+-
+-	sub	$r4, P1H, $r8					!$r4 <- n1-m1
+-	sub	$r6, P1L, $r5					!$r6 <- n0-m0=__x=n0
+-	slt	$ta, P1L, $r6					!$ta <- n0<__x
+-	sub	P1H, $r4, $ta					!P1H <- $r4-$ta=n1
+-	move    P1L, $r6
+-
+-	lwi     $r7,[$sp+(28)]         ! load bm
+-	subri   $r10,$r7,32
+-	sll	$r4, P1H, $r10					!$r4 <- n1<<b
+-	srl	$r5, P1L, $r7					!$r5 <- __x>>bm
+-	or	$r6, $r5, $r4					!$r6 <- $r5|$r4=rr.s.low
+-	srl	$r8, P1H, $r7					!$r8 <- n1>>bm =rr.s.high
+-	swi	$r6, [$fp+OFFSET_L]				!
+-	swi	$r8, [$fp+OFFSET_H]				!
+-	b	.L31
+-.L42:
+-	!------------------------------------------------------
+-	!  else
+-	!   {
+-	!     if(n1 > d1)
+-	!------------------------------------------------------
+-
+-	slt	$ta, P2H, P1H					!$ta <- P2H<P1H
+-	bnez	$ta, .L52					!if yes,skip
+-	!------------------------------------------------------
+-	!     if (n0 >= d0)
+-	!------------------------------------------------------
+-
+-	slt	$ta, P1L, P2L					!$ta <- P1L<P2L
+-	bnez	$ta, .L51					!if yes,skip
+-	!------------------------------------------------------
+-	!        q0 = 1;
+-	!        do
+-	!         {   USItype __x;
+-	!             __x = (n0) - (d0);
+-	!             (n1) = (n1) - (d1) - (__x > (n0));
+-	!             (n0) = __x;
+-	!         }
+-	!------------------------------------------------------
+-.L52:
+-	sub	$r4, P1H, P2H					!$r4 <- P1H-P2H
+-	sub	$r6, P1L, P2L					!$r6 <- no-d0=__x=n0
+-	slt	$ta, P1L, $r6					!$ta <- no<__x
+-	sub	P1H, $r4, $ta					!P1H <- $r4-$ta=n1
+-	move    P1L, $r6					!n0
+-	movi	$r5, 1						!
+-	swi	$r5, [$sp+(32)]					!1 -> [$sp+(32)]=q0
+-	b	.L54
+-.L51:
+-	!------------------------------------------------------
+-	!       q0 = 0;
+-	!------------------------------------------------------
+-
+-	movi    $r5,0
+-	swi	$r5, [$sp+(32)]					!$r5=0 -> [$sp+(32)]
+-.L54:
+-	!------------------------------------------------------
+-	!       q1 = 0;
+-	!       if (rp != 0)
+-	!        {
+-	!------------------------------------------------------
+-
+-	movi	$r5, 0						!
+-	swi	$r5, [$sp+(36)]					!0 -> [$sp+(36)]
+-	beqz	$fp, .L31
+-	!------------------------------------------------------
+-	!          rr.s.low = n0;
+-	!          rr.s.high = n1;
+-	!          *rp = rr.ll;
+-	!        }
+-	!------------------------------------------------------
+-
+-	swi	P1L, [$fp+OFFSET_L]				!remainder
+-	swi	P1H, [$fp+OFFSET_H]				!
+-.L31:
+-	!------------------------------------------------------
+-	! const DWunion ww = {{.low = q0, .high = q1}};
+-	! return ww.ll;
+-	!}
+-	!------------------------------------------------------
+-
+-	lwi	P1L, [$sp+(32)]					!quotient
+-	lwi	P1H, [$sp+(36)]
+-	lmw.bim	$r6, [$sp], $r10, 10
+-	addi	$sp, $sp, 12
+-	ret
+-	.size	__udivmoddi4, .-__udivmoddi4
+-#endif /* L_udivmoddi4 */
+-
+-
+-
+-#ifdef L_umodsi3
+-
+-	! =====================================================================
+-	.text
+-	.align	2
+-	.globl	__umodsi3
+-	.type	__umodsi3, @function
+-__umodsi3:
+-	! ---------------------------------------------------------------------
+-	!!res=udivmodsi4(a,b,1);
+-	! if (den==0)
+-	!     return num;
+-	! ---------------------------------------------------------------------
+-	beqz	$r1, .L1			! if den==0, skip
+-	! ---------------------------------------------------------------------
+-	! bit=1;
+-	! res=0;
+-	! ---------------------------------------------------------------------
+-	movi	$r4, 1				! $r4  <- bit=1
+-#ifndef __OPTIMIZE_SIZE__
+-.L6:
+-#endif
+-	! ---------------------------------------------------------------------
+-	! while (den<num
+-	! ---------------------------------------------------------------------
+-	slt	$ta, $r1, $r0			! $ta  <- den<num?
+-	beqz	$ta, .L5			! if no, skip
+-	! ---------------------------------------------------------------------
+-	!      &&bit&&!(den&(1L<<31)))
+-	! ---------------------------------------------------------------------
+-	bltz	$r1, .L5			! if den<0, skip
+-	! ---------------------------------------------------------------------
+-	! {   den<<=1;
+-	!     bit<<=1;
+-	! }
+-	! ---------------------------------------------------------------------
+-#if defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__)
+-	clz	$r3, $r1			! $r3  <- leading zero count for den
+-	clz	$ta, $r0			! $ta  <- leading zero count for num
+-	sub	$r3, $r3, $ta			! $r3  <- number of bits to shift
+-	sll	$r1, $r1, $r3			! $r1  <- den
+-	sll	$r4, $r4, $r3			! $r2  <- bit
+-#else
+-	slli	$r1, $r1, 1			! $r1  <- den<<=1
+-	slli	$r4, $r4, 1			! $r4  <- bit<<=1
+-	b	.L6				! continue loop
+-#endif
+-.L5:
+-	! ---------------------------------------------------------------------
+-	! while (bit)
+-	! {   if (num>=den)
+-	!     {   num-=den;
+-	!         res|=bit;
+-	!     }
+-	!     bit>>=1;
+-	!     den>>=1;
+-	! }
+-	!!if (modwanted)
+-	!!    return num;
+-	!!return res;
+-	! ---------------------------------------------------------------------
+-	sub	$r2, $r0, $r1			! $r2  <- num-den
+-	slt	$ta, $r0, $r1			! $ta  <- num<den?
+-	srli	$r4, $r4, 1			! $r4  <- bit>>=1
+-	cmovz	$r0, $r2, $ta			! $r0  <- num=(num<den)?num:num-den
+-	srli	$r1, $r1, 1			! $r1  <- den>>=1
+-	bnez	$r4, .L5			! if bit!=0, continue loop
+-.L1:
+-	! ---------------------------------------------------------------------
+-	! return res;
+-	! ---------------------------------------------------------------------
+-	ret
+-	.size	__umodsi3, .-__umodsi3
+-#endif /* L_umodsi3 */
+-
+-
+-
+-#ifdef L_umoddi3
+-
+-	!--------------------------------------
+-	#ifdef __big_endian__
+-		#define  V1H  $r0
+-		#define  V1L  $r1
+-		#define  V2H  $r2
+-		#define  V2L  $r3
+-	#else
+-		#define  V1H  $r1
+-		#define  V1L  $r0
+-		#define  V2H  $r3
+-		#define  V2L  $r2
+-	#endif
+-	!--------------------------------------
+-	.text
+-	.align	2
+-	.globl	__umoddi3
+-	.type	__umoddi3, @function
+-__umoddi3:
+-	! prologue
+-	addi	$sp, $sp, -12
+-	swi $lp, [$sp+(0)]
+-	! end of prologue
+-	addi	$r4, $sp, 4
+-	bal	__udivmoddi4
+-	lwi	$r0, [$sp+(4)]    ! __udivmoddi4 return low when LE mode or return high when BE mode
+-	lwi	$r1, [$sp+(8)]    !
+-.L82:
+-	! epilogue
+-	lwi $lp, [$sp+(0)]
+-	addi	$sp, $sp, 12
+-	ret
+-	.size	__umoddi3, .-__umoddi3
+-#endif /* L_umoddi3 */
+-
+-
+-
+-#ifdef L_muldi3
+-
+-#ifdef __big_endian__
+-	#define P1H	$r0
+-	#define P1L	$r1
+-	#define P2H	$r2
+-	#define P2L	$r3
+-
+-	#define V2H $r4
+-	#define V2L $r5
+-#else
+-	#define P1H	$r1
+-	#define P1L	$r0
+-	#define P2H	$r3
+-	#define P2L	$r2
+-
+-	#define V2H $r5
+-	#define V2L $r4
+-#endif
+-
+-	! ====================================================================
+-	.text
+-	.align	2
+-	.globl	__muldi3
+-	.type	__muldi3, @function
+-__muldi3:
+-	! parameter passing for libgcc functions normally involves 2 doubles
+-	!---------------------------------------
+-#ifdef __NDS32_ISA_V3M__
+-	! There is no mulr64 instruction in Andes ISA V3M.
+-	! So we must provide a sequence of calculations to complete the job.
+-	smw.adm   $r6, [$sp], $r9, 0x0
+-	zeh33	  $r4, P1L
+-	srli      $r7, P1L, 16
+-	zeh33     $r5, P2L
+-	mul       $r6, $r5, $r4
+-	mul33     $r5, $r7
+-	srli      $r8, P2L, 16
+-	mov55     $r9, $r5
+-	maddr32   $r9, $r8, $r4
+-	srli      $r4, $r6, 16
+-	add       $r4, $r9, $r4
+-	slt45     $r4, $r5
+-	slli      $r5, $r15, 16
+-	maddr32   $r5, $r8, $r7
+-	mul       P2L, P1H, P2L
+-	srli      $r7, $r4, 16
+-	maddr32   P2L, P2H, P1L
+-	add333    P1H, $r5, $r7
+-	slli      $r4, $r4, 16
+-	zeh33     $r6, $r6
+-	add333    P1L, $r4, $r6
+-	add333    P1H, P2L, P1H
+-	lmw.bim   $r6, [$sp], $r9, 0x0
+-	ret
+-#else /* not  __NDS32_ISA_V3M__ */
+-	mul	    $ta, P1L, P2H
+-	mulr64	$r4, P1L, P2L
+-	maddr32	$ta, P1H, P2L
+-	move	  P1L, V2L
+-	add	    P1H, $ta, V2H
+-	ret
+-#endif /* not __NDS32_ISA_V3M__ */
+-	.size	__muldi3, .-__muldi3
+-#endif /* L_muldi3 */
+-
+-
+-
+-#ifdef L_addsub_df
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-	#define P3L     $r4
+-	#define P3H     $r5
+-	#define O1L     $r7
+-	#define O1H	$r8
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-	#define P3H     $r4
+-	#define P3L     $r5
+-	#define O1H     $r7
+-	#define O1L	$r8
+-#endif
+-	.text
+-	.align	2
+-	.global  __subdf3
+-	.type    __subdf3, @function
+-__subdf3:
+-	push    $lp
+-	pushm   $r6, $r10
+-
+-	move    $r4, #0x80000000
+-	xor     P2H, P2H, $r4
+-
+-	j       .Lsdpadd
+-
+-	.global  __adddf3
+-	.type    __adddf3, @function
+-__adddf3:
+-	push    $lp
+-	pushm   $r6, $r10
+-.Lsdpadd:
+-	slli    $r6, P1H, #1
+-	srli    $r6, $r6, #21
+-	slli    P3H, P1H, #11
+-	srli    $r10, P1L, #21
+-	or      P3H, P3H, $r10
+-	slli    P3L, P1L, #11
+-	move    O1L, #0x80000000
+-	or      P3H, P3H, O1L
+-	slli    $r9, P2H, #1
+-	srli    $r9, $r9, #21
+-	slli    O1H, P2H, #11
+-	srli    $r10, P2L, #21
+-	or      O1H, O1H, $r10
+-	or      O1H, O1H, O1L
+-	slli    O1L, P2L, #11
+-
+-	addi    $r10, $r6, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LEspecA
+-
+-.LElab1:
+-	addi    $r10, $r9, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LEspecB
+-
+-.LElab2:
+-	#NORMd($r4, P2L, P1L)
+-	bnez    P3H, .LL1
+-	bnez    P3L, .LL2
+-	move    $r6, #0
+-	j       .LL3
+-.LL2:
+-	move    P3H, P3L
+-	move    P3L, #0
+-	move    P2L, #32
+-	sub     $r6, $r6, P2L
+-.LL1:
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r2, $r5
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r3, $r5
+-	move	$r0, $r5
+-	bal	__clzsi2
+-	move	$r2, $r0
+-	popm	$r3, $r5
+-	popm	$r0, $r1
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r3, $r4
+-#else
+-	pushm	$r0, $r2
+-	pushm	$r4, $r5
+-	move	$r0, $r4
+-	bal	__clzsi2
+-	move	$r3, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r2
+-#endif
+-#endif /* __big_endian__ */
+-	beqz    P2L, .LL3
+-	sub     $r6, $r6, P2L
+-	subri   P1L, P2L, #32
+-	srl     P1L, P3L, P1L
+-	sll     P3L, P3L, P2L
+-	sll     P3H, P3H, P2L
+-	or      P3H, P3H, P1L
+-.LL3:
+-	#NORMd End
+-
+-	#NORMd($r7, P2L, P1L)
+-	bnez    O1H, .LL4
+-	bnez    O1L, .LL5
+-	move    $r9, #0
+-	j       .LL6
+-.LL5:
+-	move    O1H, O1L
+-	move    O1L, #0
+-	move    P2L, #32
+-	sub     $r9, $r9, P2L
+-.LL4:
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r2, O1H
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r3, $r5
+-	move	$r0, O1H
+-	bal	__clzsi2
+-	move	$r2, $r0
+-	popm	$r3, $r5
+-	popm	$r0, $r1
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r3, O1H
+-#else
+-	pushm	$r0, $r2
+-	pushm	$r4, $r5
+-	move	$r0, O1H
+-	bal	__clzsi2
+-	move	$r3, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r2
+-#endif
+-#endif /* __big_endian__ */
+-	beqz    P2L, .LL6
+-	sub     $r9, $r9, P2L
+-	subri   P1L, P2L, #32
+-	srl     P1L, O1L, P1L
+-	sll     O1L, O1L, P2L
+-	sll     O1H, O1H, P2L
+-	or      O1H, O1H, P1L
+-.LL6:
+-	#NORMd End
+-
+-	move    $r10, #0x80000000
+-	and     P1H, P1H, $r10
+-
+-	beq     $r6, $r9, .LEadd3
+-	slts    $r15, $r9, $r6
+-	beqzs8  .Li1
+-	sub     $r9, $r6, $r9
+-	move    P2L, #0
+-.LL7:
+-	move    $r10, #0x20
+-	slt     $r15, $r9, $r10
+-	bnezs8  .LL8
+-	or      P2L, P2L, O1L
+-	move    O1L, O1H
+-	move    O1H, #0
+-	addi    $r9, $r9, #0xffffffe0
+-	bnez    O1L, .LL7
+-.LL8:
+-	beqz    $r9, .LEadd3
+-	move    P1L, O1H
+-	move    $r10, O1L
+-	srl     O1L, O1L, $r9
+-	srl     O1H, O1H, $r9
+-	subri   $r9, $r9, #0x20
+-	sll     P1L, P1L, $r9
+-	or      O1L, O1L, P1L
+-	sll     $r10, $r10, $r9
+-	or      P2L, P2L, $r10
+-	beqz    P2L, .LEadd3
+-	ori     O1L, O1L, #1
+-	j       .LEadd3
+-.Li1:
+-	move    $r15, $r6
+-	move    $r6, $r9
+-	sub     $r9, $r9, $r15
+-	move    P2L, #0
+-.LL10:
+-	move    $r10, #0x20
+-	slt     $r15, $r9, $r10
+-	bnezs8  .LL11
+-	or      P2L, P2L, P3L
+-	move    P3L, P3H
+-	move    P3H, #0
+-	addi    $r9, $r9, #0xffffffe0
+-	bnez    P3L, .LL10
+-.LL11:
+-	beqz    $r9, .LEadd3
+-	move    P1L, P3H
+-	move    $r10, P3L
+-	srl     P3L, P3L, $r9
+-	srl     P3H, P3H, $r9
+-	subri   $r9, $r9, #0x20
+-	sll     P1L, P1L, $r9
+-	or      P3L, P3L, P1L
+-	sll     $r10, $r10, $r9
+-	or      P2L, P2L, $r10
+-	beqz    P2L, .LEadd3
+-	ori     P3L, P3L, #1
+-
+-.LEadd3:
+-	xor     $r10, P1H, P2H
+-	sltsi   $r15, $r10, #0
+-	bnezs8  .LEsub1
+-
+-	#ADD(P3L, O1L)
+-	add     P3L, P3L, O1L
+-	slt     $r15, P3L, O1L
+-
+-	#ADDCC(P3H, O1H)
+-	beqzs8  .LL13
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .LL14
+-	addi    P3H, P3H, #0x1
+-	j       .LL15
+-.LL14:
+-	move    $r15, #1
+-	add     P3H, P3H, $r15
+-	slt     $r15, P3H, $r15
+-	j       .LL15
+-.LL13:
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-.LL15:
+-
+-	beqzs8  .LEres
+-	andi    $r10, P3L, #1
+-	beqz    $r10, .Li3
+-	ori     P3L, P3L, #2
+-.Li3:
+-	srli    P3L, P3L, #1
+-	slli    $r10, P3H, #31
+-	or      P3L, P3L, $r10
+-	srli    P3H, P3H, #1
+-	move    $r10, #0x80000000
+-	or      P3H, P3H, $r10
+-	addi    $r6, $r6, #1
+-	subri   $r15, $r6, #0x7ff
+-	bnezs8  .LEres
+-	move    $r10, #0x7ff00000
+-	or      P1H, P1H, $r10
+-	move    P1L, #0
+-	j       .LEretA
+-
+-.LEsub1:
+-	#SUB(P3L, O1L)
+-	move    $r15, P3L
+-	sub     P3L, P3L, O1L
+-	slt     $r15, $r15, P3L
+-
+-	#SUBCC(P3H, O1H)
+-	beqzs8  .LL16
+-	move    $r15, P3H
+-	sub     P3H, P3H, O1H
+-	slt     $r15, $r15, P3H
+-	beqzs8  .LL17
+-	subi333 P3H, P3H, #1
+-	j       .LL18
+-.LL17:
+-	move    $r15, P3H
+-	subi333 P3H, P3H, #1
+-	slt     $r15, $r15, P3H
+-	j       .LL18
+-.LL16:
+-	move    $r15, P3H
+-	sub     P3H, P3H, O1H
+-	slt     $r15, $r15, P3H
+-.LL18:
+-
+-	beqzs8  .Li5
+-	move    $r10, #0x80000000
+-	xor     P1H, P1H, $r10
+-
+-	subri   P3H, P3H, #0
+-	beqz    P3L, .LL19
+-	subri   P3L, P3L, #0
+-	subi45  P3H, #1
+-.LL19:
+-
+-.Li5:
+-	#NORMd($r4, $r9, P1L)
+-	bnez    P3H, .LL20
+-	bnez    P3L, .LL21
+-	move    $r6, #0
+-	j       .LL22
+-.LL21:
+-	move    P3H, P3L
+-	move    P3L, #0
+-	move    $r9, #32
+-	sub     $r6, $r6, $r9
+-.LL20:
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r9, P3H
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, P3H
+-	bal	__clzsi2
+-	move	$r9, $r0
+-	popm	$r0, $r5
+-#endif
+-	beqz    $r9, .LL22
+-	sub     $r6, $r6, $r9
+-	subri   P1L, $r9, #32
+-	srl     P1L, P3L, P1L
+-	sll     P3L, P3L, $r9
+-	sll     P3H, P3H, $r9
+-	or      P3H, P3H, P1L
+-.LL22:
+-	#NORMd End
+-
+-	or      $r10, P3H, P3L
+-	bnez    $r10, .LEres
+-	move    P1H, #0
+-
+-.LEres:
+-	blez    $r6, .LEund
+-
+-.LElab8:
+-	#ADD(P3L, $0x400)
+-	move    $r15, #0x400
+-	add     P3L, P3L, $r15
+-	slt     $r15, P3L, $r15
+-
+-	#ADDCC(P3H, $0x0)
+-	beqzs8  .LL25
+-	add     P3H, P3H, $r15
+-	slt     $r15, P3H, $r15
+-.LL25:
+-
+-	#ADDC($r6, $0x0)
+-	add     $r6, $r6, $r15
+-	srli    $r10, P3L, #11
+-	andi    $r10, $r10, #1
+-	sub     P3L, P3L, $r10
+-	srli    P1L, P3L, #11
+-	slli    $r10, P3H, #21
+-	or      P1L, P1L, $r10
+-	slli    $r10, P3H, #1
+-	srli    $r10, $r10, #12
+-	or      P1H, P1H, $r10
+-	slli    $r10, $r6, #20
+-	or      P1H, P1H, $r10
+-
+-.LEretA:
+-.LE999:
+-	popm    $r6, $r10
+-	pop     $lp
+-	ret5    $lp
+-
+-.LEspecA:
+-	#ADD(P3L, P3L)
+-	move    $r15, P3L
+-	add     P3L, P3L, P3L
+-	slt     $r15, P3L, $r15
+-
+-	#ADDC(P3H, P3H)
+-	add     P3H, P3H, P3H
+-	add     P3H, P3H, $r15
+-	bnez    $r6, .Li7
+-	or      $r10, P3H, P3L
+-	beqz    $r10, .Li8
+-	j       .LElab1
+-.Li8:
+-	subri   $r15, $r9, #0x7ff
+-	beqzs8  .LEspecB
+-	add     P3L, P2H, P2H
+-	or      $r10, P3L, P2L
+-	bnez    $r10, .LEretB
+-	sltsi   $r15, P2H, #0
+-	bnezs8  .LEretA
+-
+-.LEretB:
+-	move    P1L, P2L
+-	move    P1H, P2H
+-	j       .LE999
+-.Li7:
+-	or      $r10, P3H, P3L
+-	bnez    $r10, .LEnan
+-	subri   $r15, $r9, #0x7ff
+-	bnezs8  .LEretA
+-	xor     $r10, P1H, P2H
+-	sltsi   $r15, $r10, #0
+-	bnezs8  .LEnan
+-	j       .LEretB
+-
+-.LEspecB:
+-	#ADD(O1L, O1L)
+-	move    $r15, O1L
+-	add     O1L, O1L, O1L
+-	slt     $r15, O1L, $r15
+-
+-	#ADDC(O1H, O1H)
+-	add     O1H, O1H, O1H
+-	add     O1H, O1H, $r15
+-	bnez    $r9, .Li11
+-	or      $r10, O1H, O1L
+-	beqz    $r10, .LEretA
+-	j       .LElab2
+-.Li11:
+-	or      $r10, O1H, O1L
+-	beqz    $r10, .LEretB
+-
+-.LEnan:
+-	move    P1H, #0xfff80000
+-	move    P1L, #0
+-	j       .LEretA
+-
+-.LEund:
+-	subri   $r9, $r6, #1
+-	move    P2L, #0
+-.LL26:
+-	move    $r10, #0x20
+-	slt     $r15, $r9, $r10
+-	bnezs8  .LL27
+-	or      P2L, P2L, P3L
+-	move    P3L, P3H
+-	move    P3H, #0
+-	addi    $r9, $r9, #0xffffffe0
+-	bnez    P3L, .LL26
+-.LL27:
+-	beqz    $r9, .LL28
+-	move    P1L, P3H
+-	move    $r10, P3L
+-	srl     P3L, P3L, $r9
+-	srl     P3H, P3H, $r9
+-	subri   $r9, $r9, #0x20
+-	sll     P1L, P1L, $r9
+-	or      P3L, P3L, P1L
+-	sll     $r10, $r10, $r9
+-	or      P2L, P2L, $r10
+-	beqz    P2L, .LL28
+-	ori     P3L, P3L, #1
+-.LL28:
+-	move    $r6, #0
+-	j       .LElab8
+-	.size   __subdf3, .-__subdf3
+-	.size   __adddf3, .-__adddf3
+-#endif /* L_addsub_df */
+-
+-
+-
+-#ifdef L_mul_sf
+-
+-#if !defined (__big_endian__)
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-#endif
+-	.text
+-	.align	2
+-	.global	__mulsf3
+-	.type	__mulsf3, @function
+-__mulsf3:
+-	push    $lp
+-	pushm   $r6, $r10
+-
+-	srli    $r3, $r0, #23
+-	andi    $r3, $r3, #0xff
+-	srli    $r5, $r1, #23
+-	andi    $r5, $r5, #0xff
+-	move    $r6, #0x80000000
+-	slli    $r2, $r0, #8
+-	or      $r2, $r2, $r6
+-	slli    $r4, $r1, #8
+-	or      $r4, $r4, $r6
+-	xor     $r8, $r0, $r1
+-	and     $r6, $r6, $r8
+-
+-	addi    $r8, $r3, #-1
+-	slti    $r15, $r8, #0xfe
+-	beqzs8  .LFspecA
+-
+-.LFlab1:
+-	addi    $r8, $r5, #-1
+-	slti    $r15, $r8, #0xfe
+-	beqzs8  .LFspecB
+-
+-.LFlab2:
+-	move    $r10, $r3
+-/* This is a 64-bit multiple. ($r2, $r7) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r2, $r2, $r4
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r4, $r5
+-	move	P1L, $r2
+-	movi	P1H, #0
+-	move	P2L, $r4
+-	movi	P2H, #0
+-	bal	__muldi3
+-	movd44	$r2, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r1
+-#endif
+-#ifndef __big_endian__
+-	move    $r7, $r2
+-	move    $r2, $r3
+-#else
+-	move	$r7, $r3
+-#endif
+-	move    $r3, $r10
+-
+-	beqz    $r7, .Li17
+-	ori     $r2, $r2, #1
+-
+-.Li17:
+-	sltsi   $r15, $r2, #0
+-	bnezs8  .Li18
+-	slli    $r2, $r2, #1
+-	addi    $r3, $r3, #-1
+-.Li18:
+-	addi    $r8, $r5, #0xffffff82
+-	add     $r3, $r3, $r8
+-	addi    $r8, $r3, #-1
+-	slti    $r15, $r8, #0xfe
+-	beqzs8  .LFoveund
+-
+-.LFlab8:
+-	#ADD($r2, $0x80)
+-	move    $r15, #0x80
+-	add     $r2, $r2, $r15
+-	slt     $r15, $r2, $r15
+-
+-	#ADDC($r3, $0x0)
+-	add     $r3, $r3, $r15
+-	srli    $r8, $r2, #8
+-	andi    $r8, $r8, #1
+-	sub     $r2, $r2, $r8
+-	slli    $r2, $r2, #1
+-	srli    $r2, $r2, #9
+-	slli    $r8, $r3, #23
+-	or      $r2, $r2, $r8
+-	or      $r0, $r2, $r6
+-
+-.LF999:
+-	popm    $r6, $r10
+-	pop     $lp
+-	ret5    $lp
+-
+-.LFspecA:
+-	bnez    $r3, .Li19
+-	add     $r2, $r2, $r2
+-	beqz    $r2, .Li20
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r7, $r2
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r2
+-	bal	__clzsi2
+-	move	$r7, $r0
+-	popm	$r0, $r5
+-#endif
+-	sub     $r3, $r3, $r7
+-	sll     $r2, $r2, $r7
+-	j       .LFlab1
+-.Li20:
+-	subri   $r15, $r5, #0xff
+-	beqzs8  .LFnan
+-	j       .LFzer
+-.Li19:
+-	add     $r8, $r2, $r2
+-	bnez    $r8, .LFnan
+-	bnez    $r5, .Li21
+-	add     $r8, $r4, $r4
+-	beqz    $r8, .LFnan
+-.Li21:
+-	subri   $r15, $r5, #0xff
+-	bnezs8  .LFinf
+-
+-.LFspecB:
+-	bnez    $r5, .Li22
+-	add     $r4, $r4, $r4
+-	beqz    $r4, .LFzer
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r7, $r4
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r4
+-	bal	__clzsi2
+-	move	$r7, $r0
+-	popm	$r0, $r5
+-#endif
+-	sub     $r5, $r5, $r7
+-	sll     $r4, $r4, $r7
+-	j       .LFlab2
+-
+-.LFzer:
+-	move    $r0, $r6
+-	j       .LF999
+-.Li22:
+-	add     $r8, $r4, $r4
+-	bnez    $r8, .LFnan
+-
+-.LFinf:
+-	move    $r8, #0x7f800000
+-	or      $r0, $r6, $r8
+-	j       .LF999
+-
+-.LFnan:
+-	move    $r0, #0xffc00000
+-	j       .LF999
+-
+-.LFoveund:
+-	bgtz    $r3, .LFinf
+-	subri   $r7, $r3, #1
+-	slti    $r15, $r7, #0x20
+-	beqzs8  .LFzer
+-	subri   $r8, $r7, #0x20
+-	sll     $r3, $r2, $r8
+-	srl     $r2, $r2, $r7
+-	beqz    $r3, .Li25
+-	ori     $r2, $r2, #2
+-.Li25:
+-	move    $r3, #0
+-	addi    $r8, $r2, #0x80
+-	sltsi   $r15, $r8, #0
+-	beqzs8  .LFlab8
+-	move    $r3, #1
+-	j       .LFlab8
+-	.size	__mulsf3, .-__mulsf3
+-#endif /* L_mul_sf */
+-
+-
+-
+-#ifdef L_mul_df
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-	#define P3L     $r4
+-	#define P3H     $r5
+-	#define O1L     $r7
+-	#define O1H	$r8
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-	#define P3H     $r4
+-	#define P3L     $r5
+-	#define O1H     $r7
+-	#define O1L	$r8
+-#endif
+-	.text
+-	.align	2
+-	.global	__muldf3
+-	.type	__muldf3, @function
+-__muldf3:
+-	push    $lp
+-	pushm   $r6, $r10
+-
+-	slli    $r6, P1H, #1
+-	srli    $r6, $r6, #21
+-	slli    P3H, P1H, #11
+-	srli    $r10, P1L, #21
+-	or      P3H, P3H, $r10
+-	slli    P3L, P1L, #11
+-	move    O1L, #0x80000000
+-	or      P3H, P3H, O1L
+-	slli    $r9, P2H, #1
+-	srli    $r9, $r9, #21
+-	slli    O1H, P2H, #11
+-	srli    $r10, P2L, #21
+-	or      O1H, O1H, $r10
+-	or      O1H, O1H, O1L
+-	xor     P1H, P1H, P2H
+-	and     P1H, P1H, O1L
+-	slli    O1L, P2L, #11
+-
+-	addi    $r10, $r6, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LFspecA
+-
+-.LFlab1:
+-	addi    $r10, $r9, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LFspecB
+-
+-.LFlab2:
+-	addi    $r10, $r9, #0xfffffc02
+-	add     $r6, $r6, $r10
+-
+-	move    $r10, $r8
+-/* This is a 64-bit multiple. */
+-#ifndef __big_endian__
+-/* For little endian: ($r9, $r3) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r8, $r5, $r8
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r5
+-	movi	$r1, #0
+-	move	$r2, $r8
+-	movi	$r3, #0
+-	bal	__muldi3
+-	movd44	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	move    $r3, $r8
+-#else /* __big_endian__ */
+-/* For big endain: ($r9, $r2) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r8, $r4, $r7
+-#else
+-	pushm	$r0, $r5
+-	move	$r1, $r4
+-	movi	$r0, #0
+-	move	$r3, $r7
+-	movi	$r2, #0
+-	bal	__muldi3
+-	movd44	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	move    $r2, $r9
+-	move    $r9, $r8
+-#endif /* __big_endian__ */
+-	move    $r8, $r10
+-
+-	move    $r10, P1H
+-/* This is a 64-bit multiple. */
+-#ifndef __big_endian__
+-/* For little endian: ($r0, $r2) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r4, $r8
+-#else
+-	pushm	$r2, $r5
+-	move	$r0, $r4
+-	movi	$r1, #0
+-	move	$r2, $r8
+-	movi	$r3, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move    $r2, $r0
+-	move    $r0, $r1
+-#else /* __big_endian__ */
+-/* For big endain: ($r1, $r3) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r5, $r7
+-#else
+-	pushm	$r2, $r5
+-	move	$r1, $r5
+-	movi	$r0, #0
+-	move	$r3, $r7
+-	movi	$r2, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move    $r3, $r1
+-	move    $r1, $r0
+-#endif /* __big_endian__ */
+-	move    P1H, $r10
+-
+-	#ADD(P2H, P1L)
+-	add     P2H, P2H, P1L
+-	slt     $r15, P2H, P1L
+-
+-	#ADDC($r9, $0x0)
+-	add     $r9, $r9, $r15
+-
+-	move    $r10, P1H
+-/* This is a 64-bit multiple. */
+-#ifndef __big_endian__
+-/* For little endian: ($r0, $r8) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r5, $r7
+-#else
+-	pushm	$r2, $r5
+-	move	$r0, $r5
+-	movi	$r1, #0
+-	move	$r2, $r7
+-	movi	$r3, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move    $r8, $r0
+-	move    $r0, $r1
+-#else /* __big_endian__ */
+-/* For big endian: ($r1, $r7) is (high, low). */
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r4, $r8
+-#else
+-	pushm	$r2, $r5
+-	move	$r1, $r4
+-	movi	$r0, #0
+-	move	$r3, $r8
+-	movi	$r2, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move	$r7, $r1
+-	move	$r1, $r0
+-#endif /* __big_endian__ */
+-	move    P1H, $r10
+-
+-	#ADD(P2L, O1H)
+-	add     P2L, P2L, O1H
+-	slt     $r15, P2L, O1H
+-
+-
+-	#ADDCC(P2H, P1L)
+-	beqzs8  .LL29
+-	add     P2H, P2H, P1L
+-	slt     $r15, P2H, P1L
+-	beqzs8  .LL30
+-	addi    P2H, P2H, #0x1
+-	j       .LL31
+-.LL30:
+-	move    $r15, #1
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-	j       .LL31
+-.LL29:
+-	add     P2H, P2H, P1L
+-	slt     $r15, P2H, P1L
+-.LL31:
+-
+-	#ADDC($r9, $0x0)
+-	add     $r9, $r9, $r15
+-
+-/* This is a 64-bit multiple. */
+-#ifndef __big_endian__
+-/* For little endian: ($r8, $r0) is (high, low). */
+-	move    $r10, $r9
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r8, $r4, $r7
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r4
+-	movi	$r1, #0
+-	move	$r2, $r7
+-	movi	$r3, #0
+-	bal	__muldi3
+-	movd44	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	move    $r0, $r8
+-	move    $r8, $r9
+-	move    $r9, $r10
+-#else /* __big_endian__ */
+-/* For big endian: ($r7, $r1) is (high, low). */
+-	move	$r10, $r6
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r6, $r5, $r8
+-#else
+-	pushm	$r0, $r5
+-	move	$r1, $r5
+-	movi	$r0, #0
+-	move	$r3, $r8
+-	movi	$r2, #0
+-	bal	__muldi3
+-	movd44	$r6, $r0
+-	popm	$r0, $r5
+-#endif
+-	move	$r1, $r7
+-	move	$r7, $r6
+-	move	$r6, $r10
+-#endif /* __big_endian__ */
+-
+-	#ADD(P2L, O1H)
+-	add     P2L, P2L, O1H
+-	slt     $r15, P2L, O1H
+-
+-
+-	#ADDCC(P2H, $0x0)
+-	beqzs8  .LL34
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-.LL34:
+-
+-	#ADDC($r9, $0x0)
+-	add     $r9, $r9, $r15
+-	or      $r10, P1L, P2L
+-	beqz    $r10, .Li13
+-	ori     P2H, P2H, #1
+-.Li13:
+-	move    P3H, $r9
+-	move    P3L, P2H
+-	sltsi   $r15, P3H, #0
+-	bnezs8  .Li14
+-
+-	move    $r15, P3L
+-	add     P3L, P3L, P3L
+-	slt     $r15, P3L, $r15
+-	add     P3H, P3H, P3H
+-	add     P3H, P3H, $r15
+-	addi    $r6, $r6, #-1
+-.Li14:
+-	addi    $r10, $r6, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LFoveund
+-
+-	#ADD(P3L, $0x400)
+-	move    $r15, #0x400
+-	add     P3L, P3L, $r15
+-	slt     $r15, P3L, $r15
+-
+-
+-	#ADDCC(P3H, $0x0)
+-	beqzs8  .LL37
+-	add     P3H, P3H, $r15
+-	slt     $r15, P3H, $r15
+-.LL37:
+-
+-	#ADDC($r6, $0x0)
+-	add     $r6, $r6, $r15
+-
+-.LFlab8:
+-	srli    $r10, P3L, #11
+-	andi    $r10, $r10, #1
+-	sub     P3L, P3L, $r10
+-	srli    P1L, P3L, #11
+-	slli    $r10, P3H, #21
+-	or      P1L, P1L, $r10
+-	slli    $r10, P3H, #1
+-	srli    $r10, $r10, #12
+-	or      P1H, P1H, $r10
+-	slli    $r10, $r6, #20
+-	or      P1H, P1H, $r10
+-
+-.LFret:
+-.LF999:
+-	popm    $r6, $r10
+-	pop     $lp
+-	ret5    $lp
+-
+-.LFspecA:
+-	#ADD(P3L, P3L)
+-	move    $r15, P3L
+-	add     P3L, P3L, P3L
+-	slt     $r15, P3L, $r15
+-
+-	#ADDC(P3H, P3H)
+-	add     P3H, P3H, P3H
+-	add     P3H, P3H, $r15
+-	bnez    $r6, .Li15
+-	or      $r10, P3H, P3L
+-	beqz    $r10, .Li16
+-
+-
+-	#NORMd($r4, P1L, P2H)
+-	bnez    P3H, .LL38
+-	bnez    P3L, .LL39
+-	move    $r6, #0
+-	j       .LL40
+-.LL39:
+-	move    P3H, P3L
+-	move    P3L, #0
+-	move    P1L, #32
+-	sub     $r6, $r6, P1L
+-.LL38:
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r0, P3H
+-#else
+-	pushm	$r1, P3H
+-	move	$r0, P3H
+-	bal	__clzsi2
+-	popm	$r1, $r5
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r1, $r4
+-#else
+-	push	$r0
+-	pushm	$r2, $r5
+-	move	$r0, $r4
+-	bal	__clzsi2
+-	move	$r1, $r0
+-	popm	$r2, $r5
+-	pop	$r0
+-#endif
+-#endif /* __big_endian__ */
+-	beqz    P1L, .LL40
+-	sub     $r6, $r6, P1L
+-	subri   P2H, P1L, #32
+-	srl     P2H, P3L, P2H
+-	sll     P3L, P3L, P1L
+-	sll     P3H, P3H, P1L
+-	or      P3H, P3H, P2H
+-.LL40:
+-	#NORMd End
+-
+-	j       .LFlab1
+-.Li16:
+-	subri   $r15, $r9, #0x7ff
+-	beqzs8  .LFnan
+-	j       .LFret
+-.Li15:
+-	or      $r10, P3H, P3L
+-	bnez    $r10, .LFnan
+-	bnez    $r9, .Li17
+-	slli    $r10, O1H, #1
+-	or      $r10, $r10, O1L
+-	beqz    $r10, .LFnan
+-.Li17:
+-	subri   $r15, $r9, #0x7ff
+-	bnezs8  .LFinf
+-
+-.LFspecB:
+-	#ADD(O1L, O1L)
+-	move    $r15, O1L
+-	add     O1L, O1L, O1L
+-	slt     $r15, O1L, $r15
+-
+-	#ADDC(O1H, O1H)
+-	add     O1H, O1H, O1H
+-	add     O1H, O1H, $r15
+-	bnez    $r9, .Li18
+-	or      $r10, O1H, O1L
+-	beqz    $r10, .Li19
+-
+-
+-	#NORMd($r7, P2L, P1L)
+-	bnez    O1H, .LL41
+-	bnez    O1L, .LL42
+-	move    $r9, #0
+-	j       .LL43
+-.LL42:
+-	move    O1H, O1L
+-	move    O1L, #0
+-	move    P2L, #32
+-	sub     $r9, $r9, P2L
+-.LL41:
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r2, $r8
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r3, $r5
+-	move	$r0, $r8
+-	bal	__clzsi2
+-	move	$r2, $r0
+-	popm	$r3, $r5
+-	popm	$r0, $r1
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r3, $r7
+-#else
+-	pushm	$r0, $r2
+-	pushm	$r4, $r5
+-	move	$r0, $r7
+-	bal	__clzsi2
+-	move	$r3, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r2
+-#endif
+-#endif /* __big_endian__ */
+-	beqz    P2L, .LL43
+-	sub     $r9, $r9, P2L
+-	subri   P1L, P2L, #32
+-	srl     P1L, O1L, P1L
+-	sll     O1L, O1L, P2L
+-	sll     O1H, O1H, P2L
+-	or      O1H, O1H, P1L
+-.LL43:
+-	#NORMd End
+-
+-	j       .LFlab2
+-.Li19:
+-	move    P1L, #0
+-	j       .LFret
+-.Li18:
+-	or      $r10, O1H, O1L
+-	bnez    $r10, .LFnan
+-
+-.LFinf:
+-	move    $r10, #0x7ff00000
+-	or      P1H, P1H, $r10
+-	move    P1L, #0
+-	j       .LFret
+-
+-.LFnan:
+-	move    P1H, #0xfff80000
+-	move    P1L, #0
+-	j       .LFret
+-
+-.LFoveund:
+-	bgtz    $r6, .LFinf
+-	subri   P1L, $r6, #1
+-	move    P2L, #0
+-.LL44:
+-	move    $r10, #0x20
+-	slt     $r15, P1L, $r10
+-	bnezs8  .LL45
+-	or      P2L, P2L, P3L
+-	move    P3L, P3H
+-	move    P3H, #0
+-	addi    P1L, P1L, #0xffffffe0
+-	bnez    P3L, .LL44
+-.LL45:
+-	beqz    P1L, .LL46
+-	move    P2H, P3H
+-	move    $r10, P3L
+-	srl     P3L, P3L, P1L
+-	srl     P3H, P3H, P1L
+-	subri   P1L, P1L, #0x20
+-	sll     P2H, P2H, P1L
+-	or      P3L, P3L, P2H
+-	sll     $r10, $r10, P1L
+-	or      P2L, P2L, $r10
+-	beqz    P2L, .LL46
+-	ori     P3L, P3L, #1
+-.LL46:
+-	#ADD(P3L, $0x400)
+-	move    $r15, #0x400
+-	add     P3L, P3L, $r15
+-	slt     $r15, P3L, $r15
+-
+-	#ADDC(P3H, $0x0)
+-	add     P3H, P3H, $r15
+-	srli    $r6, P3H, #31
+-	j       .LFlab8
+-	.size __muldf3, .-__muldf3
+-#endif /* L_mul_df */
+-
+-
+-
+-#ifdef L_div_sf
+-
+-	.text
+-	.align	2
+-	.global	__divsf3
+-	.type	__divsf3, @function
+-__divsf3:
+-	push    $lp
+-	pushm   $r6, $r10
+-
+-	move    $r7, #0x80000000
+-	srli    $r4, $r0, #23
+-	andi    $r4, $r4, #0xff
+-	srli    $r6, $r1, #23
+-	andi    $r6, $r6, #0xff
+-	slli    $r3, $r0, #8
+-	or      $r3, $r3, $r7
+-	slli    $r5, $r1, #8
+-	or      $r5, $r5, $r7
+-	xor     $r10, $r0, $r1
+-	and     $r7, $r7, $r10
+-
+-	addi    $r10, $r4, #-1
+-	slti    $r15, $r10, #0xfe
+-	beqzs8  .LGspecA
+-
+-.LGlab1:
+-	addi    $r10, $r6, #-1
+-	slti    $r15, $r10, #0xfe
+-	beqzs8  .LGspecB
+-
+-.LGlab2:
+-	slt     $r15, $r3, $r5
+-	bnezs8  .Li27
+-	srli    $r3, $r3, #1
+-	addi    $r4, $r4, #1
+-.Li27:
+-	srli    $r8, $r5, #14
+-	divr    $r0, $r2, $r3, $r8
+-	andi    $r9, $r5, #0x3fff
+-	mul     $r1, $r9, $r0
+-	slli    $r2, $r2, #14
+-
+-	#SUB($r2, $r1)
+-	move    $r15, $r2
+-	sub     $r2, $r2, $r1
+-	slt     $r15, $r15, $r2
+-	beqzs8  .Li28
+-	addi    $r0, $r0, #-1
+-
+-	#ADD($r2, $r5)
+-	add     $r2, $r2, $r5
+-	slt     $r15, $r2, $r5
+-.Li28:
+-	divr    $r3, $r2, $r2, $r8
+-	mul     $r1, $r9, $r3
+-	slli    $r2, $r2, #14
+-
+-	#SUB($r2, $r1)
+-	move    $r15, $r2
+-	sub     $r2, $r2, $r1
+-	slt     $r15, $r15, $r2
+-	beqzs8  .Li29
+-	addi    $r3, $r3, #-1
+-
+-	#ADD($r2, $r5)
+-	add     $r2, $r2, $r5
+-	slt     $r15, $r2, $r5
+-.Li29:
+-	slli    $r10, $r0, #14
+-	add     $r3, $r3, $r10
+-	slli    $r3, $r3, #4
+-	beqz    $r2, .Li30
+-	ori     $r3, $r3, #1
+-.Li30:
+-	subri   $r10, $r6, #0x7e
+-	add     $r4, $r4, $r10
+-	addi    $r10, $r4, #-1
+-	slti    $r15, $r10, #0xfe
+-	beqzs8  .LGoveund
+-
+-.LGlab8:
+-	#ADD($r3, $0x80)
+-	move    $r15, #0x80
+-	add     $r3, $r3, $r15
+-	slt     $r15, $r3, $r15
+-
+-	#ADDC($r4, $0x0)
+-	add     $r4, $r4, $r15
+-	srli    $r10, $r3, #8
+-	andi    $r10, $r10, #1
+-	sub     $r3, $r3, $r10
+-	slli    $r3, $r3, #1
+-	srli    $r3, $r3, #9
+-	slli    $r10, $r4, #23
+-	or      $r3, $r3, $r10
+-	or      $r0, $r3, $r7
+-
+-.LG999:
+-	popm    $r6, $r10
+-	pop     $lp
+-	ret5    $lp
+-
+-.LGspecA:
+-	bnez    $r4, .Li31
+-	add     $r3, $r3, $r3
+-	beqz    $r3, .Li31
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r8, $r3
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r3
+-	bal	__clzsi2
+-	move	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	sub     $r4, $r4, $r8
+-	sll     $r3, $r3, $r8
+-	j       .LGlab1
+-.Li31:
+-	bne     $r6, $r4, .Li33
+-	add     $r10, $r5, $r5
+-	beqz    $r10, .LGnan
+-.Li33:
+-	subri   $r15, $r6, #0xff
+-	beqzs8  .LGspecB
+-	beqz    $r4, .LGzer
+-	add     $r10, $r3, $r3
+-	bnez    $r10, .LGnan
+-	j       .LGinf
+-
+-.LGspecB:
+-	bnez    $r6, .Li34
+-	add     $r5, $r5, $r5
+-	beqz    $r5, .LGinf
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r8, $r5
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, $r5
+-	bal	__clzsi2
+-	move	$r8, $r0
+-	popm	$r0, $r5
+-#endif
+-	sub     $r6, $r6, $r8
+-	sll     $r5, $r5, $r8
+-	j       .LGlab2
+-.Li34:
+-	add     $r10, $r5, $r5
+-	bnez    $r10, .LGnan
+-
+-.LGzer:
+-	move    $r0, $r7
+-	j       .LG999
+-
+-.LGoveund:
+-	bgtz    $r4, .LGinf
+-	subri   $r8, $r4, #1
+-	slti    $r15, $r8, #0x20
+-	beqzs8  .LGzer
+-	subri   $r10, $r8, #0x20
+-	sll     $r4, $r3, $r10
+-	srl     $r3, $r3, $r8
+-	beqz    $r4, .Li37
+-	ori     $r3, $r3, #2
+-.Li37:
+-	move    $r4, #0
+-	addi    $r10, $r3, #0x80
+-	sltsi   $r15, $r10, #0
+-	beqzs8  .LGlab8
+-	move    $r4, #1
+-	j       .LGlab8
+-
+-.LGinf:
+-	move    $r10, #0x7f800000
+-	or      $r0, $r7, $r10
+-	j       .LG999
+-
+-.LGnan:
+-	move    $r0, #0xffc00000
+-	j       .LG999
+-	.size	__divsf3, .-__divsf3
+-#endif /* L_div_sf */
+-
+-
+-
+-#ifdef L_div_df
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-	#define P3L     $r4
+-	#define P3H     $r5
+-	#define O1L     $r7
+-	#define O1H	$r8
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-	#define P3H     $r4
+-	#define P3L     $r5
+-	#define O1H     $r7
+-	#define O1L	$r8
+-#endif
+-	.text
+-	.align	2
+-	.global	__divdf3
+-	.type	__divdf3, @function
+-__divdf3:
+-	push    $lp
+-	pushm   $r6, $r10
+-
+-	slli    $r6, P1H, #1
+-	srli    $r6, $r6, #21
+-	slli    P3H, P1H, #11
+-	srli    $r10, P1L, #21
+-	or      P3H, P3H, $r10
+-	slli    P3L, P1L, #11
+-	move    O1L, #0x80000000
+-	or      P3H, P3H, O1L
+-	slli    $r9, P2H, #1
+-	srli    $r9, $r9, #21
+-	slli    O1H, P2H, #11
+-	srli    $r10, P2L, #21
+-	or      O1H, O1H, $r10
+-	or      O1H, O1H, O1L
+-	xor     P1H, P1H, P2H
+-	and     P1H, P1H, O1L
+-	slli    O1L, P2L, #11
+-
+-	addi    $r10, $r6, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LGspecA
+-
+-.LGlab1:
+-	addi    $r10, $r9, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LGspecB
+-
+-.LGlab2:
+-	sub     $r6, $r6, $r9
+-	addi    $r6, $r6, #0x3ff
+-	srli    P3L, P3L, #1
+-	slli    $r10, P3H, #31
+-	or      P3L, P3L, $r10
+-	srli    P3H, P3H, #1
+-	srli    $r9, O1H, #16
+-	divr    P2H, P3H, P3H, $r9
+-	move    $r10, #0xffff
+-	and     P2L, O1H, $r10
+-	mul     P1L, P2L, P2H
+-	slli    P3H, P3H, #16
+-	srli    $r10, P3L, #16
+-	or      P3H, P3H, $r10
+-
+-	#SUB(P3H, P1L)
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-	beqzs8  .Li20
+-
+-.Lb21:
+-	addi    P2H, P2H, #-1
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .Lb21
+-.Li20:
+-	divr    $r9, P3H, P3H, $r9
+-	mul     P1L, P2L, $r9
+-	slli    P3H, P3H, #16
+-	move    $r15, #0xffff
+-	and     $r10, P3L, $r15
+-	or      P3H, P3H, $r10
+-
+-	#SUB(P3H, P1L)
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-	beqzs8  .Li22
+-
+-.Lb23:
+-	addi    $r9, $r9, #-1
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .Lb23
+-.Li22:
+-	slli    P2H, P2H, #16
+-	add     P2H, P2H, $r9
+-
+-/* This is a 64-bit multiple. */
+-#ifndef __big_endian__
+-/* For little endian: ($r0, $r9) is (high, low). */
+-	move    $r10, $r1
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r3, $r7
+-#else
+-	pushm	$r2, $r5
+-	move	$r0, $r3
+-	movi	$r1, #0
+-	move	$r2, $r7
+-	movi	$r3, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move    $r9, $r0
+-	move    $r0, $r1
+-	move    $r1, $r10
+-#else /* __big_endian__ */
+-/* For big endian: ($r1, $r9) is (high, low). */
+-	move    $r10, $r0
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r2, $r8
+-#else
+-	pushm	$r2, $r5
+-	move	$r1, $r2
+-	movi	$r0, #0
+-	move	$r3, $r8
+-	movi	$r2, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move    $r9, $r1
+-	move    $r1, $r0
+-	move    $r0, $r10
+-#endif /* __big_endian__ */
+-
+-	move    P3L, #0
+-
+-	#SUB(P3L, $r9)
+-	move    $r15, P3L
+-	sub     P3L, P3L, $r9
+-	slt     $r15, $r15, P3L
+-
+-
+-	#SUBCC(P3H, P1L)
+-	beqzs8  .LL47
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-	beqzs8  .LL48
+-	subi333 P3H, P3H, #1
+-	j       .LL49
+-.LL48:
+-	move    $r15, P3H
+-	subi333 P3H, P3H, #1
+-	slt     $r15, $r15, P3H
+-	j       .LL49
+-.LL47:
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-.LL49:
+-
+-	beqzs8  .Li24
+-
+-.LGlab3:
+-	addi    P2H, P2H, #-1
+-
+-	#ADD(P3L, O1L)
+-	add     P3L, P3L, O1L
+-	slt     $r15, P3L, O1L
+-
+-
+-	#ADDCC(P3H, O1H)
+-	beqzs8  .LL50
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .LL51
+-	addi    P3H, P3H, #0x1
+-	j       .LL52
+-.LL51:
+-	move    $r15, #1
+-	add     P3H, P3H, $r15
+-	slt     $r15, P3H, $r15
+-	j       .LL52
+-.LL50:
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-.LL52:
+-
+-	beqzs8  .LGlab3
+-.Li24:
+-	bne     P3H, O1H, .Li25
+-	move    P1L, O1L
+-	move    P3H, P3L
+-	move    $r9, #0
+-	move    P2L, $r9
+-	j       .Le25
+-.Li25:
+-	srli    P2L, O1H, #16
+-	divr    $r9, P3H, P3H, P2L
+-	move    $r10, #0xffff
+-	and     $r10, O1H, $r10
+-	mul     P1L, $r10, $r9
+-	slli    P3H, P3H, #16
+-	srli    $r15, P3L, #16
+-	or      P3H, P3H, $r15
+-
+-	#SUB(P3H, P1L)
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-	beqzs8  .Li26
+-
+-.Lb27:
+-	addi    $r9, $r9, #-1
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .Lb27
+-.Li26:
+-	divr    P2L, P3H, P3H, P2L
+-	mul     P1L, $r10, P2L
+-	slli    P3H, P3H, #16
+-	move    $r10, #0xffff
+-	and     $r10, P3L, $r10
+-	or      P3H, P3H, $r10
+-
+-	#SUB(P3H, P1L)
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-	beqzs8  .Li28
+-
+-.Lb29:
+-	addi    P2L, P2L, #-1
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .Lb29
+-.Li28:
+-	slli    $r9, $r9, #16
+-	add     $r9, $r9, P2L
+-
+-/* This is a 64-bit multiple. */
+-#ifndef __big_endian__
+-/* For little endian: ($r0, $r2) is (high, low). */
+-	move    $r10, $r1
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r9, $r7
+-#else
+-	pushm	$r2, $r5
+-	move	$r0, $r9
+-	movi	$r1, #0
+-	move	$r2, $r7
+-	movi	$r3, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move    $r2, $r0
+-	move    $r0, $r1
+-	move    $r1, $r10
+-#else /* __big_endian__ */
+-/* For big endian: ($r1, $r3) is (high, low). */
+-	move	$r10, $r0
+-#ifndef __NDS32_ISA_V3M__
+-	mulr64	$r0, $r9, $r8
+-#else
+-	pushm	$r2, $r5
+-	move	$r0, $r9
+-	movi	$r1, #0
+-	move	$r2, $r7
+-	movi	$r3, #0
+-	bal	__muldi3
+-	popm	$r2, $r5
+-#endif
+-	move	$r3, $r1
+-	move	$r1, $r0
+-	move	$r0, $r10
+-#endif /* __big_endian__ */
+-
+-.Le25:
+-	move    P3L, #0
+-
+-	#SUB(P3L, P2L)
+-	move    $r15, P3L
+-	sub     P3L, P3L, P2L
+-	slt     $r15, $r15, P3L
+-
+-
+-	#SUBCC(P3H, P1L)
+-	beqzs8  .LL53
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-	beqzs8  .LL54
+-	subi333 P3H, P3H, #1
+-	j       .LL55
+-.LL54:
+-	move    $r15, P3H
+-	subi333 P3H, P3H, #1
+-	slt     $r15, $r15, P3H
+-	j       .LL55
+-.LL53:
+-	move    $r15, P3H
+-	sub     P3H, P3H, P1L
+-	slt     $r15, $r15, P3H
+-.LL55:
+-
+-	beqzs8  .Li30
+-
+-.LGlab4:
+-	addi    $r9, $r9, #-1
+-
+-	#ADD(P3L, O1L)
+-	add     P3L, P3L, O1L
+-	slt     $r15, P3L, O1L
+-
+-
+-	#ADDCC(P3H, O1H)
+-	beqzs8  .LL56
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-	beqzs8  .LL57
+-	addi    P3H, P3H, #0x1
+-	j       .LL58
+-.LL57:
+-	move    $r15, #1
+-	add     P3H, P3H, $r15
+-	slt     $r15, P3H, $r15
+-	j       .LL58
+-.LL56:
+-	add     P3H, P3H, O1H
+-	slt     $r15, P3H, O1H
+-.LL58:
+-
+-	beqzs8  .LGlab4
+-.Li30:
+-	sltsi   $r15, P2H, #0
+-	bnezs8  .Li31
+-
+-	#ADD($r9, $r9)
+-	move    $r15, $r9
+-	add     $r9, $r9, $r9
+-	slt     $r15, $r9, $r15
+-
+-	#ADDC(P2H, P2H)
+-	add     P2H, P2H, P2H
+-	add     P2H, P2H, $r15
+-	addi    $r6, $r6, #-1
+-.Li31:
+-	or      $r10, P3H, P3L
+-	beqz    $r10, .Li32
+-	ori     $r9, $r9, #1
+-.Li32:
+-	move    P3H, P2H
+-	move    P3L, $r9
+-	addi    $r10, $r6, #-1
+-	slti    $r15, $r10, #0x7fe
+-	beqzs8  .LGoveund
+-
+-	#ADD(P3L, $0x400)
+-	move    $r15, #0x400
+-	add     P3L, P3L, $r15
+-	slt     $r15, P3L, $r15
+-
+-
+-	#ADDCC(P3H, $0x0)
+-	beqzs8  .LL61
+-	add     P3H, P3H, $r15
+-	slt     $r15, P3H, $r15
+-.LL61:
+-
+-	#ADDC($r6, $0x0)
+-	add     $r6, $r6, $r15
+-
+-.LGlab8:
+-	srli    $r10, P3L, #11
+-	andi    $r10, $r10, #1
+-	sub     P3L, P3L, $r10
+-	srli    P1L, P3L, #11
+-	slli    $r10, P3H, #21
+-	or      P1L, P1L, $r10
+-	slli    $r10, P3H, #1
+-	srli    $r10, $r10, #12
+-	or      P1H, P1H, $r10
+-	slli    $r10, $r6, #20
+-	or      P1H, P1H, $r10
+-
+-.LGret:
+-.LG999:
+-	popm    $r6, $r10
+-	pop     $lp
+-	ret5    $lp
+-
+-.LGoveund:
+-	bgtz    $r6, .LGinf
+-	subri   P2H, $r6, #1
+-	move    P1L, #0
+-.LL62:
+-	move    $r10, #0x20
+-	slt     $r15, P2H, $r10
+-	bnezs8  .LL63
+-	or      P1L, P1L, P3L
+-	move    P3L, P3H
+-	move    P3H, #0
+-	addi    P2H, P2H, #0xffffffe0
+-	bnez    P3L, .LL62
+-.LL63:
+-	beqz    P2H, .LL64
+-	move    P2L, P3H
+-	move    $r10, P3L
+-	srl     P3L, P3L, P2H
+-	srl     P3H, P3H, P2H
+-	subri   P2H, P2H, #0x20
+-	sll     P2L, P2L, P2H
+-	or      P3L, P3L, P2L
+-	sll     $r10, $r10, P2H
+-	or      P1L, P1L, $r10
+-	beqz    P1L, .LL64
+-	ori     P3L, P3L, #1
+-.LL64:
+-	#ADD(P3L, $0x400)
+-	move    $r15, #0x400
+-	add     P3L, P3L, $r15
+-	slt     $r15, P3L, $r15
+-
+-	#ADDC(P3H, $0x0)
+-	add     P3H, P3H, $r15
+-	srli    $r6, P3H, #31
+-	j       .LGlab8
+-
+-.LGspecA:
+-	#ADD(P3L, P3L)
+-	move    $r15, P3L
+-	add     P3L, P3L, P3L
+-	slt     $r15, P3L, $r15
+-
+-	#ADDC(P3H, P3H)
+-	add     P3H, P3H, P3H
+-	add     P3H, P3H, $r15
+-	bnez    $r6, .Li33
+-	or      $r10, P3H, P3L
+-	beqz    $r10, .Li33
+-
+-
+-	#NORMd($r4, P2H, P2L)
+-	bnez    P3H, .LL65
+-	bnez    P3L, .LL66
+-	move    $r6, #0
+-	j       .LL67
+-.LL66:
+-	move    P3H, P3L
+-	move    P3L, #0
+-	move    P2H, #32
+-	sub     $r6, $r6, P2H
+-.LL65:
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r3, $r5
+-#else
+-	pushm	$r0, $r2
+-	pushm	$r4, $r5
+-	move	$r0, $r5
+-	bal	__clzsi2
+-	move	$r3, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r2
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r2, $r4
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r3, $r5
+-	move	$r0, $r4
+-	bal	__clzsi2
+-	move	$r2, $r0
+-	popm	$r3, $r5
+-	popm	$r0, $r1
+-#endif
+-#endif /* __big_endian_ */
+-	beqz    P2H, .LL67
+-	sub     $r6, $r6, P2H
+-	subri   P2L, P2H, #32
+-	srl     P2L, P3L, P2L
+-	sll     P3L, P3L, P2H
+-	sll     P3H, P3H, P2H
+-	or      P3H, P3H, P2L
+-.LL67:
+-	#NORMd End
+-
+-	j       .LGlab1
+-.Li33:
+-	bne     $r6, $r9, .Li35
+-	slli    $r10, O1H, #1
+-	or      $r10, $r10, O1L
+-	beqz    $r10, .LGnan
+-.Li35:
+-	subri   $r15, $r9, #0x7ff
+-	beqzs8  .LGspecB
+-	beqz    $r6, .LGret
+-	or      $r10, P3H, P3L
+-	bnez    $r10, .LGnan
+-
+-.LGinf:
+-	move    $r10, #0x7ff00000
+-	or      P1H, P1H, $r10
+-	move    P1L, #0
+-	j       .LGret
+-
+-.LGspecB:
+-	#ADD(O1L, O1L)
+-	move    $r15, O1L
+-	add     O1L, O1L, O1L
+-	slt     $r15, O1L, $r15
+-
+-	#ADDC(O1H, O1H)
+-	add     O1H, O1H, O1H
+-	add     O1H, O1H, $r15
+-	bnez    $r9, .Li36
+-	or      $r10, O1H, O1L
+-	beqz    $r10, .LGinf
+-
+-
+-	#NORMd($r7, P2H, P2L)
+-	bnez    O1H, .LL68
+-	bnez    O1L, .LL69
+-	move    $r9, #0
+-	j       .LL70
+-.LL69:
+-	move    O1H, O1L
+-	move    O1L, #0
+-	move    P2H, #32
+-	sub     $r9, $r9, P2H
+-.LL68:
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r3, $r8
+-#else
+-	pushm	$r0, $r2
+-	pushm	$r4, $r5
+-	move	$r0, $r8
+-	bal	__clzsi2
+-	move	$r3, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r2
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r2, $r7
+-#else
+-	pushm	$r0, $r1
+-	pushm	$r3, $r5
+-	move	$r0, $r7
+-	bal	__clzsi2
+-	move	$r2, $r0
+-	popm	$r3, $r5
+-	popm	$r0, $r1
+-#endif
+-#endif /* __big_endian__ */
+-	beqz    P2H, .LL70
+-	sub     $r9, $r9, P2H
+-	subri   P2L, P2H, #32
+-	srl     P2L, O1L, P2L
+-	sll     O1L, O1L, P2H
+-	sll     O1H, O1H, P2H
+-	or      O1H, O1H, P2L
+-.LL70:
+-	#NORMd End
+-
+-	j       .LGlab2
+-.Li36:
+-	or      $r10, O1H, O1L
+-	beqz    $r10, .Li38
+-
+-.LGnan:
+-	move    P1H, #0xfff80000
+-.Li38:
+-	move    P1L, #0
+-	j       .LGret
+-	.size __divdf3, .-__divdf3
+-#endif /* L_div_df */
+-
+-
+-
+-#ifdef L_negate_sf
+-
+-	.text
+-	.align	2
+-	.global	__negsf2
+-	.type	__negsf2, @function
+-__negsf2:
+-	push    $lp
+-
+-	move    $r1, #0x80000000
+-	xor     $r0, $r0, $r1
+-
+-.LN999:
+-	pop     $lp
+-	ret5    $lp
+-	.size __negsf2, .-__negsf2
+-#endif /* L_negate_sf */
+-
+-
+-
+-#ifdef L_negate_df
+-
+-#ifndef __big_endian__
+-	#define P1H     $r1
+-#else
+-	#define P1H     $r0
+-#endif
+-	.text
+-	.align	2
+-	.global	__negdf2
+-	.type	__negdf2, @function
+-__negdf2:
+-	push    $lp
+-
+-	move    $r2, #0x80000000
+-	xor     P1H, P1H, $r2
+-
+-.LP999:
+-	pop     $lp
+-	ret5    $lp
+-	.size __negdf2, .-__negdf2
+-#endif /* L_negate_df */
+-
+-
+-
+-#ifdef L_sf_to_df
+-
+-#ifndef __big_endian__
+-	#define O1L     $r1
+-	#define O1H     $r2
+-#else
+-	#define O1H     $r1
+-	#define O1L     $r2
+-#endif
+-	.text
+-	.align	2
+-	.global	__extendsfdf2
+-	.type	__extendsfdf2, @function
+-__extendsfdf2:
+-	push    $lp
+-
+-	srli    $r3, $r0, #23
+-	andi    $r3, $r3, #0xff
+-	move    $r5, #0x80000000
+-	and     O1H, $r0, $r5
+-	addi    $r5, $r3, #-1
+-	slti    $r15, $r5, #0xfe
+-	beqzs8  .LJspec
+-
+-.LJlab1:
+-	addi    $r3, $r3, #0x380
+-	slli    $r5, $r0, #9
+-	srli    $r5, $r5, #12
+-	or      O1H, O1H, $r5
+-	slli    O1L, $r0, #29
+-
+-.LJret:
+-	slli    $r5, $r3, #20
+-	or      O1H, O1H, $r5
+-	move    $r0, $r1
+-	move    $r1, $r2
+-
+-.LJ999:
+-	pop     $lp
+-	ret5    $lp
+-
+-.LJspec:
+-	move    O1L, #0
+-	add     $r0, $r0, $r0
+-	beqz    $r0, .LJret
+-	bnez    $r3, .Li42
+-
+-.Lb43:
+-	addi    $r3, $r3, #-1
+-	add     $r0, $r0, $r0
+-	move    $r5, #0x800000
+-	slt     $r15, $r0, $r5
+-	bnezs8  .Lb43
+-	j       .LJlab1
+-.Li42:
+-	move    $r3, #0x7ff
+-	move    $r5, #0xff000000
+-	slt     $r15, $r5, $r0
+-	beqzs8  .LJret
+-	move    O1H, #0xfff80000
+-	j       .LJret
+-	.size __extendsfdf2, .-__extendsfdf2
+-#endif /* L_sf_to_df */
+-
+-
+-
+-#ifdef L_df_to_sf
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-#endif
+-	.text
+-	.align	2
+-	.global	__truncdfsf2
+-	.type	__truncdfsf2, @function
+-__truncdfsf2:
+-	push    $lp
+-	pushm   $r6, $r8
+-
+-	slli    P2H, P1H, #11
+-	srli    $r7, P1L, #21
+-	or      P2H, P2H, $r7
+-	slli    P2L, P1L, #11
+-	move    $r7, #0x80000000
+-	or      P2H, P2H, $r7
+-	and     $r5, P1H, $r7
+-	slli    $r4, P1H, #1
+-	srli    $r4, $r4, #21
+-	addi    $r4, $r4, #0xfffffc80
+-	addi    $r7, $r4, #-1
+-	slti    $r15, $r7, #0xfe
+-	beqzs8  .LKspec
+-
+-.LKlab1:
+-	beqz    P2L, .Li45
+-	ori     P2H, P2H, #1
+-.Li45:
+-	#ADD(P2H, $0x80)
+-	move    $r15, #0x80
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-
+-	#ADDC($r4, $0x0)
+-	add     $r4, $r4, $r15
+-	srli    $r7, P2H, #8
+-	andi    $r7, $r7, #1
+-	sub     P2H, P2H, $r7
+-	slli    P2H, P2H, #1
+-	srli    P2H, P2H, #9
+-	slli    $r7, $r4, #23
+-	or      P2H, P2H, $r7
+-	or      $r0, P2H, $r5
+-
+-.LK999:
+-	popm    $r6, $r8
+-	pop     $lp
+-	ret5    $lp
+-
+-.LKspec:
+-	subri   $r15, $r4, #0x47f
+-	bnezs8  .Li46
+-	slli    $r7, P2H, #1
+-	or      $r7, $r7, P2L
+-	beqz    $r7, .Li46
+-	move    $r0, #0xffc00000
+-	j       .LK999
+-.Li46:
+-	sltsi   $r15, $r4, #0xff
+-	bnezs8  .Li48
+-	move    $r7, #0x7f800000
+-	or      $r0, $r5, $r7
+-	j       .LK999
+-.Li48:
+-	subri   $r6, $r4, #1
+-	move    $r7, #0x20
+-	slt     $r15, $r6, $r7
+-	bnezs8  .Li49
+-	move    $r0, $r5
+-	j       .LK999
+-.Li49:
+-	subri   $r8, $r6, #0x20
+-	sll     $r7, P2H, $r8
+-	or      P2L, P2L, $r7
+-	srl     P2H, P2H, $r6
+-	move    $r4, #0
+-	move    $r7, #0x80000000
+-	or      P2H, P2H, $r7
+-	j       .LKlab1
+-	.size __truncdfsf2, .-__truncdfsf2
+-#endif /* L_df_to_sf */
+-
+-
+-
+-#ifdef L_df_to_si
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-#endif
+-	.global	__fixdfsi
+-	.type	__fixdfsi, @function
+-__fixdfsi:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	slli    $r3, P1H, #11
+-	srli    $r6, P1L, #21
+-	or      $r3, $r3, $r6
+-	move    $r6, #0x80000000
+-	or      $r3, $r3, $r6
+-	slli    $r6, P1H, #1
+-	srli    $r6, $r6, #21
+-	subri   $r2, $r6, #0x41e
+-	blez    $r2, .LLnaninf
+-	move    $r6, #0x20
+-	slt     $r15, $r2, $r6
+-	bnezs8  .LL72
+-	move    $r3, #0
+-.LL72:
+-	srl     $r3, $r3, $r2
+-	sltsi   $r15, P1H, #0
+-	beqzs8  .Li50
+-	subri   $r3, $r3, #0
+-.Li50:
+-	move    $r0, $r3
+-
+-.LL999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-
+-.LLnaninf:
+-	beqz    P1L, .Li51
+-	ori     P1H, P1H, #1
+-.Li51:
+-	move    $r6, #0x7ff00000
+-	slt     $r15, $r6, P1H
+-	beqzs8  .Li52
+-	move    $r0, #0x80000000
+-	j       .LL999
+-.Li52:
+-	move    $r0, #0x7fffffff
+-	j       .LL999
+-	.size __fixdfsi, .-__fixdfsi
+-#endif /* L_df_to_si */
+-
+-
+-
+-#ifdef L_fixsfdi
+-
+-#ifndef __big_endian__
+-	#define O1L     $r1
+-	#define O1H     $r2
+-#else
+-	#define O1H     $r1
+-	#define O1L     $r2
+-#endif
+-	.text
+-	.align	2
+-	.global	__fixsfdi
+-	.type	__fixsfdi, @function
+-__fixsfdi:
+-	push    $lp
+-
+-	srli    $r3, $r0, #23
+-	andi    $r3, $r3, #0xff
+-	slli    O1H, $r0, #8
+-	move    $r5, #0x80000000
+-	or      O1H, O1H, $r5
+-	move    O1L, #0
+-	sltsi   $r15, $r3, #0xbe
+-	beqzs8  .LCinfnan
+-	subri   $r3, $r3, #0xbe
+-.LL8:
+-	move    $r5, #0x20
+-	slt     $r15, $r3, $r5
+-	bnezs8  .LL9
+-	move    O1L, O1H
+-	move    O1H, #0
+-	addi    $r3, $r3, #0xffffffe0
+-	bnez    O1L, .LL8
+-.LL9:
+-	beqz    $r3, .LL10
+-	move    $r4, O1H
+-	srl     O1L, O1L, $r3
+-	srl     O1H, O1H, $r3
+-	subri   $r3, $r3, #0x20
+-	sll     $r4, $r4, $r3
+-	or      O1L, O1L, $r4
+-.LL10:
+-	sltsi   $r15, $r0, #0
+-	beqzs8  .LCret
+-
+-	subri   O1H, O1H, #0
+-	beqz    O1L, .LL11
+-	subri   O1L, O1L, #0
+-	subi45  O1H, #1
+-.LL11:
+-
+-.LCret:
+-	move    $r0, $r1
+-	move    $r1, $r2
+-
+-.LC999:
+-	pop     $lp
+-	ret5    $lp
+-
+-.LCinfnan:
+-	sltsi   $r15, $r0, #0
+-	bnezs8  .LCret3
+-	subri   $r15, $r3, #0xff
+-	bnezs8  .Li7
+-	slli    $r5, O1H, #1
+-	beqz    $r5, .Li7
+-
+-.LCret3:
+-	move    O1H, #0x80000000
+-	j       .LCret
+-.Li7:
+-	move    O1H, #0x7fffffff
+-	move    O1L, #-1
+-	j       .LCret
+-	.size	__fixsfdi, .-__fixsfdi
+-#endif /* L_fixsfdi */
+-
+-
+-
+-#ifdef L_fixdfdi
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define O1L     $r3
+-	#define O1H     $r4
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define O1H     $r3
+-	#define O1L     $r4
+-#endif
+-	.text
+-	.align	2
+-	.global	__fixdfdi
+-	.type	__fixdfdi, @function
+-__fixdfdi:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	slli    $r5, P1H, #1
+-	srli    $r5, $r5, #21
+-	slli    O1H, P1H, #11
+-	srli    $r6, P1L, #21
+-	or      O1H, O1H, $r6
+-	slli    O1L, P1L, #11
+-	move    $r6, #0x80000000
+-	or      O1H, O1H, $r6
+-	slti    $r15, $r5, #0x43e
+-	beqzs8  .LCnaninf
+-	subri   $r2, $r5, #0x43e
+-.LL14:
+-	move    $r6, #0x20
+-	slt     $r15, $r2, $r6
+-	bnezs8  .LL15
+-	move    O1L, O1H
+-	move    O1H, #0
+-	addi    $r2, $r2, #0xffffffe0
+-	bnez    O1L, .LL14
+-.LL15:
+-	beqz    $r2, .LL16
+-	move    P1L, O1H
+-	srl     O1L, O1L, $r2
+-	srl     O1H, O1H, $r2
+-	subri   $r2, $r2, #0x20
+-	sll     P1L, P1L, $r2
+-	or      O1L, O1L, P1L
+-.LL16:
+-	sltsi   $r15, P1H, #0
+-	beqzs8  .LCret
+-
+-	subri   O1H, O1H, #0
+-	beqz    O1L, .LL17
+-	subri   O1L, O1L, #0
+-	subi45  O1H, #1
+-.LL17:
+-
+-.LCret:
+-	move    P1L, O1L
+-	move    P1H, O1H
+-
+-.LC999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-
+-.LCnaninf:
+-	sltsi   $r15, P1H, #0
+-	bnezs8  .LCret3
+-	subri   $r15, $r5, #0x7ff
+-	bnezs8  .Li5
+-	slli    $r6, O1H, #1
+-	or      $r6, $r6, O1L
+-	beqz    $r6, .Li5
+-
+-.LCret3:
+-	move    O1H, #0x80000000
+-	move    O1L, #0
+-	j       .LCret
+-.Li5:
+-	move    O1H, #0x7fffffff
+-	move    O1L, #-1
+-	j       .LCret
+-	.size	__fixdfdi, .-__fixdfdi
+-#endif /* L_fixdfdi */
+-
+-
+-
+-#ifdef L_fixunssfsi
+-
+-	.global	__fixunssfsi
+-	.type	__fixunssfsi, @function
+-__fixunssfsi:
+-	push    $lp
+-
+-	slli    $r1, $r0, #8
+-	move    $r3, #0x80000000
+-	or      $r1, $r1, $r3
+-	srli    $r3, $r0, #23
+-	andi    $r3, $r3, #0xff
+-	subri   $r2, $r3, #0x9e
+-	sltsi   $r15, $r2, #0
+-	bnezs8  .LLspec
+-	sltsi   $r15, $r2, #0x20
+-	bnezs8  .Li45
+-	move    $r0, #0
+-	j       .LL999
+-.Li45:
+-	srl     $r1, $r1, $r2
+-	sltsi   $r15, $r0, #0
+-	beqzs8  .Li46
+-	subri   $r1, $r1, #0
+-.Li46:
+-	move    $r0, $r1
+-
+-.LL999:
+-	pop     $lp
+-	ret5    $lp
+-
+-.LLspec:
+-	move    $r3, #0x7f800000
+-	slt     $r15, $r3, $r0
+-	beqzs8  .Li47
+-	move    $r0, #0x80000000
+-	j       .LL999
+-.Li47:
+-	move    $r0, #-1
+-	j       .LL999
+-	.size	__fixunssfsi, .-__fixunssfsi
+-#endif /* L_fixunssfsi */
+-
+-
+-
+-#ifdef L_fixunsdfsi
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-#endif
+-	.text
+-	.align	2
+-	.global	__fixunsdfsi
+-	.type	__fixunsdfsi, @function
+-__fixunsdfsi:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	slli    $r3, P1H, #11
+-	srli    $r6, P1L, #21
+-	or      $r3, $r3, $r6
+-	move    $r6, #0x80000000
+-	or      $r3, $r3, $r6
+-	slli    $r6, P1H, #1
+-	srli    $r6, $r6, #21
+-	subri   $r2, $r6, #0x41e
+-	sltsi   $r15, $r2, #0
+-	bnezs8  .LNnaninf
+-	move    $r6, #0x20
+-	slt     $r15, $r2, $r6
+-	bnezs8  .LL73
+-	move    $r3, #0
+-.LL73:
+-	srl     $r3, $r3, $r2
+-	sltsi   $r15, P1H, #0
+-	beqzs8  .Li53
+-	subri   $r3, $r3, #0
+-.Li53:
+-	move    $r0, $r3
+-
+-.LN999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-
+-.LNnaninf:
+-	beqz    P1L, .Li54
+-	ori     P1H, P1H, #1
+-.Li54:
+-	move    $r6, #0x7ff00000
+-	slt     $r15, $r6, P1H
+-	beqzs8  .Li55
+-	move    $r0, #0x80000000
+-	j       .LN999
+-.Li55:
+-	move    $r0, #-1
+-	j       .LN999
+-	.size __fixunsdfsi, .-__fixunsdfsi
+-#endif /* L_fixunsdfsi */
+-
+-
+-
+-#ifdef L_fixunssfdi
+-
+-#ifndef __big_endian__
+-	#define O1L     $r1
+-	#define O1H     $r2
+-#else
+-	#define O1H     $r1
+-	#define O1L     $r2
+-#endif
+-	.text
+-	.align	2
+-	.global	__fixunssfdi
+-	.type	__fixunssfdi, @function
+-__fixunssfdi:
+-	push    $lp
+-
+-	srli    $r3, $r0, #23
+-	andi    $r3, $r3, #0xff
+-	slli    O1H, $r0, #8
+-	move    $r5, #0x80000000
+-	or      O1H, O1H, $r5
+-	move    O1L, #0
+-	sltsi   $r15, $r3, #0xbe
+-	beqzs8  .LDinfnan
+-	subri   $r3, $r3, #0xbe
+-.LL12:
+-	move    $r5, #0x20
+-	slt     $r15, $r3, $r5
+-	bnezs8  .LL13
+-	move    O1L, O1H
+-	move    O1H, #0
+-	addi    $r3, $r3, #0xffffffe0
+-	bnez    O1L, .LL12
+-.LL13:
+-	beqz    $r3, .LL14
+-	move    $r4, O1H
+-	srl     O1L, O1L, $r3
+-	srl     O1H, O1H, $r3
+-	subri   $r3, $r3, #0x20
+-	sll     $r4, $r4, $r3
+-	or      O1L, O1L, $r4
+-.LL14:
+-	sltsi   $r15, $r0, #0
+-	beqzs8  .LDret
+-
+-	subri   O1H, O1H, #0
+-	beqz    O1L, .LL15
+-	subri   O1L, O1L, #0
+-	subi45  O1H, #1
+-.LL15:
+-
+-.LDret:
+-	move    $r0, $r1
+-	move    $r1, $r2
+-
+-.LD999:
+-	pop     $lp
+-	ret5    $lp
+-
+-.LDinfnan:
+-	move    O1H, #0x80000000
+-	move    O1L, #0
+-	j       .LDret
+-	.size	__fixunssfdi, .-__fixunssfdi
+-#endif /* L_fixunssfdi */
+-
+-
+-
+-#ifdef L_fixunsdfdi
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define O1L     $r3
+-	#define O1H     $r4
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define O1H     $r3
+-	#define O1L     $r4
+-#endif
+-	.text
+-	.align	2
+-	.global	__fixunsdfdi
+-	.type	__fixunsdfdi, @function
+-__fixunsdfdi:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	slli    $r5, P1H, #1
+-	srli    $r5, $r5, #21
+-	slli    O1H, P1H, #11
+-	srli    $r6, P1L, #21
+-	or      O1H, O1H, $r6
+-	slli    O1L, P1L, #11
+-	move    $r6, #0x80000000
+-	or      O1H, O1H, $r6
+-	slti    $r15, $r5, #0x43e
+-	beqzs8  .LDnaninf
+-	subri   $r2, $r5, #0x43e
+-.LL18:
+-	move    $r6, #0x20
+-	slt     $r15, $r2, $r6
+-	bnezs8  .LL19
+-	move    O1L, O1H
+-	move    O1H, #0
+-	addi    $r2, $r2, #0xffffffe0
+-	bnez    O1L, .LL18
+-.LL19:
+-	beqz    $r2, .LL20
+-	move    P1L, O1H
+-	srl     O1L, O1L, $r2
+-	srl     O1H, O1H, $r2
+-	subri   $r2, $r2, #0x20
+-	sll     P1L, P1L, $r2
+-	or      O1L, O1L, P1L
+-.LL20:
+-	sltsi   $r15, P1H, #0
+-	beqzs8  .LDret
+-
+-	subri   O1H, O1H, #0
+-	beqz    O1L, .LL21
+-	subri   O1L, O1L, #0
+-	subi45  O1H, #1
+-.LL21:
+-
+-.LDret:
+-	move    P1L, O1L
+-	move    P1H, O1H
+-
+-.LD999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-
+-.LDnaninf:
+-	move    O1H, #0x80000000
+-	move    O1L, #0
+-	j       .LDret
+-	.size	__fixunsdfdi, .-__fixunsdfdi
+-#endif /* L_fixunsdfdi */
+-
+-
+-
+-#ifdef L_si_to_sf
+-
+-	.text
+-	.align	2
+-	.global	__floatsisf
+-	.type	__floatsisf, @function
+-__floatsisf:
+-	push    $lp
+-
+-	move    $r4, #0x80000000
+-	and     $r2, $r0, $r4
+-	beqz    $r0, .Li39
+-	sltsi   $r15, $r0, #0
+-	beqzs8  .Li40
+-	subri   $r0, $r0, #0
+-.Li40:
+-	move    $r1, #0x9e
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r3, $r0
+-#else
+-	pushm	$r0, $r2
+-	pushm	$r4, $r5
+-	bal	__clzsi2
+-	move	$r3, $r0
+-	popm	$r4, $r5
+-	popm	$r0, $r2
+-#endif
+-	sub     $r1, $r1, $r3
+-	sll     $r0, $r0, $r3
+-
+-	#ADD($r0, $0x80)
+-	move    $r15, #0x80
+-	add     $r0, $r0, $r15
+-	slt     $r15, $r0, $r15
+-
+-	#ADDC($r1, $0x0)
+-	add     $r1, $r1, $r15
+-	srai    $r4, $r0, #8
+-	andi    $r4, $r4, #1
+-	sub     $r0, $r0, $r4
+-	slli    $r0, $r0, #1
+-	srli    $r0, $r0, #9
+-	slli    $r4, $r1, #23
+-	or      $r0, $r0, $r4
+-.Li39:
+-	or      $r0, $r0, $r2
+-
+-.LH999:
+-	pop     $lp
+-	ret5    $lp
+-	.size	__floatsisf, .-__floatsisf
+-#endif /* L_si_to_sf */
+-
+-
+-
+-#ifdef L_si_to_df
+-
+-#ifndef __big_endian__
+-	#define O1L     $r1
+-	#define O1H     $r2
+-	#define O2L     $r4
+-	#define O2H	$r5
+-#else
+-	#define O1H     $r1
+-	#define O1L     $r2
+-	#define O2H     $r4
+-	#define O2L	$r5
+-#endif
+-	.text
+-	.align	2
+-	.global	__floatsidf
+-	.type	__floatsidf, @function
+-__floatsidf:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	move    O1L, #0
+-	move    O2H, O1L
+-	move    $r3, O1L
+-	move    O1H, $r0
+-	beqz    O1H, .Li39
+-	sltsi   $r15, O1H, #0
+-	beqzs8  .Li40
+-	move    O2H, #0x80000000
+-
+-	subri   O1H, O1H, #0
+-	beqz    O1L, .LL71
+-	subri   O1L, O1L, #0
+-	subi45  O1H, #1
+-.LL71:
+-.Li40:
+-	move    $r3, #0x41e
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r4, $r2
+-#else
+-	pushm	$r0, $r3
+-	push	$r5
+-	move	$r0, $r2
+-	bal	__clzsi2
+-	move	$r4, $r0
+-	pop	$r5
+-	popm	$r0, $r3
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r5, $r1
+-#else
+-	pushm	$r0, $r4
+-	move	$r0, $r1
+-	bal	__clzsi2
+-	move	$r5, $r0
+-	popm	$r0, $r4
+-#endif
+-#endif /* __big_endian__ */
+-	sub     $r3, $r3, O2L
+-	sll     O1H, O1H, O2L
+-.Li39:
+-	srli    O2L, O1L, #11
+-	slli    $r6, O1H, #21
+-	or      O2L, O2L, $r6
+-	slli    $r6, O1H, #1
+-	srli    $r6, $r6, #12
+-	or      O2H, O2H, $r6
+-	slli    $r6, $r3, #20
+-	or      O2H, O2H, $r6
+-	move    $r0, $r4
+-	move    $r1, $r5
+-
+-.LH999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-	.size __floatsidf, .-__floatsidf
+-#endif /* L_si_to_df */
+-
+-
+-
+-#ifdef L_floatdisf
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-#endif
+-	.text
+-	.align	2
+-	.global	__floatdisf
+-	.type	__floatdisf, @function
+-__floatdisf:
+-	push    $lp
+-	pushm   $r6, $r7
+-
+-	move    $r7, #0x80000000
+-	and     $r5, P1H, $r7
+-	move    P2H, P1H
+-	move    P2L, P1L
+-	or      $r7, P1H, P1L
+-	beqz    $r7, .Li1
+-	sltsi   $r15, P1H, #0
+-	beqzs8  .Li2
+-
+-	subri   P2H, P2H, #0
+-	beqz    P2L, .LL1
+-	subri   P2L, P2L, #0
+-	subi45  P2H, #1
+-.LL1:
+-.Li2:
+-	move    $r4, #0xbe
+-
+-
+-	#NORMd($r2, $r6, P1L)
+-	bnez    P2H, .LL2
+-	bnez    P2L, .LL3
+-	move    $r4, #0
+-	j       .LL4
+-.LL3:
+-	move    P2H, P2L
+-	move    P2L, #0
+-	move    $r6, #32
+-	sub     $r4, $r4, $r6
+-.LL2:
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r6, P2H
+-#else
+-	pushm	$r0, $r5
+-	move	$r0, P2H
+-	bal	__clzsi2
+-	move	$r6, $r0
+-	popm	$r0, $r5
+-#endif
+-	beqz    $r6, .LL4
+-	sub     $r4, $r4, $r6
+-	subri   P1L, $r6, #32
+-	srl     P1L, P2L, P1L
+-	sll     P2L, P2L, $r6
+-	sll     P2H, P2H, $r6
+-	or      P2H, P2H, P1L
+-.LL4:
+-	#NORMd End
+-
+-	beqz    P2L, .Li3
+-	ori     P2H, P2H, #1
+-.Li3:
+-	#ADD(P2H, $0x80)
+-	move    $r15, #0x80
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-
+-	#ADDC($r4, $0x0)
+-	add     $r4, $r4, $r15
+-	srli    $r7, P2H, #8
+-	andi    $r7, $r7, #1
+-	sub     P2H, P2H, $r7
+-	slli    P2H, P2H, #1
+-	srli    P2H, P2H, #9
+-	slli    $r7, $r4, #23
+-	or      P2H, P2H, $r7
+-.Li1:
+-	or      $r0, P2H, $r5
+-
+-.LA999:
+-	popm    $r6, $r7
+-	pop     $lp
+-	ret5    $lp
+-	.size	__floatdisf, .-__floatdisf
+-#endif /* L_floatdisf */
+-
+-
+-
+-#ifdef L_floatdidf
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-	#define O1L     $r5
+-	#define O1H     $r6
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-	#define O1H     $r5
+-	#define O1L     $r6
+-#endif
+-	.text
+-	.align	2
+-	.global	__floatdidf
+-	.type	__floatdidf, @function
+-__floatdidf:
+-	push    $lp
+-	pushm   $r6, $r8
+-
+-	move    $r4, #0
+-	move    $r7, $r4
+-	move    P2H, P1H
+-	move    P2L, P1L
+-	or      $r8, P1H, P1L
+-	beqz    $r8, .Li1
+-	move    $r4, #0x43e
+-	sltsi   $r15, P1H, #0
+-	beqzs8  .Li2
+-	move    $r7, #0x80000000
+-
+-	subri   P2H, P2H, #0
+-	beqz    P2L, .LL1
+-	subri   P2L, P2L, #0
+-	subi45  P2H, #1
+-.LL1:
+-
+-.Li2:
+-	#NORMd($r2, O1H, O1L)
+-	bnez    P2H, .LL2
+-	bnez    P2L, .LL3
+-	move    $r4, #0
+-	j       .LL4
+-.LL3:
+-	move    P2H, P2L
+-	move    P2L, #0
+-	move    O1H, #32
+-	sub     $r4, $r4, O1H
+-.LL2:
+-#ifdef __NDS32_PERF_EXT__
+-	clz	O1H, P2H
+-#else /* not __NDS32_PERF_EXT__ */
+-/*
+-  Replace clz with function call.
+-	clz     O1H, P2H
+-  EL:	clz     $r6, $r3
+-  EB:	clz	$r5, $r2
+-*/
+-#ifndef __big_endian__
+-	pushm	$r0, $r5
+-	move	$r0, $r3
+-	bal	__clzsi2
+-	move	$r6, $r0
+-	popm	$r0, $r5
+-#else
+-	pushm	$r0, $r4
+-	move	$r0, $r2
+-	bal	__clzsi2
+-	move	$r5, $r0
+-	popm	$r0, $r4
+-#endif
+-#endif /* not __NDS32_PERF_EXT__ */
+-	beqz    O1H, .LL4
+-	sub     $r4, $r4, O1H
+-	subri   O1L, O1H, #32
+-	srl     O1L, P2L, O1L
+-	sll     P2L, P2L, O1H
+-	sll     P2H, P2H, O1H
+-	or      P2H, P2H, O1L
+-.LL4:
+-	#NORMd End
+-
+-	#ADD(P2L, $0x400)
+-	move    $r15, #0x400
+-	add     P2L, P2L, $r15
+-	slt     $r15, P2L, $r15
+-
+-
+-	#ADDCC(P2H, $0x0)
+-	beqzs8  .LL7
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-.LL7:
+-
+-	#ADDC($r4, $0x0)
+-	add     $r4, $r4, $r15
+-	srli    $r8, P2L, #11
+-	andi    $r8, $r8, #1
+-	sub     P2L, P2L, $r8
+-.Li1:
+-	srli    O1L, P2L, #11
+-	slli    $r8, P2H, #21
+-	or      O1L, O1L, $r8
+-	slli    O1H, P2H, #1
+-	srli    O1H, O1H, #12
+-	slli    $r8, $r4, #20
+-	or      O1H, O1H, $r8
+-	or      O1H, O1H, $r7
+-	move    P1L, O1L
+-	move    P1H, O1H
+-
+-.LA999:
+-	popm    $r6, $r8
+-	pop     $lp
+-	ret5    $lp
+-	.size	__floatdidf, .-__floatdidf
+-#endif /* L_floatdidf */
+-
+-
+-
+-#ifdef L_floatunsisf
+-
+-	.text
+-	.align	2
+-	.global	__floatunsisf
+-	.type	__floatunsisf, @function
+-__floatunsisf:
+-	push    $lp
+-
+-	beqz    $r0, .Li41
+-	move    $r2, #0x9e
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r1, $r0
+-#else
+-	push	$r0
+-	pushm	$r2, $r5
+-	bal	__clzsi2
+-	move	$r1, $r0
+-	popm	$r2, $r5
+-	pop	$r0
+-#endif
+-
+-	sub     $r2, $r2, $r1
+-	sll     $r0, $r0, $r1
+-
+-	#ADD($r0, $0x80)
+-	move    $r15, #0x80
+-	add     $r0, $r0, $r15
+-	slt     $r15, $r0, $r15
+-
+-	#ADDC($r2, $0x0)
+-	add     $r2, $r2, $r15
+-	srli    $r3, $r0, #8
+-	andi    $r3, $r3, #1
+-	sub     $r0, $r0, $r3
+-	slli    $r0, $r0, #1
+-	srli    $r0, $r0, #9
+-	slli    $r3, $r2, #23
+-	or      $r0, $r0, $r3
+-
+-.Li41:
+-.LI999:
+-	pop     $lp
+-	ret5    $lp
+-	.size	__floatunsisf, .-__floatunsisf
+-#endif /* L_floatunsisf */
+-
+-
+-
+-#ifdef L_floatunsidf
+-
+-#ifndef __big_endian__
+-	#define O1L     $r1
+-	#define O1H     $r2
+-	#define O2L     $r4
+-	#define O2H	$r5
+-#else
+-	#define O1H     $r1
+-	#define O1L     $r2
+-	#define O2H     $r4
+-	#define O2L	$r5
+-#endif
+-	.text
+-	.align	2
+-	.global	__floatunsidf
+-	.type	__floatunsidf, @function
+-__floatunsidf:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	move    O1L, #0
+-	move    $r3, O1L
+-	move    O1H, $r0
+-	beqz    O1H, .Li41
+-	move    $r3, #0x41e
+-#ifndef __big_endian__
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r5, $r2
+-#else
+-	pushm	$r0, $r4
+-	move	$r0, $r2
+-	bal	__clzsi2
+-	move	$r5, $r0
+-	popm	$r0, $r4
+-#endif
+-#else /* __big_endian__ */
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r4, $r1
+-#else
+-	pushm	$r0, $r3
+-	push	$r5
+-	move	$r0, $r1
+-	bal	__clzsi2
+-	move	$r4, $r0
+-	pop	$r5
+-	popm	$r0, $r3
+-#endif
+-#endif /* __big_endian__ */
+-	sub     $r3, $r3, O2H
+-	sll     O1H, O1H, O2H
+-.Li41:
+-	srli    O2L, O1L, #11
+-	slli    $r6, O1H, #21
+-	or      O2L, O2L, $r6
+-	slli    O2H, O1H, #1
+-	srli    O2H, O2H, #12
+-	slli    $r6, $r3, #20
+-	or      O2H, O2H, $r6
+-	move    $r0, $r4
+-	move    $r1, $r5
+-
+-.LI999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-	.size __floatunsidf, .-__floatunsidf
+-#endif /* L_floatunsidf */
+-
+-
+-
+-#ifdef L_floatundisf
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-#endif
+-	.text
+-	.align	2
+-	.global	__floatundisf
+-	.type	__floatundisf, @function
+-__floatundisf:
+-	push    $lp
+-	pushm   $r6, $r6
+-
+-	move    P2H, P1H
+-	move    P2L, P1L
+-	or      $r6, P1H, P1L
+-	beqz    $r6, .Li4
+-	move    $r4, #0xbe
+-
+-
+-	#NORMd($r2, $r5, P1L)
+-	bnez    P2H, .LL5
+-	bnez    P2L, .LL6
+-	move    $r4, #0
+-	j       .LL7
+-.LL6:
+-	move    P2H, P2L
+-	move    P2L, #0
+-	move    $r5, #32
+-	sub     $r4, $r4, $r5
+-.LL5:
+-#ifdef __NDS32_PERF_EXT__
+-	clz	$r5, P2H
+-#else
+-	pushm	$r0, $r4
+-	move	$r0, P2H
+-	bal	__clzsi2
+-	move	$r5, $r0
+-	popm	$r0, $r4
+-#endif
+-	beqz    $r5, .LL7
+-	sub     $r4, $r4, $r5
+-	subri   P1L, $r5, #32
+-	srl     P1L, P2L, P1L
+-	sll     P2L, P2L, $r5
+-	sll     P2H, P2H, $r5
+-	or      P2H, P2H, P1L
+-.LL7:
+-	#NORMd End
+-
+-	beqz    P2L, .Li5
+-	ori     P2H, P2H, #1
+-.Li5:
+-	#ADD(P2H, $0x80)
+-	move    $r15, #0x80
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-
+-	#ADDC($r4, $0x0)
+-	add     $r4, $r4, $r15
+-	srli    $r6, P2H, #8
+-	andi    $r6, $r6, #1
+-	sub     P2H, P2H, $r6
+-	slli    P2H, P2H, #1
+-	srli    P2H, P2H, #9
+-	slli    $r6, $r4, #23
+-	or      P2H, P2H, $r6
+-.Li4:
+-	move    $r0, P2H
+-
+-.LB999:
+-	popm    $r6, $r6
+-	pop     $lp
+-	ret5    $lp
+-	.size	__floatundisf, .-__floatundisf
+-#endif /* L_floatundisf */
+-
+-
+-
+-#ifdef L_floatundidf
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-	#define O1L     $r5
+-	#define O1H     $r6
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-	#define O1H     $r5
+-	#define O1L     $r6
+-#endif
+-	.text
+-	.align	2
+-	.global	__floatundidf
+-	.type	__floatundidf, @function
+-__floatundidf:
+-	push    $lp
+-	pushm   $r6, $r7
+-
+-	move    $r4, #0
+-	move    P2H, P1H
+-	move    P2L, P1L
+-	or      $r7, P1H, P1L
+-	beqz    $r7, .Li3
+-	move    $r4, #0x43e
+-
+-
+-	#NORMd($r2, O1H, O1L)
+-	bnez    P2H, .LL8
+-	bnez    P2L, .LL9
+-	move    $r4, #0
+-	j       .LL10
+-.LL9:
+-	move    P2H, P2L
+-	move    P2L, #0
+-	move    O1H, #32
+-	sub     $r4, $r4, O1H
+-.LL8:
+-#ifdef __NDS32_PERF_EXT__
+-	clz	O1H, P2H
+-#else /* not __NDS32_PERF_EXT__ */
+-/*
+-  Replace clz with function call.
+-	clz     O1H, P2H
+-  EL:	clz     $r6, $r3
+-  EB:	clz	$r5, $r2
+-*/
+-#ifndef __big_endian__
+-	pushm	$r0, $r5
+-	move	$r0, $r3
+-	bal	__clzsi2
+-	move	$r6, $r0
+-	popm	$r0, $r5
+-#else
+-	pushm	$r0, $r4
+-	move	$r0, $r2
+-	bal	__clzsi2
+-	move	$r5, $r0
+-	popm	$r0, $r4
+-#endif
+-#endif /* not __NDS32_PERF_EXT__ */
+-	beqz    O1H, .LL10
+-	sub     $r4, $r4, O1H
+-	subri   O1L, O1H, #32
+-	srl     O1L, P2L, O1L
+-	sll     P2L, P2L, O1H
+-	sll     P2H, P2H, O1H
+-	or      P2H, P2H, O1L
+-.LL10:
+-	#NORMd End
+-
+-	#ADD(P2L, $0x400)
+-	move    $r15, #0x400
+-	add     P2L, P2L, $r15
+-	slt     $r15, P2L, $r15
+-
+-
+-	#ADDCC(P2H, $0x0)
+-	beqzs8  .LL13
+-	add     P2H, P2H, $r15
+-	slt     $r15, P2H, $r15
+-.LL13:
+-
+-	#ADDC($r4, $0x0)
+-	add     $r4, $r4, $r15
+-	srli    $r7, P2L, #11
+-	andi    $r7, $r7, #1
+-	sub     P2L, P2L, $r7
+-.Li3:
+-	srli    O1L, P2L, #11
+-	slli    $r7, P2H, #21
+-	or      O1L, O1L, $r7
+-	slli    O1H, P2H, #1
+-	srli    O1H, O1H, #12
+-	slli    $r7, $r4, #20
+-	or      O1H, O1H, $r7
+-	move    P1L, O1L
+-	move    P1H, O1H
+-
+-.LB999:
+-	popm    $r6, $r7
+-	pop     $lp
+-	ret5    $lp
+-	.size	__floatundidf, .-__floatundidf
+-#endif /* L_floatundidf */
+-
+-
+-
+-#ifdef L_compare_sf
+-
+-	.text
+-	.align	2
+-	.global	__cmpsf2
+-	.type	__cmpsf2, @function
+-__cmpsf2:
+-	.global	__eqsf2
+-	.type	__eqsf2, @function
+-__eqsf2:
+-	.global	__ltsf2
+-	.type	__ltsf2, @function
+-__ltsf2:
+-	.global	__lesf2
+-	.type	__lesf2, @function
+-__lesf2:
+-	.global	__nesf2
+-	.type	__nesf2, @function
+-__nesf2:
+-	move    $r4, #1
+-	j	.LA
+-
+-	.global	__gesf2
+-	.type	__gesf2, @function
+-__gesf2:
+-	.global	__gtsf2
+-	.type	__gtsf2, @function
+-__gtsf2:
+-	move	$r4, #-1
+-.LA:
+-	push    $lp
+-
+-	slli    $r2, $r0, #1
+-	slli    $r3, $r1, #1
+-	or      $r5, $r2, $r3
+-	beqz    $r5, .LMequ
+-	move    $r5, #0xff000000
+-	slt     $r15, $r5, $r2
+-	bnezs8  .LMnan
+-	slt     $r15, $r5, $r3
+-	bnezs8  .LMnan
+-	srli    $r2, $r2, #1
+-	sltsi   $r15, $r0, #0
+-	beqzs8  .Li48
+-	subri   $r2, $r2, #0
+-.Li48:
+-	srli    $r3, $r3, #1
+-	sltsi   $r15, $r1, #0
+-	beqzs8  .Li49
+-	subri   $r3, $r3, #0
+-.Li49:
+-	slts    $r15, $r2, $r3
+-	beqzs8  .Li50
+-	move    $r0, #-1
+-	j       .LM999
+-.Li50:
+-	slts    $r15, $r3, $r2
+-	beqzs8  .LMequ
+-	move    $r0, #1
+-	j       .LM999
+-
+-.LMequ:
+-	move    $r0, #0
+-
+-.LM999:
+-	pop     $lp
+-	ret5    $lp
+-
+-.LMnan:
+-	move    $r0, $r4
+-	j       .LM999
+-	.size   __cmpsf2, .-__cmpsf2
+-	.size   __eqsf2, .-__eqsf2
+-	.size   __ltsf2, .-__ltsf2
+-	.size   __lesf2, .-__lesf2
+-	.size   __nesf2, .-__nesf2
+-	.size   __gesf2, .-__gesf2
+-	.size   __gtsf2, .-__gtsf2
+-#endif /* L_compare_sf */
+-
+-
+-
+-#ifdef L_compare_df
+-
+-#ifdef __big_endian__
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-#else
+-	#define P1H     $r1
+-	#define P1L     $r0
+-	#define P2H     $r3
+-	#define P2L     $r2
+-#endif
+-	.align	2
+-	.globl	__gtdf2
+-	.globl	__gedf2
+-	.globl	__ltdf2
+-	.globl	__ledf2
+-	.globl	__eqdf2
+-	.globl	__nedf2
+-	.globl	__cmpdf2
+-	.type	__gtdf2, @function
+-	.type	__gedf2, @function
+-	.type	__ltdf2, @function
+-	.type	__ledf2, @function
+-	.type	__eqdf2, @function
+-	.type	__nedf2, @function
+-	.type	__cmpdf2, @function
+-__gtdf2:
+-__gedf2:
+-	movi	$r4, -1
+-	b	.L1
+-
+-__ltdf2:
+-__ledf2:
+-__cmpdf2:
+-__nedf2:
+-__eqdf2:
+-	movi	$r4, 1
+-.L1:
+-#if defined (__NDS32_ISA_V3M__)
+-	push25	$r10, 0
+-#else
+-	smw.adm	$r6, [$sp], $r9, 0
+-#endif
+-
+-	sethi	$r5, 0x7ff00
+-	and	$r6, P1H, $r5	! r6=aExp
+-	and	$r7, P2H, $r5	! r7=bExp
+-	slli	$r8, P1H, 12	! r8=aSig0
+-	slli	$r9, P2H, 12	! r9=bSig0
+-	beq	$r6, $r5, .L11	! aExp==0x7ff
+-	beq	$r7, $r5, .L12	! bExp==0x7ff
+-.L2:
+-	slli	$ta, P1H, 1	! ta=ahigh<<1
+-	or	$ta, P1L, $ta	!
+-	xor	$r5, P1H, P2H	! r5=ahigh^bhigh
+-	beqz	$ta, .L3	! if(ahigh<<1)==0,go .L3
+-	!-------------------------------
+-	! (ahigh<<1)!=0 || (bhigh<<1)!=0
+-	!-------------------------------
+-.L4:
+-	beqz	$r5, .L5	! ahigh==bhigh, go .L5
+-	!--------------------
+-	! a != b
+-	!--------------------
+-.L6:
+-	bltz	$r5, .L7	! if(aSign!=bSign), go .L7
+-	!--------------------
+-	! aSign==bSign
+-	!--------------------
+-	slt	$ta, $r6, $r7	! ta=(aExp<bExp)
+-	bne	$r6, $r7, .L8	! if(aExp!=bExp),go .L8
+-	slt	$ta, $r8, $r9	! ta=(aSig0<bSig0)
+-	bne	$r8, $r9, .L8	! if(aSig0!=bSig0),go .L8
+-	slt	$ta, P1L, P2L	! ta=(aSig1<bSig1)
+-.L8:
+-	beqz	$ta, .L10	! if(|a|>|b|), go .L10
+-	nor	$r0, P2H, P2H	! if(|a|<|b|),return (~yh)
+-.L14:
+-#if defined (__NDS32_ISA_V3M__)
+-	pop25	$r10, 0
+-#else
+-	lmw.bim	$r6, [$sp], $r9, 0
+-	ret
+-#endif
+-.L10:
+-	ori	$r0, P2H, 1	! return (yh|1)
+-	b	.L14
+-	!--------------------
+-	! (ahigh<<1)=0
+-	!--------------------
+-.L3:
+-	slli	$ta, P2H, 1	! ta=bhigh<<1
+-	or	$ta, P2L, $ta	!
+-	bnez	$ta, .L4	! ta=(bhigh<<1)!=0,go .L4
+-.L5:
+-	xor	$ta, P1L, P2L	! ta=alow^blow
+-	bnez	$ta, .L6	! alow!=blow,go .L6
+-	movi	$r0, 0		! a==b, return 0
+-	b	.L14
+-	!--------------------
+-	! aExp=0x7ff;
+-	!--------------------
+-.L11:
+-	or	P1L, P1L, $r8	! x1=(aSig0|aSig1)
+-	bnez	P1L, .L13	! if(a=nan), go.L13
+-	xor	$ta, $r7, $r5	! ta=(bExp^0x7ff)
+-	bnez	$ta, .L2	! if(bExp!=0x7ff), go .L2
+-	!--------------------
+-	! bExp=0x7ff;
+-	!--------------------
+-.L12:
+-	or	$ta, P2L, $r9	! ta=(bSig0|bSig1)
+-	beqz	$ta, .L2	! if(b!=nan), go .L2
+-.L13:
+-	move	$r0, $r4
+-	b	.L14
+-	!--------------------
+-	! aSign!=bSign
+-	!--------------------
+-.L7:
+-	ori	$r0, P1H, 1	! if(aSign!=bSign), return (ahigh|1)
+-	b	.L14
+-
+-	.size	__gtdf2, .-__gtdf2
+-	.size	__gedf2, .-__gedf2
+-	.size	__ltdf2, .-__ltdf2
+-	.size	__ledf2, .-__ledf2
+-	.size	__eqdf2, .-__eqdf2
+-	.size	__nedf2, .-__nedf2
+-	.size	__cmpdf2, .-__cmpdf2
+-#endif /* L_compare_df */
+-
+-
+-
+-#ifdef L_unord_sf
+-
+-	.text
+-	.align	2
+-	.global	__unordsf2
+-	.type	__unordsf2, @function
+-__unordsf2:
+-	push    $lp
+-
+-	slli    $r2, $r0, #1
+-	move    $r3, #0xff000000
+-	slt     $r15, $r3, $r2
+-	beqzs8  .Li52
+-	move    $r0, #1
+-	j       .LP999
+-.Li52:
+-	slli    $r2, $r1, #1
+-	move    $r3, #0xff000000
+-	slt     $r15, $r3, $r2
+-	beqzs8  .Li53
+-	move    $r0, #1
+-	j       .LP999
+-.Li53:
+-	move    $r0, #0
+-
+-.LP999:
+-	pop     $lp
+-	ret5    $lp
+-	.size	__unordsf2, .-__unordsf2
+-#endif /* L_unord_sf */
+-
+-
+-
+-#ifdef L_unord_df
+-
+-#ifndef __big_endian__
+-	#define P1L     $r0
+-	#define P1H     $r1
+-	#define P2L     $r2
+-	#define P2H     $r3
+-#else
+-	#define P1H     $r0
+-	#define P1L     $r1
+-	#define P2H     $r2
+-	#define P2L     $r3
+-#endif
+-	.text
+-	.align	2
+-	.global	__unorddf2
+-	.type	__unorddf2, @function
+-__unorddf2:
+-	push    $lp
+-
+-	slli    $r4, P1H, #1
+-	beqz    P1L, .Li66
+-	addi    $r4, $r4, #1
+-.Li66:
+-	move    $r5, #0xffe00000
+-	slt     $r15, $r5, $r4
+-	beqzs8  .Li67
+-	move    $r0, #1
+-	j       .LR999
+-.Li67:
+-	slli    $r4, P2H, #1
+-	beqz    P2L, .Li68
+-	addi    $r4, $r4, #1
+-.Li68:
+-	move    $r5, #0xffe00000
+-	slt     $r15, $r5, $r4
+-	beqzs8  .Li69
+-	move    $r0, #1
+-	j       .LR999
+-.Li69:
+-	move    $r0, #0
+-
+-.LR999:
+-	pop     $lp
+-	ret5    $lp
+-	.size __unorddf2, .-__unorddf2
+-#endif /* L_unord_df */
+-/* ------------------------------------------- */
+-/* DPBIT floating point operations for libgcc  */
+-/* ------------------------------------------- */
+diff --git a/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c b/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c
+deleted file mode 100644
+index 6afd6ab..0000000
+--- a/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c
++++ /dev/null
+@@ -1,38 +0,0 @@
+-/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-extern int __clzsi2 (int val);
+-int
+-__clzdi2 (long long val)
+-{
+-  if (val >> 32)
+-    {
+-      return __clzsi2 (val >> 32);
+-    }
+-  else
+-    {
+-      return __clzsi2 (val) + 32;
+-    }
+-}
+diff --git a/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c b/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c
+deleted file mode 100644
+index 407caaf..0000000
+--- a/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c
++++ /dev/null
+@@ -1,49 +0,0 @@
+-/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler
+-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+-   Contributed by Andes Technology Corporation.
+-
+-   This file is part of GCC.
+-
+-   GCC is free software; you can redistribute it and/or modify it
+-   under the terms of the GNU General Public License as published
+-   by the Free Software Foundation; either version 3, or (at your
+-   option) any later version.
+-
+-   GCC is distributed in the hope that it will be useful, but WITHOUT
+-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+-   License for more details.
+-
+-   Under Section 7 of GPL version 3, you are granted additional
+-   permissions described in the GCC Runtime Library Exception, version
+-   3.1, as published by the Free Software Foundation.
+-
+-   You should have received a copy of the GNU General Public License and
+-   a copy of the GCC Runtime Library Exception along with this program;
+-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+-   <http://www.gnu.org/licenses/>.  */
+-
+-int
+-__clzsi2 (int val)
+-{
+-  int i = 32;
+-  int j = 16;
+-  int temp;
+-
+-  for (; j; j >>= 1)
+-    {
+-      if (temp = val >> j)
+-	{
+-	  if (j == 1)
+-	    {
+-	      return (i - 2);
+-	    }
+-	  else
+-	    {
+-	      i -= j;
+-	      val = temp;
+-	    }
+-	}
+-    }
+-  return (i - val);
+-}
+diff --git a/libgcc/config/nds32/linux-atomic.c b/libgcc/config/nds32/linux-atomic.c
+new file mode 100644
+index 0000000..69f589b
+--- /dev/null
++++ b/libgcc/config/nds32/linux-atomic.c
+@@ -0,0 +1,282 @@
++/* Linux-specific atomic operations for NDS32 Linux.
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++
++This file is free software; you can redistribute it and/or modify it
++under the terms of the GNU General Public License as published by the
++Free Software Foundation; either version 3, or (at your option) any
++later version.
++
++This file is distributed in the hope that it will be useful, but
++WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++General Public License for more details.
++
++Under Section 7 of GPL version 3, you are granted additional
++permissions described in the GCC Runtime Library Exception, version
++3.1, as published by the Free Software Foundation.
++
++You should have received a copy of the GNU General Public License and
++a copy of the GCC Runtime Library Exception along with this program;
++see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++<http://www.gnu.org/licenses/>.  */
++
++/* We implement byte, short and int versions of each atomic operation
++   using the kernel helper defined below.  There is no support for
++   64-bit operations yet.  */
++
++/* This function is copied from the NDS32 Linux kernel.  */
++static inline int
++__kernel_cmpxchg (int oldval, int newval, int *mem)
++{
++  int temp1, temp2, temp3, offset;
++
++  asm volatile ("msync\tall\n"
++		"movi\t%0, #0\n"
++		"1:\n"
++		"\tllw\t%1, [%4+%0]\n"
++		"\tsub\t%3, %1, %6\n"
++		"\tcmovz\t%2, %5, %3\n"
++		"\tcmovn\t%2, %1, %3\n"
++		"\tscw\t%2, [%4+%0]\n"
++		"\tbeqz\t%2, 1b\n"
++		: "=&r" (offset), "=&r" (temp3), "=&r" (temp2), "=&r" (temp1)
++		: "r" (mem), "r" (newval), "r" (oldval) : "memory");
++
++  return temp1;
++}
++
++#define HIDDEN __attribute__ ((visibility ("hidden")))
++
++#ifdef __NDS32_EL__
++#define INVERT_MASK_1 0
++#define INVERT_MASK_2 0
++#else
++#define INVERT_MASK_1 24
++#define INVERT_MASK_2 16
++#endif
++
++#define MASK_1 0xffu
++#define MASK_2 0xffffu
++
++#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
++  int HIDDEN								\
++  __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
++  {									\
++    int failure, tmp;							\
++									\
++    do {								\
++      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);			\
++      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
++    } while (failure != 0);						\
++									\
++    return tmp;								\
++  }
++
++FETCH_AND_OP_WORD (add,   , +)
++FETCH_AND_OP_WORD (sub,   , -)
++FETCH_AND_OP_WORD (or,    , |)
++FETCH_AND_OP_WORD (and,   , &)
++FETCH_AND_OP_WORD (xor,   , ^)
++FETCH_AND_OP_WORD (nand, ~, &)
++
++#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
++#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
++
++/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
++   subword-sized quantities.  */
++
++#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN)	\
++  TYPE HIDDEN								\
++  NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val)			\
++  {									\
++    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
++    unsigned int mask, shift, oldval, newval;				\
++    int failure;							\
++									\
++    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
++    mask = MASK_##WIDTH << shift;					\
++									\
++    do {								\
++      oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST);		\
++      newval = ((PFX_OP (((oldval & mask) >> shift)			\
++			 INF_OP (unsigned int) val)) << shift) & mask;	\
++      newval |= oldval & ~mask;						\
++      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
++    } while (failure != 0);						\
++									\
++    return (RETURN & mask) >> shift;					\
++  }
++
++
++SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
++SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
++SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, oldval)
++SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
++SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
++SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
++
++SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
++SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
++SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, oldval)
++SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, oldval)
++SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
++SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
++
++#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)				\
++  int HIDDEN								\
++  __sync_##OP##_and_fetch_4 (int *ptr, int val)				\
++  {									\
++    int tmp, failure;							\
++									\
++    do {								\
++      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);			\
++      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
++    } while (failure != 0);						\
++									\
++    return PFX_OP (tmp INF_OP val);					\
++  }
++
++OP_AND_FETCH_WORD (add,   , +)
++OP_AND_FETCH_WORD (sub,   , -)
++OP_AND_FETCH_WORD (or,    , |)
++OP_AND_FETCH_WORD (and,   , &)
++OP_AND_FETCH_WORD (xor,   , ^)
++OP_AND_FETCH_WORD (nand, ~, &)
++
++SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, newval)
++SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, newval)
++SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, newval)
++SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, newval)
++SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, newval)
++SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
++
++SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, newval)
++SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, newval)
++SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, newval)
++SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, newval)
++SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
++SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
++
++int HIDDEN
++__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
++{
++  int actual_oldval, fail;
++
++  while (1)
++    {
++      actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
++
++      if (oldval != actual_oldval)
++	return actual_oldval;
++
++      fail = __kernel_cmpxchg (actual_oldval, newval, ptr);
++
++      if (!fail)
++	return oldval;
++    }
++}
++
++#define SUBWORD_VAL_CAS(TYPE, WIDTH)					\
++  TYPE HIDDEN								\
++  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
++				       TYPE newval)			\
++  {									\
++    int *wordptr = (int *)((unsigned long) ptr & ~3), fail;		\
++    unsigned int mask, shift, actual_oldval, actual_newval;		\
++									\
++    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
++    mask = MASK_##WIDTH << shift;					\
++									\
++    while (1)								\
++      {									\
++	actual_oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); 	\
++									\
++	if (((actual_oldval & mask) >> shift) != (unsigned int) oldval)	\
++	  return (actual_oldval & mask) >> shift;			\
++									\
++	actual_newval = (actual_oldval & ~mask)				\
++			| (((unsigned int) newval << shift) & mask);	\
++									\
++	fail = __kernel_cmpxchg (actual_oldval, actual_newval,		\
++				 wordptr);				\
++									\
++	if (!fail)							\
++	  return oldval;						\
++      }									\
++  }
++
++SUBWORD_VAL_CAS (unsigned short, 2)
++SUBWORD_VAL_CAS (unsigned char,  1)
++
++typedef unsigned char bool;
++
++bool HIDDEN
++__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
++{
++  int failure = __kernel_cmpxchg (oldval, newval, ptr);
++  return (failure == 0);
++}
++
++#define SUBWORD_BOOL_CAS(TYPE, WIDTH)					\
++  bool HIDDEN								\
++  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
++					TYPE newval)			\
++  {									\
++    TYPE actual_oldval							\
++      = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval);	\
++    return (oldval == actual_oldval);					\
++  }
++
++SUBWORD_BOOL_CAS (unsigned short, 2)
++SUBWORD_BOOL_CAS (unsigned char,  1)
++
++int HIDDEN
++__sync_lock_test_and_set_4 (int *ptr, int val)
++{
++  int failure, oldval;
++
++  do {
++    oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
++    failure = __kernel_cmpxchg (oldval, val, ptr);
++  } while (failure != 0);
++
++  return oldval;
++}
++
++#define SUBWORD_TEST_AND_SET(TYPE, WIDTH)				\
++  TYPE HIDDEN								\
++  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)		\
++  {									\
++    int failure;							\
++    unsigned int oldval, newval, shift, mask;				\
++    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
++									\
++    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
++    mask = MASK_##WIDTH << shift;					\
++									\
++    do {								\
++      oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST);		\
++      newval = (oldval & ~mask)						\
++	       | (((unsigned int) val << shift) & mask);		\
++      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
++    } while (failure != 0);						\
++									\
++    return (oldval & mask) >> shift;					\
++  }
++
++SUBWORD_TEST_AND_SET (unsigned short, 2)
++SUBWORD_TEST_AND_SET (unsigned char,  1)
++
++#define SYNC_LOCK_RELEASE(TYPE, WIDTH)					\
++  void HIDDEN								\
++  __sync_lock_release_##WIDTH (TYPE *ptr)				\
++  {									\
++    /* All writes before this point must be seen before we release	\
++       the lock itself.  */						\
++    __builtin_nds32_msync_all ();					\
++    *ptr = 0;								\
++  }
++
++SYNC_LOCK_RELEASE (int,   4)
++SYNC_LOCK_RELEASE (short, 2)
++SYNC_LOCK_RELEASE (char,  1)
+diff --git a/libgcc/config/nds32/linux-unwind.h b/libgcc/config/nds32/linux-unwind.h
+new file mode 100644
+index 0000000..921edf9
+--- /dev/null
++++ b/libgcc/config/nds32/linux-unwind.h
+@@ -0,0 +1,156 @@
++/* DWARF2 EH unwinding support for NDS32 Linux signal frame.
++   Copyright (C) 2014-2015 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef inhibit_libc
++
++/* Do code reading to identify a signal frame, and set the frame
++   state data appropriately.  See unwind-dw2.c for the structs.
++   The corresponding bits in the Linux kernel are in
++   arch/nds32/kernel/signal.c.  */
++
++#include <signal.h>
++#include <asm/unistd.h>
++
++/* Exactly the same layout as the kernel structures, unique names.  */
++
++/* arch/nds32/kernel/signal.c */
++struct _sigframe {
++    struct ucontext uc;
++    unsigned long retcode;
++};
++
++struct _rt_sigframe {
++  siginfo_t info;
++  struct _sigframe sig;
++};
++#define SIGRETURN 0xeb0e0a64
++#define RT_SIGRETURN 0xab150a64
++
++#define MD_FALLBACK_FRAME_STATE_FOR nds32_fallback_frame_state
++
++/* This function is supposed to be invoked by uw_frame_state_for()
++   when there is no unwind data available.
++
++   Generally, given the _Unwind_Context CONTEXT for a stack frame,
++   we need to look up its caller and decode information into FS.
++   However, if the exception handling happens within a signal handler,
++   the return address of signal handler is a special module, which
++   contains signal return syscall and has no FDE in the .eh_frame section.
++   We need to implement MD_FALLBACK_FRAME_STATE_FOR so that we can
++   unwind through signal frames.  */
++static _Unwind_Reason_Code
++nds32_fallback_frame_state (struct _Unwind_Context *context,
++			    _Unwind_FrameState *fs)
++{
++  u_int32_t *pc = (u_int32_t *) context->ra;
++  struct sigcontext *sc_;
++  _Unwind_Ptr new_cfa;
++
++#ifdef __NDS32_EB__
++#error "Signal handler is not supported for force unwind."
++#endif
++
++  if ((_Unwind_Ptr) pc & 3)
++    return _URC_END_OF_STACK;
++
++  /* Check if we are going through a signal handler.
++     See arch/nds32/kernel/signal.c implementation.
++       SWI_SYS_SIGRETURN    -> (0xeb0e0a64)
++       SWI_SYS_RT_SIGRETURN -> (0xab150a64)
++     FIXME: Currently we only handle little endian (EL) case.  */
++  if (pc[0] == SIGRETURN)
++    {
++      /* Use the '_sigframe' memory address to locate the kernel's sigcontext.
++	 The sigcontext structures in arch/nds32/include/asm/sigcontext.h.  */
++      struct _sigframe *rt_;
++      rt_ = context->cfa;
++      sc_ = &rt_->uc.uc_mcontext;
++    }
++  else if (pc[0] == RT_SIGRETURN)
++    {
++      /* Use the '_rt_sigframe' address to locate the kernel's sigcontext.  */
++      struct _rt_sigframe *rt_;
++      rt_ = context->cfa;
++      sc_ = &rt_->sig.uc.uc_mcontext;
++    }
++  else
++    return _URC_END_OF_STACK;
++
++  /* Update cfa from sigcontext.  */
++  new_cfa = (_Unwind_Ptr) sc_;
++  fs->regs.cfa_how = CFA_REG_OFFSET;
++  fs->regs.cfa_reg = STACK_POINTER_REGNUM;
++  fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa;
++
++#define NDS32_PUT_FS_REG(NUM, NAME) \
++  (fs->regs.reg[NUM].how = REG_SAVED_OFFSET, \
++   fs->regs.reg[NUM].loc.offset = (_Unwind_Ptr) &(sc_->NAME) - new_cfa)
++
++  /* Restore all register values.  */
++  NDS32_PUT_FS_REG (0, nds32_r0);
++  NDS32_PUT_FS_REG (1, nds32_r1);
++  NDS32_PUT_FS_REG (2, nds32_r2);
++  NDS32_PUT_FS_REG (3, nds32_r3);
++  NDS32_PUT_FS_REG (4, nds32_r4);
++  NDS32_PUT_FS_REG (5, nds32_r5);
++  NDS32_PUT_FS_REG (6, nds32_r6);
++  NDS32_PUT_FS_REG (7, nds32_r7);
++  NDS32_PUT_FS_REG (8, nds32_r8);
++  NDS32_PUT_FS_REG (9, nds32_r9);
++  NDS32_PUT_FS_REG (10, nds32_r10);
++  NDS32_PUT_FS_REG (11, nds32_r11);
++  NDS32_PUT_FS_REG (12, nds32_r12);
++  NDS32_PUT_FS_REG (13, nds32_r13);
++  NDS32_PUT_FS_REG (14, nds32_r14);
++  NDS32_PUT_FS_REG (15, nds32_r15);
++  NDS32_PUT_FS_REG (16, nds32_r16);
++  NDS32_PUT_FS_REG (17, nds32_r17);
++  NDS32_PUT_FS_REG (18, nds32_r18);
++  NDS32_PUT_FS_REG (19, nds32_r19);
++  NDS32_PUT_FS_REG (20, nds32_r20);
++  NDS32_PUT_FS_REG (21, nds32_r21);
++  NDS32_PUT_FS_REG (22, nds32_r22);
++  NDS32_PUT_FS_REG (23, nds32_r23);
++  NDS32_PUT_FS_REG (24, nds32_r24);
++  NDS32_PUT_FS_REG (25, nds32_r25);
++
++  NDS32_PUT_FS_REG (28, nds32_fp);
++  NDS32_PUT_FS_REG (29, nds32_gp);
++  NDS32_PUT_FS_REG (30, nds32_lp);
++  NDS32_PUT_FS_REG (31, nds32_sp);
++
++  /* Restore PC, point to trigger signal instruction.  */
++  NDS32_PUT_FS_REG (32, nds32_ipc);
++
++#undef NDS32_PUT_FS_REG
++
++  /* The retaddr is PC, use PC to find FDE.  */
++  fs->retaddr_column = 32;
++  fs->signal_frame = 1;
++
++  return _URC_NO_REASON;
++}
++
++#endif
+diff --git a/libgcc/config/nds32/sfp-machine.h b/libgcc/config/nds32/sfp-machine.h
+index d822898..930a32e 100644
+--- a/libgcc/config/nds32/sfp-machine.h
++++ b/libgcc/config/nds32/sfp-machine.h
+@@ -76,6 +76,25 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+     R##_c = FP_CLS_NAN;						\
+   } while (0)
+ 
++#ifdef NDS32_ABI_2FP_PLUS
++#define FP_RND_NEAREST		0x0
++#define FP_RND_PINF		0x1
++#define FP_RND_MINF		0x2
++#define FP_RND_ZERO		0x3
++#define FP_RND_MASK		0x3
++
++#define _FP_DECL_EX \
++  unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST
++
++#define FP_INIT_ROUNDMODE			\
++  do {						\
++    _fcsr = __builtin_nds32_fmfcsr ();		\
++  } while (0)
++
++#define FP_ROUNDMODE (_fcsr & FP_RND_MASK)
++
++#endif
++
+ /* Not checked.  */
+ #define _FP_TININESS_AFTER_ROUNDING 0
+ 
+diff --git a/libgcc/config/nds32/t-nds32 b/libgcc/config/nds32/t-nds32
+index 20c8a3f..4e58b1b 100644
+--- a/libgcc/config/nds32/t-nds32
++++ b/libgcc/config/nds32/t-nds32
+@@ -26,33 +26,22 @@
+ #   Make sure the linker script include these two objects
+ #   for building .ctors/.dtors sections.
+ 
+-# Use -DCRT_BEGIN to create beginning parts of .init and .fini content
+-# Make sure you are building crtbegin1.o with -O0 optimization,
+-# otherwise the static function will be optimized out
++# Use -DCRT_BEGIN to create beginning parts of .init and .fini content.
+ crtbegin1.o: $(srcdir)/config/nds32/initfini.c $(GCC_PASSES) $(CONFIG_H)
+ 	$(GCC_FOR_TARGET) $(INCLUDES) \
+ 	$(CFLAGS) \
+ 	-DCRT_BEGIN \
+ 	-finhibit-size-directive -fno-inline-functions \
+-	-O0 -c $(srcdir)/config/nds32/initfini.c -o crtbegin1.o
++	-fno-toplevel-reorder \
++	-Os -c $(srcdir)/config/nds32/initfini.c -o crtbegin1.o
+ 
+-# Use -DCRT_END to create ending parts of .init and .fini content
+-# Make sure you are building crtend1.o with -O0 optimization,
+-# otherwise the static function will be optimized out
++# Use -DCRT_END to create ending parts of .init and .fini content.
+ crtend1.o: $(srcdir)/config/nds32/initfini.c $(GCC_PASSES) $(CONFIG_H)
+ 	$(GCC_FOR_TARGET) $(INCLUDES) \
+ 	$(CFLAGS) \
+ 	-DCRT_END \
+ 	-finhibit-size-directive -fno-inline-functions \
+-	-O0 -c $(srcdir)/config/nds32/initfini.c -o crtend1.o
+-
+-# Use this rule if and only if your crt0.o does not come from library
+-# Also, be sure to add 'crtzero.o' in extra_parts in libgcc/config.host
+-# and change STARTFILE_SPEC in nds32.h
+-#
+-#crtzero.o: $(srcdir)/config/nds32/crtzero.S $(GCC_PASSES) $(CONFIG_H)
+-#	$(GCC_FOR_TARGET) $(INCLUDES) \
+-#	-c $(srcdir)/config/nds32/crtzero.S -o crtzero.o
+-
++	-fno-toplevel-reorder \
++	-Os -c $(srcdir)/config/nds32/initfini.c -o crtend1.o
+ 
+ # ------------------------------------------------------------------------
+diff --git a/libgcc/config/nds32/t-nds32-mculib b/libgcc/config/nds32/t-nds32-glibc
+similarity index 50%
+rename from libgcc/config/nds32/t-nds32-mculib
+rename to libgcc/config/nds32/t-nds32-glibc
+index b4f7b4c..385644b 100644
+--- a/libgcc/config/nds32/t-nds32-mculib
++++ b/libgcc/config/nds32/t-nds32-glibc
+@@ -1,4 +1,4 @@
+-# Rules of mculib library makefile of Andes NDS32 cpu for GNU compiler
++# Rules of glibc library makefile of Andes NDS32 cpu for GNU compiler
+ # Copyright (C) 2012-2016 Free Software Foundation, Inc.
+ # Contributed by Andes Technology Corporation.
+ #
+@@ -19,59 +19,16 @@
+ # <http://www.gnu.org/licenses/>.
+ 
+ # Compiler flags to use when compiling 'libgcc2.c'
+-HOST_LIBGCC2_CFLAGS = -Os
++HOST_LIBGCC2_CFLAGS = -O2 -fPIC -fwrapv
++LIB2ADD += $(srcdir)/config/nds32/linux-atomic.c
+ 
+-
+-LIB1ASMSRC   = nds32/lib1asmsrc-mculib.S
+-
+-LIB1ASMFUNCS =   \
+-	_addsub_sf   \
+-	_sf_to_si    \
+-	_divsi3      \
+-	_divdi3      \
+-	_modsi3      \
+-	_moddi3      \
+-	_mulsi3      \
+-	_udivsi3     \
+-	_udivdi3     \
+-	_udivmoddi4  \
+-	_umodsi3     \
+-	_umoddi3     \
+-	_muldi3      \
+-	_addsub_df   \
+-	_mul_sf      \
+-	_mul_df      \
+-	_div_sf      \
+-	_div_df      \
+-	_negate_sf   \
+-	_negate_df   \
+-	_sf_to_df    \
+-	_df_to_sf    \
+-	_df_to_si    \
+-	_fixsfdi     \
+-	_fixdfdi     \
+-	_fixunssfsi  \
+-	_fixunsdfsi  \
+-	_fixunssfdi  \
+-	_fixunsdfdi  \
+-	_si_to_sf    \
+-	_si_to_df    \
+-	_floatdisf   \
+-	_floatdidf   \
+-	_floatunsisf \
+-	_floatunsidf \
+-	_floatundisf \
+-	_floatundidf \
+-	_compare_sf  \
+-	_compare_df  \
+-	_unord_sf    \
+-	_unord_df
++#LIB1ASMSRC   = nds32/lib1asmsrc-newlib.S
++#LIB1ASMFUNCS = _divsi3 _modsi3 _udivsi3 _umodsi3
+ 
+ # List of functions not to build from libgcc2.c.
+-LIB2FUNCS_EXCLUDE = _clzsi2 _clzdi2
++#LIB2FUNCS_EXCLUDE = _clzsi2
+ 
+ # List of extra C and assembler files(*.S) to add to static libgcc2.
+-LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-mculib/_clzsi2.c
+-LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-mculib/_clzdi2.c
++#LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-newlib/_clzsi2.c
+ 
+ # ------------------------------------------------------------------------
+diff --git a/libgcc/config/nds32/t-nds32-isr b/libgcc/config/nds32/t-nds32-isr
+index 62b6867..6493838 100644
+--- a/libgcc/config/nds32/t-nds32-isr
++++ b/libgcc/config/nds32/t-nds32-isr
+@@ -23,11 +23,15 @@
+ # Makfile fragment rules for libnds32_isr.a to support ISR attribute extension
+ ###############################################################################
+ 
+-# basic flags setting
+-ISR_CFLAGS = $(CFLAGS) -c
+-
+-# the object files we would like to create
+-LIBNDS32_ISR_16B_OBJS = \
++# Basic flags setting.
++ifneq ($(filter -mext-dsp,$(CFLAGS)),)
++ISR_CFLAGS = $(CFLAGS) -mno-force-no-ext-zol -mext-zol -c
++else
++ISR_CFLAGS = $(CFLAGS) -mno-force-no-ext-zol -c
++endif
++
++# The object files we would like to create.
++LIBNDS32_ISR_VEC_OBJS = \
+ 		vec_vid00.o vec_vid01.o vec_vid02.o vec_vid03.o \
+ 		vec_vid04.o vec_vid05.o vec_vid06.o vec_vid07.o \
+ 		vec_vid08.o vec_vid09.o vec_vid10.o vec_vid11.o \
+@@ -46,40 +50,9 @@ LIBNDS32_ISR_16B_OBJS = \
+ 		vec_vid60.o vec_vid61.o vec_vid62.o vec_vid63.o \
+ 		vec_vid64.o vec_vid65.o vec_vid66.o vec_vid67.o \
+ 		vec_vid68.o vec_vid69.o vec_vid70.o vec_vid71.o \
+-		vec_vid72.o \
+-		excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \
+-		excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \
+-		intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \
+-		intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \
+-		reset.o
+-
+-LIBNDS32_ISR_4B_OBJS = \
+-		vec_vid00_4b.o vec_vid01_4b.o vec_vid02_4b.o vec_vid03_4b.o \
+-		vec_vid04_4b.o vec_vid05_4b.o vec_vid06_4b.o vec_vid07_4b.o \
+-		vec_vid08_4b.o vec_vid09_4b.o vec_vid10_4b.o vec_vid11_4b.o \
+-		vec_vid12_4b.o vec_vid13_4b.o vec_vid14_4b.o vec_vid15_4b.o \
+-		vec_vid16_4b.o vec_vid17_4b.o vec_vid18_4b.o vec_vid19_4b.o \
+-		vec_vid20_4b.o vec_vid21_4b.o vec_vid22_4b.o vec_vid23_4b.o \
+-		vec_vid24_4b.o vec_vid25_4b.o vec_vid26_4b.o vec_vid27_4b.o \
+-		vec_vid28_4b.o vec_vid29_4b.o vec_vid30_4b.o vec_vid31_4b.o \
+-		vec_vid32_4b.o vec_vid33_4b.o vec_vid34_4b.o vec_vid35_4b.o \
+-		vec_vid36_4b.o vec_vid37_4b.o vec_vid38_4b.o vec_vid39_4b.o \
+-		vec_vid40_4b.o vec_vid41_4b.o vec_vid42_4b.o vec_vid43_4b.o \
+-		vec_vid44_4b.o vec_vid45_4b.o vec_vid46_4b.o vec_vid47_4b.o \
+-		vec_vid48_4b.o vec_vid49_4b.o vec_vid50_4b.o vec_vid51_4b.o \
+-		vec_vid52_4b.o vec_vid53_4b.o vec_vid54_4b.o vec_vid55_4b.o \
+-		vec_vid56_4b.o vec_vid57_4b.o vec_vid58_4b.o vec_vid59_4b.o \
+-		vec_vid60_4b.o vec_vid61_4b.o vec_vid62_4b.o vec_vid63_4b.o \
+-		vec_vid64_4b.o vec_vid65_4b.o vec_vid66_4b.o vec_vid67_4b.o \
+-		vec_vid68_4b.o vec_vid69_4b.o vec_vid70_4b.o vec_vid71_4b.o \
+-		vec_vid72_4b.o \
+-		excp_isr_ps_nn_4b.o excp_isr_ps_ns_4b.o excp_isr_ps_nr_4b.o \
+-		excp_isr_sa_nn_4b.o excp_isr_sa_ns_4b.o excp_isr_sa_nr_4b.o \
+-		intr_isr_ps_nn_4b.o intr_isr_ps_ns_4b.o intr_isr_ps_nr_4b.o \
+-		intr_isr_sa_nn_4b.o intr_isr_sa_ns_4b.o intr_isr_sa_nr_4b.o \
+-		reset_4b.o
++		vec_vid72.o
+ 
+-LIBNDS32_ISR_COMMON_OBJS = \
++LIBNDS32_ISR_JMP_OBJS = \
+ 		jmptbl_vid00.o jmptbl_vid01.o jmptbl_vid02.o jmptbl_vid03.o \
+ 		jmptbl_vid04.o jmptbl_vid05.o jmptbl_vid06.o jmptbl_vid07.o \
+ 		jmptbl_vid08.o jmptbl_vid09.o jmptbl_vid10.o jmptbl_vid11.o \
+@@ -98,29 +71,32 @@ LIBNDS32_ISR_COMMON_OBJS = \
+ 		jmptbl_vid60.o jmptbl_vid61.o jmptbl_vid62.o jmptbl_vid63.o \
+ 		jmptbl_vid64.o jmptbl_vid65.o jmptbl_vid66.o jmptbl_vid67.o \
+ 		jmptbl_vid68.o jmptbl_vid69.o jmptbl_vid70.o jmptbl_vid71.o \
+-		jmptbl_vid72.o \
++		jmptbl_vid72.o
++
++LIBNDS32_ISR_COMMON_OBJS = \
++		excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \
++		excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \
++		intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \
++		intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \
++		reset.o \
+ 		nmih.o \
+ 		wrh.o
+ 
+-LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_16B_OBJS) $(LIBNDS32_ISR_4B_OBJS) $(LIBNDS32_ISR_COMMON_OBJS)
+-
++LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_VEC_OBJS) $(LIBNDS32_ISR_JMP_OBJS) $(LIBNDS32_ISR_COMMON_OBJS)
+ 
+-# Build common objects for ISR library
+-nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o
+ 
+-wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o
+ 
+-jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S
++# Build vector vid objects for ISR library.
++vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S
+ 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@
+ 
+ 
+-
+-# Build 16b version objects for ISR library. (no "_4b" postfix string)
+-vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S
++# Build jump table objects for ISR library.
++jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S
+ 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@
+ 
++
++# Build common objects for ISR library.
+ excp_isr_ps_nn.o: $(srcdir)/config/nds32/isr-library/excp_isr.S
+ 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr.S -o excp_isr_ps_nn.o
+ 
+@@ -160,48 +136,12 @@ intr_isr_sa_nr.o: $(srcdir)/config/nds32/isr-library/intr_isr.S
+ reset.o: $(srcdir)/config/nds32/isr-library/reset.S
+ 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset.S -o reset.o
+ 
+-# Build 4b version objects for ISR library.
+-vec_vid%_4b.o: $(srcdir)/config/nds32/isr-library/vec_vid%_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@
+-
+-excp_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nn_4b.o
+-
+-excp_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_ns_4b.o
+-
+-excp_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nr_4b.o
+-
+-excp_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nn_4b.o
+-
+-excp_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_ns_4b.o
+-
+-excp_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nr_4b.o
+-
+-intr_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nn_4b.o
+-
+-intr_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_ns_4b.o
+-
+-intr_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nr_4b.o
+-
+-intr_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nn_4b.o
+-
+-intr_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_ns_4b.o
++nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S
++	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o
+ 
+-intr_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nr_4b.o
++wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S
++	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o
+ 
+-reset_4b.o: $(srcdir)/config/nds32/isr-library/reset_4b.S
+-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset_4b.S -o reset_4b.o
+ 
+ 
+ # The rule to create libnds32_isr.a file
+diff --git a/libgcc/config/nds32/t-nds32-newlib b/libgcc/config/nds32/t-nds32-newlib
+index e4af03e..c356b60 100644
+--- a/libgcc/config/nds32/t-nds32-newlib
++++ b/libgcc/config/nds32/t-nds32-newlib
+@@ -19,7 +19,7 @@
+ # <http://www.gnu.org/licenses/>.
+ 
+ # Compiler flags to use when compiling 'libgcc2.c'
+-HOST_LIBGCC2_CFLAGS = -O2
++HOST_LIBGCC2_CFLAGS = -O2 -fwrapv
+ 
+ 
+ #LIB1ASMSRC   = nds32/lib1asmsrc-newlib.S
diff --git a/util/crossgcc/patches/gcc-6.3.0_riscv.patch b/util/crossgcc/patches/gcc-6.3.0_riscv.patch
index ca9555de0b..a60511362a 100644
--- a/util/crossgcc/patches/gcc-6.3.0_riscv.patch
+++ b/util/crossgcc/patches/gcc-6.3.0_riscv.patch
@@ -9030,9 +9030,9 @@ index c9e43fb80e3..5359a4e6ee5 100755
  # version to the per-target configury.
  case "$cpu_type" in
    aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \
--  | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
+-  | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
 -  | visium | xstormy16 | xtensa)
-+  | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \
++  | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \
 +  | tilepro | visium | xstormy16 | xtensa)
      insn="nop"
      ;;
@@ -9063,9 +9063,9 @@ index 33f9a0ecdc6..673fb1bb891 100644
  # version to the per-target configury.
  case "$cpu_type" in
    aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \
--  | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
+-  | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \
 -  | visium | xstormy16 | xtensa)
-+  | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \
++  | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \
 +  | tilepro | visium | xstormy16 | xtensa)
      insn="nop"
      ;;
author	Stefan Reinauer <stefan.reinauer@coreboot.org>	2017-08-07 15:27:15 -0700
committer	Patrick Georgi <pgeorgi@google.com>	2017-09-01 12:32:36 +0000
commit	f3e23a313558b1e9e913878d7a638ff32321a4b3 (patch)
tree	12064c039d78bcb9e7f4bab4c986d533a7659b81 /util/crossgcc/patches
parent	d37ebddfd84699464d076642f35fce0ef21cd1d5 (diff)
download	coreboot-f3e23a313558b1e9e913878d7a638ff32321a4b3.tar.xz