diff options
author | Stefan Reinauer <stefan.reinauer@coreboot.org> | 2017-08-07 15:27:15 -0700 |
---|---|---|
committer | Patrick Georgi <pgeorgi@google.com> | 2017-09-01 12:32:36 +0000 |
commit | f3e23a313558b1e9e913878d7a638ff32321a4b3 (patch) | |
tree | 12064c039d78bcb9e7f4bab4c986d533a7659b81 /util/crossgcc | |
parent | d37ebddfd84699464d076642f35fce0ef21cd1d5 (diff) | |
download | coreboot-f3e23a313558b1e9e913878d7a638ff32321a4b3.tar.xz |
buildgcc: Integrate nds32 update from Andes Technology
This patch has been provided by Mentor Chih-Chyang Chang
on behalf of Andes Technology. It fixes using the coreboot
toolchain to compile the Chrome EC code base on the ITE8320
embedded controller.
The new patch incorporates a fix for the issue previously
fixed by patches/gcc-6.3.0_nds32.patch, so that patch can
be removed.
patches/gcc-6.3.0_riscv.patch needs to be slightly adjusted
to still apply cleanly (configure scripts only).
Change-Id: I0033888360f13ba951b692b3242aab6697ca61b3
Signed-off-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
Reviewed-on: https://review.coreboot.org/20901
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Patrick Georgi <pgeorgi@google.com>
Diffstat (limited to 'util/crossgcc')
-rwxr-xr-x | util/crossgcc/buildgcc | 4 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-6.3.0_nds32.patch | 17 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch | 73397 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-6.3.0_riscv.patch | 8 |
4 files changed, 73403 insertions, 23 deletions
diff --git a/util/crossgcc/buildgcc b/util/crossgcc/buildgcc index dcfb9a6b45..9cbdfc6001 100755 --- a/util/crossgcc/buildgcc +++ b/util/crossgcc/buildgcc @@ -18,8 +18,8 @@ cd $(dirname $0) -CROSSGCC_DATE="July 27th, 2017" -CROSSGCC_VERSION="1.46" +CROSSGCC_DATE="August 16th, 2017" +CROSSGCC_VERSION="1.47" CROSSGCC_COMMIT=$( git describe ) # default settings diff --git a/util/crossgcc/patches/gcc-6.3.0_nds32.patch b/util/crossgcc/patches/gcc-6.3.0_nds32.patch deleted file mode 100644 index cdfb02f351..0000000000 --- a/util/crossgcc/patches/gcc-6.3.0_nds32.patch +++ /dev/null @@ -1,17 +0,0 @@ -diff -urN gcc-6.1.0.orig/gcc/config/nds32/nds32.md gcc-6.1.0/gcc/config/nds32/nds32.md ---- gcc-6.1.0.orig/gcc/config/nds32/nds32.md 2015-01-15 22:45:09.000000000 -0800 -+++ gcc-6.1.0/gcc/config/nds32/nds32.md 2016-04-14 22:09:09.000000000 -0700 -@@ -2289,11 +2289,11 @@ - emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], - operands[4])); - -- operands[5] = gen_reg_rtx (SImode); -+ rtx tmp = gen_reg_rtx (SImode); - /* Step C, D, E, and F, using another temporary register operands[5]. */ - emit_jump_insn (gen_casesi_internal (operands[0], - operands[3], -- operands[5])); -+ tmp)); - DONE; - }) - diff --git a/util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch b/util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch new file mode 100644 index 0000000000..50e39691b6 --- /dev/null +++ b/util/crossgcc/patches/gcc-6.3.0_nds32_ite.patch @@ -0,0 +1,73397 @@ +diff --git a/gcc/common.opt b/gcc/common.opt +index 67048db..e6f8fd3 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1281,7 +1281,7 @@ ffast-math + Common + + ffat-lto-objects +-Common Var(flag_fat_lto_objects) ++Common Var(flag_fat_lto_objects) Init(1) + Output lto objects containing both the intermediate language and binary output. 
+ + ffinite-math-only +diff --git a/gcc/common/config/nds32/nds32-common.c b/gcc/common/config/nds32/nds32-common.c +index fb75956..66ea95c 100644 +--- a/gcc/common/config/nds32/nds32-common.c ++++ b/gcc/common/config/nds32/nds32-common.c +@@ -53,6 +53,16 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED, + + return true; + ++ case OPT_misr_secure_: ++ /* Check the valid security level: 0 1 2 3. */ ++ if (value < 0 || value > 3) ++ { ++ error_at (loc, "for the option -misr-secure=X, the valid X " ++ "must be: 0, 1, 2, or 3"); ++ return false; ++ } ++ return true; ++ + case OPT_mcache_block_size_: + /* Check valid value: 4 8 16 32 64 128 256 512. */ + if (exact_log2 (value) < 2 || exact_log2 (value) > 9) +@@ -74,15 +84,69 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED, + /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ + static const struct default_options nds32_option_optimization_table[] = + { +- /* Enable -fomit-frame-pointer by default at -O1 or higher. */ +- { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, ++#ifdef TARGET_DEFAULT_NO_MATH_ERRNO ++ /* Under some configuration, we would like to use -fno-math-errno by default ++ at all optimization levels for performance and code size consideration. ++ Please check gcc/config.gcc for more implementation details. */ ++ { OPT_LEVELS_ALL, OPT_fmath_errno, NULL, 0 }, ++#endif ++#if TARGET_LINUX_ABI == 0 ++ /* Disable -fdelete-null-pointer-checks by default in ELF toolchain. */ ++ { OPT_LEVELS_ALL, OPT_fdelete_null_pointer_checks, ++ NULL, 0 }, ++#endif ++ /* Enable -fsched-pressure by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, ++ /* Enable -fomit-frame-pointer by default at all optimization levels. */ ++ { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 1 }, ++ /* Enable -mrelax-hint by default at all optimization levels. 
*/ ++ { OPT_LEVELS_ALL, OPT_mrelax_hint, NULL, 1 }, ++ /* Enable -mabi-compatible by default at all optimization levels. */ ++ { OPT_LEVELS_ALL, OPT_mabi_compatible, NULL, 1 }, ++ /* Enalbe -malways-align by default at -O1 and above, but not -Os or -Og. */ ++ { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align, NULL, 1 }, + /* Enable -mv3push by default at -Os, but it is useless under V2 ISA. */ +- { OPT_LEVELS_SIZE, OPT_mv3push, NULL, 1 }, +- +- { OPT_LEVELS_NONE, 0, NULL, 0 } ++ { OPT_LEVELS_SIZE, OPT_mv3push, NULL, 1 }, ++ /* Enable -mload-store-opt by default at -Os. */ ++ { OPT_LEVELS_SIZE, OPT_mload_store_opt, NULL, 1 }, ++ /* Enable -mregrename by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_mregrename, NULL, 1 }, ++ /* Enable -mgcse by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_mgcse, NULL, 1 }, ++ /* Enable -msign-conversion by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_msign_conversion, NULL, 1 }, ++ /* Enable -mscalbn-transform by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_mscalbn_transform, NULL, 1 }, ++ /* Enable -mconst_remeterialization by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_mconst_remater, NULL, 1 }, ++ /* Enable -mcprop-acc by default at -O1 and above. */ ++ { OPT_LEVELS_1_PLUS, OPT_mcprop_acc, NULL, 1 }, ++#ifdef TARGET_OS_DEFAULT_IFC ++ /* Enable -mifc by default at -Os, but it is useless under V2/V3M ISA. */ ++ { OPT_LEVELS_SIZE, OPT_mifc, NULL, 1 }, ++#endif ++#ifdef TARGET_OS_DEFAULT_EX9 ++ /* Enable -mex9 by default at -Os, but it is useless under V2/V3M ISA. */ ++ { OPT_LEVELS_SIZE, OPT_mex9, NULL, 1 }, ++#endif ++ ++ { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + + /* ------------------------------------------------------------------------ */ ++ ++/* Implement TARGET_EXCEPT_UNWIND_INFO. 
*/ ++static enum unwind_info_type ++nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_LINUX_ABI) ++ return UI_DWARF2; ++ ++ return UI_SJLJ; ++} ++ ++/* ------------------------------------------------------------------------ */ ++ + + /* Run-time Target Specification. */ + +@@ -95,14 +159,22 @@ static const struct default_options nds32_option_optimization_table[] = + + Other MASK_XXX flags are set individually. + By default we enable +- TARGET_16_BIT : Generate 16/32 bit mixed length instruction. +- TARGET_PERF_EXT : Generate performance extention instrcution. +- TARGET_CMOV : Generate conditional move instruction. */ ++ TARGET_16_BIT : Generate 16/32 bit mixed length instruction. ++ TARGET_EXT_PERF : Generate performance extention instrcution. ++ TARGET_EXT_PERF2 : Generate performance extention version 2 instrcution. ++ TARGET_EXT_STRING : Generate string extention instrcution. ++ TARGET_HW_ABS : Generate hardware abs instruction. ++ TARGET_CMOV : Generate conditional move instruction. */ + #undef TARGET_DEFAULT_TARGET_FLAGS + #define TARGET_DEFAULT_TARGET_FLAGS \ + (TARGET_CPU_DEFAULT \ ++ | TARGET_DEFAULT_FPU_ISA \ ++ | TARGET_DEFAULT_FPU_FMA \ + | MASK_16_BIT \ +- | MASK_PERF_EXT \ ++ | MASK_EXT_PERF \ ++ | MASK_EXT_PERF2 \ ++ | MASK_EXT_STRING \ ++ | MASK_HW_ABS \ + | MASK_CMOV) + + #undef TARGET_HANDLE_OPTION +@@ -115,7 +187,7 @@ static const struct default_options nds32_option_optimization_table[] = + /* Defining the Output Assembler Language. 
*/ + + #undef TARGET_EXCEPT_UNWIND_INFO +-#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info ++#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info + + /* ------------------------------------------------------------------------ */ + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 1d5b23f..367a821 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -433,8 +433,28 @@ mips*-*-*) + ;; + nds32*) + cpu_type=nds32 +- extra_headers="nds32_intrinsic.h" +- extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o" ++ extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc" ++ case ${target} in ++ nds32*-*-linux*) ++ extra_options="${extra_options} nds32/nds32-linux.opt" ++ ;; ++ nds32*-*-elf*) ++ extra_options="${extra_options} nds32/nds32-elf.opt" ++ ;; ++ *) ++ ;; ++ esac ++ extra_options="${extra_options} g.opt" ++ extra_objs="nds32-cost.o nds32-intrinsic.o nds32-md-auxiliary.o \ ++ nds32-pipelines-auxiliary.o nds32-predicates.o \ ++ nds32-memory-manipulation.o nds32-fp-as-gp.o \ ++ nds32-load-store-opt.o nds32-soft-fp-comm.o nds32-isr.o \ ++ nds32-regrename.o nds32-gcse.o nds32-relax-opt.o \ ++ nds32-sign-conversion.o \ ++ nds32-scalbn-transform.o nds32-lmwsmw.o \ ++ nds32-reg-utils.o nds32-const-remater.o \ ++ nds32-utils.o nds32-abi-compatible.o \ ++ nds32-cprop-acc.o" + ;; + nios2-*-*) + cpu_type=nios2 +@@ -2265,17 +2285,67 @@ msp430*-*-*) + tmake_file="${tmake_file} msp430/t-msp430" + extra_gcc_objs="driver-msp430.o" + ;; +-nds32le-*-*) ++nds32*-*-*) + target_cpu_default="0" + tm_defines="${tm_defines}" +- tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" +- tmake_file="nds32/t-nds32 nds32/t-mlibs" +- ;; +-nds32be-*-*) +- target_cpu_default="0|MASK_BIG_ENDIAN" +- tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" +- tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" +- tmake_file="nds32/t-nds32 nds32/t-mlibs" ++ case 
${target} in ++ nds32le*-*-*) ++ ;; ++ nds32be-*-*) ++ target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN" ++ tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ++ ;; ++ esac ++ case ${target} in ++ nds32*-*-elf*) ++ tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h" ++ tmake_file="nds32/t-nds32 nds32/t-elf" ++ ;; ++ nds32*-*-linux*) ++ tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h" ++ tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux" ++ ;; ++ esac ++ nds32_multilibs="${with_multilib_list}" ++ if test "$nds32_multilibs" = "default"; then ++ nds32_multilibs="" ++ fi ++ nds32_multilibs=`echo $nds32_multilibs | sed -e 's/,/ /g'` ++ for nds32_multilib in ${nds32_multilibs}; do ++ case ${nds32_multilib} in ++ dsp | zol | v3m+ | graywolf ) ++ TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG} ${nds32_multilib}" ++ ;; ++ *) ++ echo "--with-multilib-list=${nds32_multilib} not supported." ++ exit 1 ++ esac ++ done ++ ++ # Handle --enable-default-relax setting. ++ if test x${enable_default_relax} = xyes; then ++ tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1" ++ fi ++ # Handle --enable-Os-default-ifc setting. ++ if test x${enable_Os_default_ifc} = xyes; then ++ tm_defines="${tm_defines} TARGET_OS_DEFAULT_IFC=1" ++ fi ++ # Handle --enable-Os-default-ex9 setting. 
++ if test x${enable_Os_default_ex9} = xyes; then ++ tm_defines="${tm_defines} TARGET_OS_DEFAULT_EX9=1" ++ fi ++ # Handle --with-ext-dsp ++ if test x${with_ext_dsp} = xyes; then ++ tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1" ++ fi ++ if test x${with_ext_zol} = xyes; then ++ tm_defines="${tm_defines} TARGET_DEFAULT_HWLOOP=1" ++ fi ++ # Handle --with-16bit-ext, and default is on ++ if test x${with_ext_16bit} != xno; then ++ tm_defines="${tm_defines} TARGET_DEFAULT_16BIT=1" ++ fi ++ + ;; + nios2-*-*) + tm_file="elfos.h ${tm_file}" +@@ -4097,15 +4167,51 @@ case "${target}" in + ;; + + nds32*-*-*) +- supported_defaults="arch nds32_lib" ++ supported_defaults="arch cpu nds32_lib float fpu_config memory_model" + + # process --with-arch + case "${with_arch}" in +- "" | v2 | v3 | v3m) ++ "" | v3 | v3j) ++ # OK ++ tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0" ++ tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4" ++ ;; ++ v2 | v2j | v3m) ++ # OK ++ tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0" ++ tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=16" ++ ;; ++ v3f) ++ tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=1" ++ tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4" ++ ;; ++ v3s) ++ tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=2" ++ tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4" ++ ;; ++ *) ++ echo "Cannot accept --with-arch=$with_arch, available values are: v2 v2j v3 v3j v3m v3f v3s" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ # process --with-memory-model ++ case "${with_memory_model}" in ++ "" | fast | slow) ++ ;; ++ *) ++ echo "Cannot accept --with-memory-model=$with_memory_model, available values are: fast slow" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ # process --with-cpu ++ case "${with_cpu}" in ++ "" | n7 | n8 | e8 | s8 | n9 | n10 | d10 | graywolf | n12 | n13 | panther) + # OK + ;; + *) +- echo "Cannot accept --with-arch=$with_arch, available values are: v2 v3 v3m" 1>&2 ++ echo "Cannot accept --with-cpu=$with_cpu, available values 
are: n7 n8 e8 s8 n9 n10 d10 graywolf n12 n13 panther" 1>&2 + exit 1 + ;; + esac +@@ -4115,31 +4221,56 @@ case "${target}" in + "") + # the default library is newlib + with_nds32_lib=newlib ++ tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" + ;; + newlib) + # OK ++ tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" + ;; + mculib) + # OK ++ # for the arch=v3f or arch=v3s under mculib toolchain, ++ # we would like to set -fno-math-errno as default ++ case "${with_arch}" in ++ v3f | v3s) ++ tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1" ++ ;; ++ esac ++ ;; ++ glibc) ++ # OK ++ tm_defines="${tm_defines} TARGET_DEFAULT_TLSDESC_TRAMPOLINE=1" ++ ;; ++ uclibc) + ;; + *) +- echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 ++ echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ # process --with-float ++ case "${with_float}" in ++ "" | soft | hard) ++ # OK ++ ;; ++ *) ++ echo "Cannot accept --with-float=$with_float, available values are: soft hard" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ++ # process --with-config-fpu ++ case "${with_config_fpu}" in ++ "" | 0 | 1 | 2 | 3) ++ # OK ++ ;; ++ *) ++ echo "Cannot accept --with-config-fpu=$with_config_fpu, available values from 0 to 7" 1>&2 + exit 1 + ;; + esac +- ;; + +- nios2*-*-*) +- supported_defaults="arch" +- case "$with_arch" in +- "" | r1 | r2) +- # OK +- ;; +- *) +- echo "Unknown arch used in --with-arch=$with_arch" 1>&2 +- exit 1 +- ;; +- esac + ;; + + powerpc*-*-* | rs6000-*-*) +@@ -4527,7 +4658,7 @@ case ${target} in + esac + + t= +-all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls" ++all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls memory_model" + for option in 
$all_defaults + do + eval "val=\$with_"`echo $option | sed s/-/_/g` +diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md +index bea42ee..6c92412 100644 +--- a/gcc/config/nds32/constants.md ++++ b/gcc/config/nds32/constants.md +@@ -23,25 +23,176 @@ + (define_constants + [(R8_REGNUM 8) + (TA_REGNUM 15) ++ (TP_REGNUM 25) + (FP_REGNUM 28) + (GP_REGNUM 29) + (LP_REGNUM 30) + (SP_REGNUM 31) ++ (LB_REGNUM 98) ++ (LE_REGNUM 99) ++ (LC_REGNUM 100) + ]) + + ++;; The unpec operation index. ++(define_c_enum "unspec_element" [ ++ UNSPEC_COPYSIGN ++ UNSPEC_FCPYNSD ++ UNSPEC_FCPYNSS ++ UNSPEC_FCPYSD ++ UNSPEC_FCPYSS ++ UNSPEC_CLIP ++ UNSPEC_CLIPS ++ UNSPEC_CLO ++ UNSPEC_PBSAD ++ UNSPEC_PBSADA ++ UNSPEC_BSE ++ UNSPEC_BSE_2 ++ UNSPEC_BSP ++ UNSPEC_BSP_2 ++ UNSPEC_FFB ++ UNSPEC_FFMISM ++ UNSPEC_FLMISM ++ UNSPEC_KDMBB ++ UNSPEC_KDMBT ++ UNSPEC_KDMTB ++ UNSPEC_KDMTT ++ UNSPEC_KHMBB ++ UNSPEC_KHMBT ++ UNSPEC_KHMTB ++ UNSPEC_KHMTT ++ UNSPEC_KSLRAW ++ UNSPEC_KSLRAWU ++ UNSPEC_SVA ++ UNSPEC_SVS ++ UNSPEC_WSBH ++ UNSPEC_LWUP ++ UNSPEC_LBUP ++ UNSPEC_SWUP ++ UNSPEC_SBUP ++ UNSPEC_LMWZB ++ UNSPEC_SMWZB ++ UNSPEC_UALOAD_HW ++ UNSPEC_UALOAD_W ++ UNSPEC_UALOAD_DW ++ UNSPEC_UASTORE_HW ++ UNSPEC_UASTORE_W ++ UNSPEC_UASTORE_DW ++ UNSPEC_GOTINIT ++ UNSPEC_GOT ++ UNSPEC_GOTOFF ++ UNSPEC_PLT ++ UNSPEC_TLSGD ++ UNSPEC_TLSLD ++ UNSPEC_TLSIE ++ UNSPEC_TLSLE ++ UNSPEC_ROUND ++ UNSPEC_VEC_COMPARE ++ UNSPEC_KHM ++ UNSPEC_KHMX ++ UNSPEC_CLIP_OV ++ UNSPEC_CLIPS_OV ++ UNSPEC_BITREV ++ UNSPEC_KABS ++ UNSPEC_LOOP_END ++ UNSPEC_TLS_DESC ++ UNSPEC_TLS_IE ++ UNSPEC_ADD32 ++ UNSPEC_ICT ++]) ++ ++ + ;; The unspec_volatile operation index. 
+ (define_c_enum "unspec_volatile_element" [ +- UNSPEC_VOLATILE_FUNC_RETURN ++ UNSPEC_VOLATILE_EH_RETURN + UNSPEC_VOLATILE_ISYNC + UNSPEC_VOLATILE_ISB ++ UNSPEC_VOLATILE_DSB ++ UNSPEC_VOLATILE_MSYNC ++ UNSPEC_VOLATILE_MSYNC_ALL ++ UNSPEC_VOLATILE_MSYNC_STORE + UNSPEC_VOLATILE_MFSR + UNSPEC_VOLATILE_MFUSR + UNSPEC_VOLATILE_MTSR + UNSPEC_VOLATILE_MTUSR + UNSPEC_VOLATILE_SETGIE_EN + UNSPEC_VOLATILE_SETGIE_DIS ++ UNSPEC_VOLATILE_FMFCSR ++ UNSPEC_VOLATILE_FMTCSR ++ UNSPEC_VOLATILE_FMFCFG ++ UNSPEC_VOLATILE_JR_ITOFF ++ UNSPEC_VOLATILE_JR_TOFF ++ UNSPEC_VOLATILE_JRAL_ITON ++ UNSPEC_VOLATILE_JRAL_TON ++ UNSPEC_VOLATILE_RET_ITOFF ++ UNSPEC_VOLATILE_RET_TOFF ++ UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT ++ UNSPEC_VOLATILE_STANDBY_WAKE_GRANT ++ UNSPEC_VOLATILE_STANDBY_WAKE_DONE ++ UNSPEC_VOLATILE_TEQZ ++ UNSPEC_VOLATILE_TNEZ ++ UNSPEC_VOLATILE_TRAP ++ UNSPEC_VOLATILE_SETEND_BIG ++ UNSPEC_VOLATILE_SETEND_LITTLE ++ UNSPEC_VOLATILE_BREAK ++ UNSPEC_VOLATILE_SYSCALL ++ UNSPEC_VOLATILE_NOP ++ UNSPEC_VOLATILE_RES_DEP ++ UNSPEC_VOLATILE_DATA_DEP ++ UNSPEC_VOLATILE_LLW ++ UNSPEC_VOLATILE_SCW ++ UNSPEC_VOLATILE_CCTL_L1D_INVALALL ++ UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL ++ UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL ++ UNSPEC_VOLATILE_CCTL_IDX_WRITE ++ UNSPEC_VOLATILE_CCTL_IDX_READ ++ UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1 ++ UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA ++ UNSPEC_VOLATILE_CCTL_IDX_WBINVAL ++ UNSPEC_VOLATILE_CCTL_VA_LCK ++ UNSPEC_VOLATILE_DPREF_QW ++ UNSPEC_VOLATILE_DPREF_HW ++ UNSPEC_VOLATILE_DPREF_W ++ UNSPEC_VOLATILE_DPREF_DW ++ UNSPEC_VOLATILE_TLBOP_TRD ++ UNSPEC_VOLATILE_TLBOP_TWR ++ UNSPEC_VOLATILE_TLBOP_RWR ++ UNSPEC_VOLATILE_TLBOP_RWLK ++ UNSPEC_VOLATILE_TLBOP_UNLK ++ UNSPEC_VOLATILE_TLBOP_PB ++ UNSPEC_VOLATILE_TLBOP_INV ++ UNSPEC_VOLATILE_TLBOP_FLUA ++ UNSPEC_VOLATILE_ENABLE_INT ++ UNSPEC_VOLATILE_DISABLE_INT ++ UNSPEC_VOLATILE_SET_PENDING_SWINT ++ UNSPEC_VOLATILE_CLR_PENDING_SWINT ++ UNSPEC_VOLATILE_CLR_PENDING_HWINT ++ UNSPEC_VOLATILE_GET_ALL_PENDING_INT ++ 
UNSPEC_VOLATILE_GET_PENDING_INT ++ UNSPEC_VOLATILE_SET_INT_PRIORITY ++ UNSPEC_VOLATILE_GET_INT_PRIORITY ++ UNSPEC_VOLATILE_SET_TRIG_LEVEL ++ UNSPEC_VOLATILE_SET_TRIG_EDGE ++ UNSPEC_VOLATILE_GET_TRIG_TYPE ++ UNSPEC_VOLATILE_RELAX_GROUP ++ UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN ++ UNSPEC_VOLATILE_INNERMOST_LOOP_END ++ UNSPEC_VOLATILE_OMIT_FP_BEGIN ++ UNSPEC_VOLATILE_OMIT_FP_END + UNSPEC_VOLATILE_POP25_RETURN ++ UNSPEC_VOLATILE_SIGNATURE_BEGIN ++ UNSPEC_VOLATILE_SIGNATURE_END ++ UNSPEC_VOLATILE_NO_HWLOOP ++ UNSPEC_VOLATILE_NO_IFC_BEGIN ++ UNSPEC_VOLATILE_NO_IFC_END ++ UNSPEC_VOLATILE_NO_EX9_BEGIN ++ UNSPEC_VOLATILE_NO_EX9_END ++ UNSPEC_VOLATILE_UNALIGNED_FEATURE ++ UNSPEC_VOLATILE_ENABLE_UNALIGNED ++ UNSPEC_VOLATILE_DISABLE_UNALIGNED ++ UNSPEC_VOLATILE_RDOV ++ UNSPEC_VOLATILE_CLROV ++ UNSPEC_VOLATILE_HWLOOP_LAST_INSN + ]) + + ;; ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md +index 1f44a1a..8163f46 100644 +--- a/gcc/config/nds32/constraints.md ++++ b/gcc/config/nds32/constraints.md +@@ -25,9 +25,6 @@ + ;; Machine-dependent floating: G H + + +-(define_register_constraint "w" "(TARGET_ISA_V3 || TARGET_ISA_V3M) ? LOW_REGS : NO_REGS" +- "LOW register class $r0 ~ $r7 constraint for V3/V3M ISA") +- + (define_register_constraint "l" "LOW_REGS" + "LOW register class $r0 ~ $r7") + +@@ -41,9 +38,59 @@ + (define_register_constraint "t" "R15_TA_REG" + "Temporary Assist register $ta (i.e. $r15)") + ++(define_register_constraint "e" "R8_REG" ++ "Function Entry register $r8)") ++ + (define_register_constraint "k" "STACK_REG" + "Stack register $sp") + ++(define_register_constraint "v" "R5_REG" ++ "Register $r5") ++ ++(define_register_constraint "x" "FRAME_POINTER_REG" ++ "Frame pointer register $fp") ++ ++(define_register_constraint "f" ++ "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ? 
FP_REGS : NO_REGS" ++ "The Floating point registers $fs0 ~ $fs31") ++ ++(define_register_constraint "A" "LOOP_REGS" ++ "Loop register class") ++ ++(define_constraint "Iv00" ++ "Constant value 0" ++ (and (match_code "const_int") ++ (match_test "ival == 0"))) ++ ++(define_constraint "Iv01" ++ "Constant value 1" ++ (and (match_code "const_int") ++ (match_test "ival == 1"))) ++ ++(define_constraint "Iv02" ++ "Constant value 2" ++ (and (match_code "const_int") ++ (match_test "ival == 2"))) ++ ++(define_constraint "Iv04" ++ "Constant value 4" ++ (and (match_code "const_int") ++ (match_test "ival == 4"))) ++ ++(define_constraint "Iv08" ++ "Constant value 8" ++ (and (match_code "const_int") ++ (match_test "ival == 8"))) ++ ++(define_constraint "Iu01" ++ "Unsigned immediate 1-bit value" ++ (and (match_code "const_int") ++ (match_test "ival == 1 || ival == 0"))) ++ ++(define_constraint "Iu02" ++ "Unsigned immediate 2-bit value" ++ (and (match_code "const_int") ++ (match_test "ival < (1 << 2) && ival >= 0"))) + + (define_constraint "Iu03" + "Unsigned immediate 3-bit value" +@@ -65,6 +112,11 @@ + (and (match_code "const_int") + (match_test "ival < (1 << 4) && ival >= -(1 << 4)"))) + ++(define_constraint "Cs05" ++ "Signed immediate 5-bit value" ++ (and (match_code "const_double") ++ (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 4), (1 << 4))"))) ++ + (define_constraint "Iu05" + "Unsigned immediate 5-bit value" + (and (match_code "const_int") +@@ -75,6 +127,11 @@ + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -31, 0)"))) + ++(define_constraint "Iu06" ++ "Unsigned immediate 6-bit value" ++ (and (match_code "const_int") ++ (match_test "ival < (1 << 6) && ival >= 0"))) ++ + ;; Ip05 is special and dedicated for v3 movpi45 instruction. + ;; movpi45 has imm5u field but the range is 16 ~ 47. 
+ (define_constraint "Ip05" +@@ -84,10 +141,10 @@ + && ival >= (0 + 16) + && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) + +-(define_constraint "Iu06" ++(define_constraint "IU06" + "Unsigned immediate 6-bit value constraint for addri36.sp instruction" + (and (match_code "const_int") +- (match_test "ival < (1 << 6) ++ (match_test "ival < (1 << 8) + && ival >= 0 + && (ival % 4 == 0) + && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) +@@ -103,6 +160,11 @@ + (match_test "ival < (1 << 9) && ival >= 0"))) + + ++(define_constraint "Is08" ++ "Signed immediate 8-bit value" ++ (and (match_code "const_int") ++ (match_test "ival < (1 << 7) && ival >= -(1 << 7)"))) ++ + (define_constraint "Is10" + "Signed immediate 10-bit value" + (and (match_code "const_int") +@@ -113,6 +175,10 @@ + (and (match_code "const_int") + (match_test "ival < (1 << 10) && ival >= -(1 << 10)"))) + ++(define_constraint "Is14" ++ "Signed immediate 14-bit value" ++ (and (match_code "const_int") ++ (match_test "ival < (1 << 13) && ival >= -(1 << 13)"))) + + (define_constraint "Is15" + "Signed immediate 15-bit value" +@@ -194,12 +260,21 @@ + (and (match_code "const_int") + (match_test "ival < (1 << 19) && ival >= -(1 << 19)"))) + ++(define_constraint "Cs20" ++ "Signed immediate 20-bit value" ++ (and (match_code "const_double") ++ (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 19), (1 << 19))"))) + + (define_constraint "Ihig" + "The immediate value that can be simply set high 20-bit" + (and (match_code "const_int") + (match_test "(ival != 0) && ((ival & 0xfff) == 0)"))) + ++(define_constraint "Chig" ++ "The immediate value that can be simply set high 20-bit" ++ (and (match_code "high") ++ (match_test "GET_CODE (XEXP (op, 0)) == CONST_DOUBLE"))) ++ + (define_constraint "Izeb" + "The immediate value 0xff" + (and (match_code "const_int") +@@ -213,12 +288,12 @@ + (define_constraint "Ixls" + "The immediate value 0x01" + (and (match_code "const_int") +- (match_test "TARGET_PERF_EXT && (ival == 0x1)"))) ++ 
(match_test "TARGET_EXT_PERF && (ival == 0x1)"))) + + (define_constraint "Ix11" + "The immediate value 0x7ff" + (and (match_code "const_int") +- (match_test "TARGET_PERF_EXT && (ival == 0x7ff)"))) ++ (match_test "TARGET_EXT_PERF && (ival == 0x7ff)"))) + + (define_constraint "Ibms" + "The immediate value with power of 2" +@@ -232,23 +307,70 @@ + (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) + && (IN_RANGE (exact_log2 (ival + 1), 1, 8))"))) + ++(define_constraint "CVp5" ++ "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVp5_p (op)"))) ++ ++(define_constraint "CVs5" ++ "Signed immediate 5-bit value" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVs5_p (op)"))) ++ ++(define_constraint "CVs2" ++ "Signed immediate 20-bit value" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVs2_p (op)"))) ++ ++(define_constraint "CVhi" ++ "The immediate value that can be simply set high 20-bit" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVhi_p (op)"))) + + (define_memory_constraint "U33" + "Memory constraint for 333 format" + (and (match_code "mem") +- (match_test "nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U"))) ++ (match_test "nds32_mem_format (op) == ADDRESS_POST_INC_LO_REG_IMM3U ++ || nds32_mem_format (op) == ADDRESS_POST_MODIFY_LO_REG_IMM3U ++ || nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U"))) + + (define_memory_constraint "U45" + "Memory constraint for 45 format" + (and (match_code "mem") + (match_test "(nds32_mem_format (op) == ADDRESS_REG) +- && (GET_MODE (op) == SImode)"))) ++ && ((GET_MODE (op) == SImode) ++ || (GET_MODE (op) == SFmode))"))) ++ ++(define_memory_constraint "Ufe" ++ "Memory constraint for fe format" ++ (and (match_code "mem") ++ (match_test "nds32_mem_format (op) == ADDRESS_R8_IMM7U ++ && (GET_MODE (op) == SImode ++ || GET_MODE (op) == SFmode)"))) + + (define_memory_constraint "U37" + "Memory constraint for 37 
format" + (and (match_code "mem") + (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U + || nds32_mem_format (op) == ADDRESS_FP_IMM7U) +- && (GET_MODE (op) == SImode)"))) ++ && (GET_MODE (op) == SImode ++ || GET_MODE (op) == SFmode)"))) ++ ++(define_memory_constraint "Umw" ++ "Memory constraint for lwm/smw" ++ (and (match_code "mem") ++ (match_test "nds32_valid_smw_lwm_base_p (op)"))) ++ ++(define_memory_constraint "Da" ++ "Memory constraint for non-offset loads/stores" ++ (and (match_code "mem") ++ (match_test "REG_P (XEXP (op, 0)) ++ || (GET_CODE (XEXP (op, 0)) == POST_INC)"))) ++ ++(define_memory_constraint "Q" ++ "Memory constraint for no symbol_ref and const" ++ (and (match_code "mem") ++ (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ && nds32_float_mem_operand_p (op)"))) + + ;; ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h +new file mode 100644 +index 0000000..315dcd8 +--- /dev/null ++++ b/gcc/config/nds32/elf.h +@@ -0,0 +1,83 @@ ++/* Definitions of target machine of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#define TARGET_LINUX_ABI 0 ++ ++/* In the configure stage we may use options --enable-default-relax, ++ --enable-Os-default-ifc and --enable-Os-default-ex9. They effect ++ the default spec of passing --relax, --mifc, and --mex9 to linker. ++ We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC ++ so that we can customize them conveniently. */ ++#define LINK_SPEC \ ++ " %{G*}" \ ++ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ ++ " %{shared:-shared}" \ ++ NDS32_RELAX_SPEC \ ++ NDS32_IFC_SPEC \ ++ NDS32_EX9_SPEC ++ ++#define LIB_SPEC \ ++ " -lc -lgloss" ++ ++#define LIBGCC_SPEC \ ++ " -lgcc" ++ ++/* The option -mno-ctor-dtor can disable constructor/destructor feature ++ by applying different crt stuff. In the convention, crt0.o is the ++ startup file without constructor/destructor; ++ crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the ++ startup files with constructor/destructor. ++ Note that crt0.o, crt1.o, crti.o, and crtn.o are provided ++ by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are ++ currently provided by GCC for nds32 target. ++ ++ For nds32 target so far: ++ If -mno-ctor-dtor, we are going to link ++ "crt0.o [user objects]". ++ If -mctor-dtor, we are going to link ++ "crt1.o crtbegin1.o [user objects] crtend1.o". ++ ++ Note that the TARGET_DEFAULT_CTOR_DTOR would effect the ++ default behavior. Check gcc/config.gcc for more information. 
*/ ++#ifdef TARGET_DEFAULT_CTOR_DTOR ++ #define STARTFILE_SPEC \ ++ " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ ++ " %{!mno-ctor-dtor:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++ #define ENDFILE_SPEC \ ++ " %{!mno-ctor-dtor:crtend1.o%s}" ++#else ++ #define STARTFILE_SPEC \ ++ " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \ ++ " %{mctor-dtor|coverage:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++ #define ENDFILE_SPEC \ ++ " %{mctor-dtor|coverage:crtend1.o%s}" ++#endif ++ ++#define STARTFILE_CXX_SPEC \ ++ " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ ++ " %{!mno-ctor-dtor:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++#define ENDFILE_CXX_SPEC \ ++ " %{!mno-ctor-dtor:crtend1.o%s}" +diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md +index ab0f103..6023b9c 100644 +--- a/gcc/config/nds32/iterators.md ++++ b/gcc/config/nds32/iterators.md +@@ -26,30 +26,99 @@ + ;; A list of integer modes that are up to one word long. + (define_mode_iterator QIHISI [QI HI SI]) + ++;; A list of integer modes for one word and double word. ++(define_mode_iterator SIDI [SI DI]) ++ + ;; A list of integer modes that are up to one half-word long. + (define_mode_iterator QIHI [QI HI]) + + ;; A list of the modes that are up to double-word long. + (define_mode_iterator DIDF [DI DF]) + ++;; A list of the modes that are up to one word long vector. ++(define_mode_iterator VQIHI [V4QI V2HI]) ++ ++;; A list of the modes that are up to one word long vector and scalar. ++(define_mode_iterator VSQIHI [V4QI V2HI QI HI]) ++ ++(define_mode_iterator VSQIHIDI [V4QI V2HI QI HI DI]) ++ ++(define_mode_iterator VQIHIDI [V4QI V2HI DI]) ++ ++;; A list of the modes that are up to one word long vector ++;; and scalar for HImode. ++(define_mode_iterator VSHI [V2HI HI]) ++ ++;; A list of the modes that are up to double-word long. 
++(define_mode_iterator ANYF [(SF "TARGET_FPU_SINGLE") ++ (DF "TARGET_FPU_DOUBLE")]) + + ;;---------------------------------------------------------------------------- + ;; Mode attributes. + ;;---------------------------------------------------------------------------- + +-(define_mode_attr size [(QI "b") (HI "h") (SI "w")]) ++(define_mode_attr size [(QI "b") (HI "h") (SI "w") (SF "s") (DF "d")]) + +-(define_mode_attr byte [(QI "1") (HI "2") (SI "4")]) ++(define_mode_attr byte [(QI "1") (HI "2") (SI "4") (V4QI "4") (V2HI "4")]) + ++(define_mode_attr bits [(V4QI "8") (QI "8") (V2HI "16") (HI "16") (DI "64")]) ++ ++(define_mode_attr VELT [(V4QI "QI") (V2HI "HI")]) + + ;;---------------------------------------------------------------------------- + ;; Code iterators. + ;;---------------------------------------------------------------------------- + ++;; shifts ++(define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert]) ++ ++(define_code_iterator shifts [ashift ashiftrt lshiftrt]) ++ ++(define_code_iterator shiftrt [ashiftrt lshiftrt]) ++ ++(define_code_iterator sat_plus [ss_plus us_plus]) ++ ++(define_code_iterator all_plus [plus ss_plus us_plus]) ++ ++(define_code_iterator sat_minus [ss_minus us_minus]) ++ ++(define_code_iterator all_minus [minus ss_minus us_minus]) ++ ++(define_code_iterator plus_minus [plus minus]) ++ ++(define_code_iterator extend [sign_extend zero_extend]) ++ ++(define_code_iterator sumax [smax umax]) ++ ++(define_code_iterator sumin [smin umin]) ++ ++(define_code_iterator sumin_max [smax umax smin umin]) + + ;;---------------------------------------------------------------------------- + ;; Code attributes. 
+ ;;---------------------------------------------------------------------------- + ++;; shifts ++(define_code_attr shift ++ [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")]) ++ ++(define_code_attr su ++ [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")]) ++ ++(define_code_attr zs ++ [(sign_extend "s") (zero_extend "z")]) ++ ++(define_code_attr uk ++ [(plus "") (ss_plus "k") (us_plus "uk") ++ (minus "") (ss_minus "k") (us_minus "uk")]) ++ ++(define_code_attr opcode ++ [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")]) ++ ++(define_code_attr add_rsub ++ [(plus "a") (minus "rs")]) ++ ++(define_code_attr add_sub ++ [(plus "a") (minus "s")]) + + ;;---------------------------------------------------------------------------- +diff --git a/gcc/config/nds32/linux.h b/gcc/config/nds32/linux.h +new file mode 100644 +index 0000000..36ddf2f +--- /dev/null ++++ b/gcc/config/nds32/linux.h +@@ -0,0 +1,78 @@ ++/* Definitions of target machine of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#define TARGET_LINUX_ABI 1 ++ ++#undef SIZE_TYPE ++#define SIZE_TYPE "unsigned int" ++ ++#undef PTRDIFF_TYPE ++#define PTRDIFF_TYPE "int" ++ ++#ifdef TARGET_DEFAULT_TLSDESC_TRAMPOLINE ++ #define NDS32_TLSDESC_TRAMPOLINE_SPEC \ ++ " %{!mno-tlsdesc-trampoline:--mtlsdesc-trampoline}" ++#else ++ #define NDS32_TLSDESC_TRAMPOLINE_SPEC "" ++#endif ++ ++#define TARGET_OS_CPP_BUILTINS() \ ++ do \ ++ { \ ++ GNU_USER_TARGET_OS_CPP_BUILTINS(); \ ++ } \ ++ while (0) ++ ++#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" ++ ++/* In the configure stage we may use options --enable-default-relax, ++ --enable-Os-default-ifc and --enable-Os-default-ex9. They effect ++ the default spec of passing --relax, --mifc, and --mex9 to linker. ++ We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC ++ so that we can customize them conveniently. */ ++#define LINK_SPEC \ ++ " %{G*}" \ ++ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ ++ " %{shared:-shared} \ ++ %{!shared: \ ++ %{!static: \ ++ %{rdynamic:-export-dynamic} \ ++ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ ++ %{static:-static}}" \ ++ NDS32_RELAX_SPEC \ ++ NDS32_IFC_SPEC \ ++ NDS32_EX9_SPEC \ ++ NDS32_TLSDESC_TRAMPOLINE_SPEC ++ ++#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} " ++ ++ ++/* The SYNC operations are implemented as library functions, not ++ INSN patterns. As a result, the HAVE defines for the patterns are ++ not defined. We need to define them to generate the corresponding ++ __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE ++ defines. 
++ Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html */ ++#define HAVE_sync_compare_and_swapqi 1 ++#define HAVE_sync_compare_and_swaphi 1 ++#define HAVE_sync_compare_and_swapsi 1 +diff --git a/gcc/config/nds32/nds32-abi-compatible.c b/gcc/config/nds32/nds32-abi-compatible.c +new file mode 100644 +index 0000000..f2ed006 +--- /dev/null ++++ b/gcc/config/nds32/nds32-abi-compatible.c +@@ -0,0 +1,315 @@ ++/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler. ++ This pass collects the usage of float-point. ++ ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload (). 
*/ ++#include "flags.h" ++#include "input.h" ++#include "function.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "bitmap.h" ++#include "df.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function (). */ ++#include "ggc.h" ++#include "tree-pass.h" ++#include "tree-ssa-alias.h" ++#include "fold-const.h" ++#include "gimple-expr.h" ++#include "is-a.h" ++#include "gimple.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "gimplify-me.h" ++#include "gimple-ssa.h" ++#include "ipa-ref.h" ++#include "lto-streamer.h" ++#include "cgraph.h" ++#include "tree-cfg.h" ++#include "tree-phinodes.h" ++#include "stringpool.h" ++#include "tree-ssanames.h" ++#include "tree-pass.h" ++#include "gimple-pretty-print.h" ++#include "gimple-walk.h" ++ ++/* Indicate the translation unit whether including floating-point arithmetic ++ or not. */ ++bool nds32_include_fp_arith = false; ++ ++/* Return true if the return type and argument types of current function ++ pass the insepction. Furthermore, the global value NDS32_INCLUDE_FP_ARITH ++ is modified. */ ++ ++static bool ++nds32_acd_func_rtn_args_check (tree fn_decl) ++{ ++ tree fn_type = TREE_TYPE (fn_decl); ++ function_args_iterator iter; ++ tree arg_type = NULL_TREE; ++ tree rtn_type = NULL_TREE; ++ unsigned argno = 1; ++ ++ gcc_assert (fn_type); ++ ++ rtn_type = TREE_TYPE (fn_type); ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ " Check the return & arguments for function %s\n" ++ " Prototype:", ++ fndecl_name (fn_decl)); ++ print_generic_decl (dump_file, fn_decl, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ /* Check the return type. 
*/ ++ if (FLOAT_TYPE_P (rtn_type) ++ || RECORD_OR_UNION_TYPE_P (rtn_type)) ++ { ++ if (dump_file) ++ fprintf (dump_file, " ! Return type is FP or record/union type\n"); ++ nds32_include_fp_arith = true; ++ ++ return false; ++ } ++ ++ /* Check if the function has a variable argument list. */ ++ if (stdarg_p (fn_type)) ++ { ++ if (dump_file) ++ fprintf (dump_file, " ! Has variable argument list (i.e. ,...)\n"); ++ nds32_include_fp_arith = true; ++ ++ return false; ++ } ++ ++ /* Check the arguments. */ ++ FOREACH_FUNCTION_ARGS (fn_type, arg_type, iter) ++ { ++ if (arg_type == void_type_node) ++ break; ++ ++ if (FLOAT_TYPE_P (arg_type) ++ || RECORD_OR_UNION_TYPE_P (arg_type)) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ " ! No.%d argument is FP or record/union type\n", ++ argno); ++ nds32_include_fp_arith = true; ++ ++ return false; ++ } ++ argno++; ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, ++ " >> Pass the inspection of return & arguments type\n"); ++ ++ return true; ++} ++ ++/* Helper for nds32_abi_compatible. Return *TP if it is a floating-point ++ -related operand. */ ++ ++static tree ++nds32_acd_walk_op_fn (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) ++{ ++ tree t = *tp; ++ ++ if (t && TREE_TYPE (t) ++ && (FLOAT_TYPE_P (TREE_TYPE (t)) ++ || TREE_CODE (t) == REAL_CST ++ || TREE_CODE (t) == COMPLEX_CST ++ || TREE_CODE (t) == FLOAT_EXPR ++ || TREE_CODE (t) == REALPART_EXPR)) ++ { ++ *walk_subtrees = 0; ++ return t; ++ } ++ ++ return NULL_TREE; ++} ++ ++/* Helper for nds32_abi_compatible. Return non-NULL tree and set ++ *HANDLED_OPS_P to true if *GSI_P is an ASM stmt. 
*/ ++ ++static tree ++nds32_acd_walk_stmt_fn (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, ++ struct walk_stmt_info *wi ATTRIBUTE_UNUSED) ++{ ++ gimple *stmt = gsi_stmt (*gsi_p); ++ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_DEBUG: ++ *handled_ops_p = true; ++ break; ++ ++ case GIMPLE_ASM: ++ *handled_ops_p = true; ++ return (tree) -1; ++ break; ++ ++ case GIMPLE_CALL: ++ { ++ tree call_decl = gimple_call_fndecl (stmt); ++ if (!call_decl ++ || !nds32_acd_func_rtn_args_check (call_decl)) ++ { ++ *handled_ops_p = true; ++ return call_decl; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ return NULL_TREE; ++} ++ ++/* This function is the entry of ABI compatible detection pass. */ ++ ++static int ++nds32_abi_compatible (void) ++{ ++ basic_block bb; ++ struct walk_stmt_info wi; ++ ++ memset (&wi, 0, sizeof (wi)); ++ ++ if (!nds32_acd_func_rtn_args_check (current_function_decl)) ++ return 0; ++ ++ if (dump_file) ++ fprintf (dump_file, "Check function body %s\n", ++ function_name (cfun)); ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ gimple *ret; ++ gimple_seq seq = bb_seq (bb); ++ ++ ret = walk_gimple_seq (seq, ++ nds32_acd_walk_stmt_fn, ++ nds32_acd_walk_op_fn, ++ &wi); ++ if (ret != NULL) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, " ! 
NO PASS: "); ++ print_gimple_stmt (dump_file, ret, 0, TDF_SLIM|TDF_RAW); ++ } ++ nds32_include_fp_arith = true; ++ break; ++ } ++ } ++ ++ if (dump_file) ++ if (!nds32_include_fp_arith) ++ fprintf (dump_file, ++ " >> Pass the inspection of FP operand for function body\n"); ++ ++ return 0; ++} ++ ++static bool ++gate_nds32_abi_compatible (void) ++{ ++ return flag_nds32_abi_compatible ++ && !nds32_include_fp_arith; ++} ++ ++const pass_data pass_data_nds32_abi_compatible = ++{ ++ GIMPLE_PASS, /* type */ ++ "abi_compatible", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ ( PROP_cfg | PROP_ssa ), /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_abi_compatible : public gimple_opt_pass ++{ ++public: ++ pass_nds32_abi_compatible (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_nds32_abi_compatible, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return gate_nds32_abi_compatible (); } ++ unsigned int execute (function *) { return nds32_abi_compatible (); } ++}; ++ ++gimple_opt_pass * ++make_pass_nds32_abi_compatible (gcc::context *ctxt) ++{ ++ return new pass_nds32_abi_compatible (ctxt); ++} +diff --git a/gcc/config/nds32/nds32-const-remater.c b/gcc/config/nds32/nds32-const-remater.c +new file mode 100644 +index 0000000..760e567 +--- /dev/null ++++ b/gcc/config/nds32/nds32-const-remater.c +@@ -0,0 +1,461 @@ ++/* Global CSE pass of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. 
++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). 
*/ ++#include "builtins.h" ++#include "cpplib.h" ++#include "params.h" ++#include "tree-pass.h" ++#include "dbgcnt.h" ++#include "df.h" ++#include "tm-constrs.h" ++ ++/* ------------------------------------------------------------------------ */ ++ ++typedef struct reg_avail_info ++{ ++ rtx insn; ++ unsigned int uint; ++ unsigned int regno; ++} reg_avail_info_t; ++ ++ ++static void find_common_const (void); ++static bool try_rematerialize (rtx_insn *, unsigned int, ++ auto_vec<reg_avail_info_t, 32> *); ++static void clean_reg_avail_info (rtx ,const_rtx, void *); ++static rtx get_const (rtx); ++static bool addsi3_format_p (rtx); ++ ++/* Search the register records. */ ++static bool ++try_rematerialize (rtx_insn *insn, unsigned int uint_r, ++ auto_vec<reg_avail_info_t, 32> *reg_avail_infos) ++{ ++ unsigned int i, uint_i, cl_i, cl_r, ct_i, ct_r; ++ rtx pat, src, dest, new_insn; ++ bool done = FALSE; ++ df_ref df_rec; ++ df_link *link; ++ ++ cl_r = __builtin_clz (uint_r); ++ ct_r = __builtin_ctz (uint_r); ++ for (i = 0; i < reg_avail_infos->length (); ++i) ++ { ++ if ((*reg_avail_infos)[i].uint != uint_r) ++ { ++ uint_i = (*reg_avail_infos)[i].uint; ++ if (dump_file) ++ fprintf (dump_file, "Try rematerialize %08x with const %08x\n", ++ uint_r, uint_i); ++ cl_i = __builtin_clz (uint_i); ++ ct_i = __builtin_ctz (uint_i); ++ src = SET_DEST (PATTERN ((*reg_avail_infos)[i].insn)); ++ dest = SET_DEST (PATTERN (insn)); ++ ++ if (cl_r > cl_i ++ && (uint_i >> (cl_r - cl_i)) == uint_r) ++ { ++ /* Right shift logical. */ ++ pat = gen_rtx_LSHIFTRT (SImode, src, GEN_INT (cl_r - cl_i)); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by l>> %d\n", ++ uint_r, uint_i, (cl_r - cl_i)); ++ } ++ else if (ct_i >= ct_r ++ && ((int) uint_i >> (ct_i - ct_r)) == (int) uint_r) ++ { ++ /* Right shift arithmetic. 
*/ ++ pat = gen_rtx_ASHIFTRT (SImode, src, GEN_INT (ct_i - ct_r)); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by a>> %d\n", ++ uint_r, uint_i, (cl_r - cl_i)); ++ } ++ else if (ct_r > ct_i ++ && (uint_i << (ct_r - ct_i)) == uint_r) ++ { ++ /* Left shift. */ ++ pat = gen_rtx_ASHIFT (SImode, src, GEN_INT (ct_r - ct_i)); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by << %d\n", ++ uint_r, uint_i, (ct_r - ct_i)); ++ } ++ else if (TARGET_EXT_PERF && __builtin_popcount (uint_r ^ uint_i) == 1) ++ { ++ unsigned int val = uint_r ^ uint_i; ++ if ((uint_r & (uint_r ^ uint_i)) != 0) ++ { ++ if (val > (1 << 5)) ++ { ++ /* Bit set. */ ++ pat = gen_rtx_IOR (SImode, src, GEN_INT (val)); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by | %08x\n", ++ uint_r, uint_i, uint_r ^ uint_i); ++ } ++ else ++ { ++ /* Transform to plus if immediate can fit addi45. */ ++ pat = gen_rtx_PLUS (SImode, src, GEN_INT (val)); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by | %08x\n", ++ uint_r, uint_i, uint_r ^ uint_i); ++ } ++ } ++ else ++ { ++ if (val > (1 << 5)) ++ { ++ /* Bit clear. */ ++ pat = gen_rtx_AND (SImode, src, GEN_INT (~(uint_r ^ uint_i))); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by & %08x\n", ++ uint_r, uint_i, ~(uint_r ^ uint_i)); ++ } ++ else ++ { ++ /* Transform to plus if immediate can fit subi45. */ ++ pat = gen_rtx_PLUS (SImode, src, GEN_INT ((int) -val)); ++ done = TRUE; ++ if (dump_file) ++ fprintf (dump_file, ++ "Rematerialize %08x with const %08x by | %08x\n", ++ uint_r, uint_i, uint_r ^ uint_i); ++ } ++ } ++ } ++ else if ((uint_r > uint_i ? 
uint_r - uint_i ++ : uint_i - uint_r) < 0x4000) ++ { ++ /* Check insn_info existence because the instruction ++ maybe be deleted.*/ ++ if (DF_INSN_INFO_GET ((*reg_avail_infos)[i].insn)) ++ { ++ df_rec = DF_INSN_DEFS ((*reg_avail_infos)[i].insn); ++ link = DF_REF_CHAIN (df_rec); ++ ++ /* Do not use the dead instruction. */ ++ /* Do not use the original matched sethi. */ ++ if (!link) ++ continue; ++ for (link = DF_REF_CHAIN (df_rec); link; link = link->next) ++ { ++ if (DF_REF_REGNO (link->ref) == 0 ++ || !DF_REF_INSN_INFO (link->ref) ++ || DF_REF_INSN (link->ref) == insn) ++ break; ++ } ++ if (link) ++ continue; ++ } ++ ++ /* Add. */ ++ if (uint_r > uint_i) ++ { ++ pat = gen_rtx_PLUS (SImode, src, GEN_INT (uint_r - uint_i)); ++ done = TRUE; ++ } ++ else ++ { ++ pat = gen_rtx_PLUS (SImode, src, GEN_INT ((HOST_WIDE_INT) ++ uint_r - uint_i)); ++ done = TRUE; ++ } ++ } ++ ++ if (done) ++ { ++ /* Emit the new instruction. */ ++ new_insn = gen_move_insn (dest, pat); ++ emit_insn_before (new_insn, insn); ++ set_dst_reg_note (new_insn, REG_EQUAL, GEN_INT (uint_r), dest); ++ return TRUE; ++ } ++ } ++ } ++ return FALSE; ++} ++ ++/* Clean the reg_avail_info value. */ ++static void ++clean_reg_avail_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, ++ void *data) ++{ ++ unsigned int i; ++ auto_vec<reg_avail_info_t, 32> *reg_avail_infos = ++ (auto_vec<reg_avail_info_t, 32> *) data; ++ ++ if (GET_CODE (dest) == SUBREG) ++ dest = SUBREG_REG (dest); ++ ++ if (REG_P (dest)) ++ for (i = 0; i < reg_avail_infos->length (); ++i) ++ if ((*reg_avail_infos)[i].regno == REGNO (dest) ++ || (GET_MODE_SIZE (GET_MODE (dest)) == 8 ++ && (*reg_avail_infos)[i].regno == REGNO (dest) + 1)) ++ reg_avail_infos->unordered_remove (i--); ++} ++ ++/* Return the const if the setting value is a constant integer. 
*/ ++static rtx ++get_const (rtx insn) ++{ ++ rtx note; ++ ++ if (GET_CODE (PATTERN (insn)) != SET ++ || !REG_P (SET_DEST (PATTERN (insn))) ++ || GET_MODE (SET_DEST (PATTERN (insn))) != SImode) ++ return NULL_RTX; ++ ++ /* Constant move instruction. */ ++ if (CONST_INT_P (XEXP (PATTERN (insn), 1))) ++ return XEXP (PATTERN (insn), 1); ++ ++ note = find_reg_note (insn, REG_EQUAL, NULL_RTX); ++ if (!note) ++ note = find_reg_note (insn, REG_EQUIV, NULL_RTX); ++ ++ if (note && CONST_INT_P (XEXP (note, 0))) ++ return XEXP (note, 0); ++ ++ return NULL_RTX; ++} ++ ++/* Return true if the instruction is addi format. */ ++static bool ++addsi3_format_p (rtx insn) ++{ ++ if (GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS ++ && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT) ++ return TRUE; ++ ++ return FALSE; ++} ++ ++/* Return true if the instruction is sethi format. */ ++static bool ++sethi_format_p (rtx insn) ++{ ++ if (GET_CODE (PATTERN (insn)) == SET ++ && GET_CODE (XEXP (PATTERN (insn), 1)) == CONST_INT ++ && satisfies_constraint_Ihig (XEXP (PATTERN (insn), 1))) ++ return TRUE; ++ return FALSE; ++} ++ ++/* Return true if the register definition only be used by insn. */ ++static bool ++use_only_p (rtx insn) ++{ ++ rtx def_insn; ++ df_ref rec; ++ df_link *link; ++ rec = DF_INSN_USES (insn); ++ link = DF_REF_CHAIN (rec); ++ ++ if (!link ++ || DF_REF_REGNO (link->ref) == 0 ++ || !DF_REF_INSN_INFO (link->ref)) ++ return FALSE; ++ ++ def_insn = DF_REF_INSN (link->ref); ++ ++ if (!sethi_format_p (def_insn)) ++ return FALSE; ++ ++ rec = DF_INSN_DEFS (def_insn); ++ link = DF_REF_CHAIN (rec); ++ ++ if (!link ++ || link->next ++ || DF_REF_REGNO (link->ref) == 0 ++ || !DF_REF_INSN_INFO (link->ref)) ++ return FALSE; ++ ++ return TRUE; ++} ++ ++/* Traverse instructions in each basic block, and save the value of ++ setting constant instructions. */ ++static void ++find_common_const (void) ++{ ++ basic_block bb; ++ unsigned int i; ++ ++ /* Save register constant value. 
*/ ++ auto_vec<reg_avail_info_t, 32> reg_avail_infos; ++ reg_avail_info_t reg_avail_info; ++ ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ rtx_insn *insn; ++ rtx dest, cst; ++ ++ /* Clear the vector. */ ++ while (!reg_avail_infos.is_empty ()) ++ reg_avail_infos.pop (); ++ ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ ++ if (CALL_P (insn)) ++ { ++ /* Clean hard register. */ ++ for (i = 0; i < reg_avail_infos.length ();) ++ { ++ if (HARD_REGISTER_NUM_P (reg_avail_infos[i].regno) ++ && call_used_regs[reg_avail_infos[i].regno]) ++ reg_avail_infos.unordered_remove (i); ++ else ++ ++i; ++ } ++ } ++ ++ cst = get_const (insn); ++ if (cst == NULL_RTX) ++ { ++ note_stores (PATTERN (insn), clean_reg_avail_info, ++ ®_avail_infos); ++ continue; ++ } ++ ++ dest = SET_DEST (PATTERN (insn)); ++ ++ if (addsi3_format_p (insn) ++ && use_only_p (insn) ++ && try_rematerialize (insn, XUINT (cst, 0), ®_avail_infos)) ++ { ++ delete_insn (insn); ++ df_insn_rescan_all (); ++ } ++ ++ note_stores (PATTERN (insn), clean_reg_avail_info, ®_avail_infos); ++ reg_avail_info.insn = insn; ++ reg_avail_info.uint = XUINT (cst, 0); ++ reg_avail_info.regno = REGNO (dest); ++ if (dump_file) ++ fprintf (dump_file, "Find const %08x on %u\n", ++ reg_avail_info.uint, reg_avail_info.regno); ++ reg_avail_infos.safe_push (reg_avail_info); ++ } ++ } ++} ++ ++static unsigned int ++nds32_const_remater_opt (void) ++{ ++ df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN); ++ df_note_add_problem (); ++ df_insn_rescan_all (); ++ df_analyze (); ++ ++ find_common_const (); ++ ++ df_insn_rescan_all (); ++ return 0; ++} ++ ++const pass_data pass_data_nds32_const_remater_opt = ++{ ++ RTL_PASS, /* type */ ++ "const_remater_opt", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class 
pass_nds32_const_remater_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_const_remater_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_const_remater_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return flag_nds32_const_remater_opt; } ++ unsigned int execute (function *) { return nds32_const_remater_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_const_remater_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_const_remater_opt (ctxt); ++} ++ ++/* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-cost.c b/gcc/config/nds32/nds32-cost.c +index e6a29fc..881d086 100644 +--- a/gcc/config/nds32/nds32-cost.c ++++ b/gcc/config/nds32/nds32-cost.c +@@ -24,73 +24,447 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" +-#include "rtl.h" + #include "tree.h" +-#include "tm_p.h" +-#include "optabs.h" /* For GEN_FCN. */ ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" + #include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" + #include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). 
*/ ++#include "builtins.h" ++#include "tree-pass.h" + + /* ------------------------------------------------------------------------ */ + +-bool +-nds32_rtx_costs_impl (rtx x, +- machine_mode mode ATTRIBUTE_UNUSED, +- int outer_code, +- int opno ATTRIBUTE_UNUSED, +- int *total, +- bool speed) +-{ +- int code = GET_CODE (x); ++typedef bool (*rtx_cost_func) (rtx, int, int, int, int*); + +- /* According to 'speed', goto suitable cost model section. */ +- if (speed) +- goto performance_cost; +- else +- goto size_cost; ++struct rtx_cost_model_t { ++ rtx_cost_func speed_prefer; ++ rtx_cost_func size_prefer; ++}; + ++static rtx_cost_model_t rtx_cost_model; + +-performance_cost: +- /* This is section for performance cost model. */ ++static int insn_size_16bit; /* Initial at nds32_init_rtx_costs. */ ++static const int insn_size_32bit = 4; ++ ++static bool ++nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED, ++ int code, ++ int outer_code ATTRIBUTE_UNUSED, ++ int opno ATTRIBUTE_UNUSED, ++ int *total) ++{ ++ rtx op0; ++ rtx op1; ++ enum machine_mode mode = GET_MODE (x); ++ /* Scale cost by mode size. */ ++ int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); + +- /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. +- We treat it as 4-cycle cost for each instruction +- under performance consideration. */ + switch (code) + { +- case SET: +- /* For 'SET' rtx, we need to return false +- so that it can recursively calculate costs. */ +- return false; +- + case USE: + /* Used in combine.c as a marker. */ + *total = 0; +- break; ++ return true; ++ ++ case CONST_INT: ++ /* When not optimizing for size, we care more about the cost ++ of hot code, and hot code is often in a loop. If a constant ++ operand needs to be forced into a register, we will often be ++ able to hoist the constant load out of the loop, so the load ++ should not contribute to the cost. */ ++ if (outer_code == SET || outer_code == PLUS) ++ *total = satisfies_constraint_Is20 (x) ? 
0 : 4; ++ else if (outer_code == AND || outer_code == IOR || outer_code == XOR ++ || outer_code == MINUS) ++ *total = satisfies_constraint_Iu15 (x) ? 0 : 4; ++ else if (outer_code == ASHIFT || outer_code == ASHIFTRT ++ || outer_code == LSHIFTRT) ++ *total = satisfies_constraint_Iu05 (x) ? 0 : 4; ++ else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE ++ || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE) ++ *total = satisfies_constraint_Is16 (x) ? 0 : 4; ++ else ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case CONST: ++ case LO_SUM: ++ case HIGH: ++ case SYMBOL_REF: ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case MEM: ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case SET: ++ op0 = SET_DEST (x); ++ op1 = SET_SRC (x); ++ mode = GET_MODE (op0); ++ /* Scale cost by mode size. */ ++ cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); ++ ++ switch (GET_CODE (op1)) ++ { ++ case REG: ++ case SUBREG: ++ /* Register move and Store instructions. */ ++ if ((REG_P (op0) || MEM_P (op0)) ++ && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = cost; ++ return true; ++ ++ case MEM: ++ /* Load instructions. */ ++ if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = cost; ++ return true; ++ ++ case CONST_INT: ++ /* movi instruction. */ ++ if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) ++ { ++ if (satisfies_constraint_Is20 (op1)) ++ *total = COSTS_N_INSNS (1) - 1; ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else ++ *total = cost; ++ return true; ++ ++ case CONST: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ /* la instruction. 
*/ ++ if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) ++ *total = COSTS_N_INSNS (1) - 1; ++ else ++ *total = cost; ++ return true; ++ case VEC_SELECT: ++ *total = cost; ++ return true; ++ ++ default: ++ *total = cost; ++ return true; ++ } ++ ++ case PLUS: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT ++ || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) ++ { ++ /* ALU_SHIFT */ ++ if (TARGET_PIPELINE_PANTHER) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else if ((GET_CODE (op1) == CONST_INT ++ && satisfies_constraint_Is15 (op1)) ++ || REG_P (op1)) ++ /* ADD instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* ADD instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case MINUS: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT ++ || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) ++ { ++ /* ALU_SHIFT */ ++ if (TARGET_PIPELINE_PANTHER) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else if ((GET_CODE (op0) == CONST_INT ++ && satisfies_constraint_Is15 (op0)) ++ || REG_P (op0)) ++ /* SUB instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SUB instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case TRUNCATE: ++ /* TRUNCATE and AND behavior is same. */ ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case AND: ++ case IOR: ++ case XOR: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (NDS32_EXT_DSP_P ()) ++ { ++ /* We prefer (and (ior) (ior)) than (ior (and) (and)) for ++ synthetize pk** and insb instruction. 
*/ ++ if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR) ++ return COSTS_N_INSNS (1); ++ ++ if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND) ++ return COSTS_N_INSNS (10); ++ } ++ ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT) ++ { ++ /* ALU_SHIFT */ ++ if (TARGET_PIPELINE_PANTHER) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else if ((GET_CODE (op1) == CONST_INT ++ && satisfies_constraint_Iu15 (op1)) ++ || REG_P (op1)) ++ /* AND, OR, XOR instructions */ ++ *total = COSTS_N_INSNS (1); ++ else if (code == AND || GET_CODE (op0) == NOT) ++ /* BITC instruction */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* AND, OR, XOR instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; + + case MULT: ++ if (GET_MODE (x) == DImode ++ || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND ++ || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) ++ /* MUL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (outer_code == PLUS || outer_code == MINUS) ++ { ++ /* ALU_SHIFT */ ++ if (TARGET_PIPELINE_PANTHER) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* MUL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* MUL instructions: IMM out of range. 
*/ ++ *total = COSTS_N_INSNS (2); ++ ++ if (TARGET_MUL_SLOW) ++ *total += COSTS_N_INSNS (4); ++ ++ return true; ++ ++ case LSHIFTRT: ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (outer_code == PLUS || outer_code == MINUS ++ || outer_code == AND || outer_code == IOR ++ || outer_code == XOR) ++ { ++ /* ALU_SHIFT */ ++ if (TARGET_PIPELINE_PANTHER) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* SRL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SRL instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case ASHIFT: ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (outer_code == AND || outer_code == IOR ++ || outer_code == XOR) ++ { ++ /* ALU_SHIFT */ ++ if (TARGET_PIPELINE_PANTHER) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* SLL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SLL instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case ASHIFTRT: ++ case ROTATERT: ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* ROTR, SLL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* ROTR, SLL instructions: IMM out of range. 
*/ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case LT: ++ case LTU: ++ if (outer_code == SET) ++ { ++ if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu15 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* SLT, SLTI instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SLT, SLT instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ } ++ else ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case EQ: ++ case NE: ++ case GE: ++ case LE: ++ case GT: ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case IF_THEN_ELSE: ++ if (GET_CODE (XEXP (x, 1)) == LABEL_REF) ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ else ++ /* cmovz, cmovn instructions */ ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case LABEL_REF: ++ if (outer_code == IF_THEN_ELSE) ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ else ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case ZERO_EXTEND: ++ case SIGN_EXTEND: ++ if (MEM_P (XEXP (x, 0))) ++ /* Using memory access. */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* Zero extend and sign extend instructions. */ ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case NEG: ++ case NOT: + *total = COSTS_N_INSNS (1); +- break; ++ return true; + + case DIV: + case UDIV: + case MOD: + case UMOD: +- *total = COSTS_N_INSNS (7); +- break; ++ *total = COSTS_N_INSNS (20); ++ return true; + +- default: ++ case CALL: ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case CLZ: ++ case SMIN: ++ case SMAX: ++ case ZERO_EXTRACT: ++ if (TARGET_EXT_PERF) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (3); ++ return true; ++ case VEC_SELECT: + *total = COSTS_N_INSNS (1); +- break; +- } +- +- return true; +- ++ return true; + +-size_cost: +- /* This is section for size cost model. 
*/ ++ default: ++ *total = COSTS_N_INSNS (3); ++ return true; ++ } ++} + ++static bool ++nds32_rtx_costs_size_prefer (rtx x, ++ int code, ++ int outer_code, ++ int opno ATTRIBUTE_UNUSED, ++ int *total) ++{ + /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. + We treat it as 4-byte cost for each instruction + under code size consideration. */ +@@ -98,7 +472,7 @@ size_cost: + { + case SET: + /* For 'SET' rtx, we need to return false +- so that it can recursively calculate costs. */ ++ so that it can recursively calculate costs. */ + return false; + + case USE: +@@ -108,92 +482,169 @@ size_cost: + + case CONST_INT: + /* All instructions involving constant operation +- need to be considered for cost evaluation. */ ++ need to be considered for cost evaluation. */ + if (outer_code == SET) + { + /* (set X imm5s), use movi55, 2-byte cost. + (set X imm20s), use movi, 4-byte cost. + (set X BIG_INT), use sethi/ori, 8-byte cost. */ + if (satisfies_constraint_Is05 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else if (satisfies_constraint_Is20 (x)) +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + else +- *total = COSTS_N_INSNS (2); ++ *total = insn_size_32bit * 2; + } + else if (outer_code == PLUS || outer_code == MINUS) + { + /* Possible addi333/subi333 or subi45/addi45, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + else if (outer_code == ASHIFT) + { + /* Possible slli333, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu03 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT) + { + /* Possible srai45 or srli45, 2-byte cost. 
+ General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + else + { + /* For other cases, simply set it 4-byte cost. */ +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + break; + + case CONST_DOUBLE: + /* It requires high part and low part processing, set it 8-byte cost. */ +- *total = COSTS_N_INSNS (2); ++ *total = insn_size_32bit * 2; ++ break; ++ ++ case CONST: ++ case SYMBOL_REF: ++ *total = insn_size_32bit * 2; + break; + + default: + /* For other cases, generally we set it 4-byte cost +- and stop resurively traversing. */ +- *total = COSTS_N_INSNS (1); ++ and stop resurively traversing. */ ++ *total = insn_size_32bit; + break; + } + + return true; + } + +-int +-nds32_address_cost_impl (rtx address, +- machine_mode mode ATTRIBUTE_UNUSED, +- addr_space_t as ATTRIBUTE_UNUSED, +- bool speed) ++void ++nds32_init_rtx_costs (void) ++{ ++ rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer; ++ rtx_cost_model.size_prefer = nds32_rtx_costs_size_prefer; ++ ++ if (TARGET_16_BIT) ++ insn_size_16bit = 2; ++ else ++ insn_size_16bit = 4; ++} ++ ++/* This target hook describes the relative costs of RTL expressions. ++ Return 'true' when all subexpressions of x have been processed. ++ Return 'false' to sum the costs of sub-rtx, plus cost of this operation. ++ Refer to gcc/rtlanal.c for more information. */ ++bool ++nds32_rtx_costs_impl (rtx x, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ int outer_code, ++ int opno, ++ int *total, ++ bool speed) ++{ ++ int code = GET_CODE (x); ++ ++ /* According to 'speed', use suitable cost model section. 
*/ ++ if (speed) ++ return rtx_cost_model.speed_prefer(x, code, outer_code, opno, total); ++ else ++ return rtx_cost_model.size_prefer(x, code, outer_code, opno, total); ++} ++ ++ ++int nds32_address_cost_speed_prefer (rtx address) + { + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + +- /* According to 'speed', goto suitable cost model section. */ +- if (speed) +- goto performance_cost; +- else +- goto size_cost; ++ switch (code) ++ { ++ case POST_MODIFY: ++ case POST_INC: ++ case POST_DEC: ++ /* We encourage that rtx contains ++ POST_MODIFY/POST_INC/POST_DEC behavior. */ ++ return COSTS_N_INSNS (1) - 2; ++ ++ case SYMBOL_REF: ++ /* We can have gp-relative load/store for symbol_ref. ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case CONST: ++ /* It is supposed to be the pattern (const (plus symbol_ref const_int)). ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case REG: ++ /* Simply return 4-byte costs. */ ++ return COSTS_N_INSNS (1) - 2; ++ ++ case PLUS: ++ /* We do not need to check if the address is a legitimate address, ++ because this hook is never called with an invalid address. ++ But we better check the range of ++ const_int value for cost, if it exists. */ ++ plus0 = XEXP (address, 0); ++ plus1 = XEXP (address, 1); ++ ++ if (REG_P (plus0) && CONST_INT_P (plus1)) ++ return COSTS_N_INSNS (1) - 2; ++ else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) ++ return COSTS_N_INSNS (1) - 1; ++ else if (REG_P (plus0) && REG_P (plus1)) ++ return COSTS_N_INSNS (1); ++ ++ /* For other 'plus' situation, make it cost 4-byte. */ ++ return COSTS_N_INSNS (1); + +-performance_cost: +- /* This is section for performance cost model. */ ++ default: ++ break; ++ } + +- /* FALLTHRU, currently we use same cost model as size_cost. */ ++ return COSTS_N_INSNS (4); + +-size_cost: +- /* This is section for size cost model. 
*/ ++} ++ ++int nds32_address_cost_speed_fwprop (rtx address) ++{ ++ rtx plus0, plus1; ++ enum rtx_code code; ++ ++ code = GET_CODE (address); + + switch (code) + { +@@ -201,18 +652,18 @@ size_cost: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains +- POST_MODIFY/POST_INC/POST_DEC behavior. */ ++ POST_MODIFY/POST_INC/POST_DEC behavior. */ + return 0; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. +- Have it 4-byte cost. */ +- return COSTS_N_INSNS (1); ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). +- Have it 4-byte cost. */ +- return COSTS_N_INSNS (1); ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ +@@ -220,21 +671,25 @@ size_cost: + + case PLUS: + /* We do not need to check if the address is a legitimate address, +- because this hook is never called with an invalid address. +- But we better check the range of +- const_int value for cost, if it exists. */ ++ because this hook is never called with an invalid address. ++ But we better check the range of ++ const_int value for cost, if it exists. */ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) +- { ++ { + /* If it is possible to be lwi333/swi333 form, + make it 2-byte cost. */ +- if (satisfies_constraint_Iu05 (plus1)) ++ if (satisfies_constraint_Iu03 (plus1)) + return (COSTS_N_INSNS (1) - 2); + else + return COSTS_N_INSNS (1); + } ++ if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) ++ return COSTS_N_INSNS (1) - 2; ++ else if (REG_P (plus0) && REG_P (plus1)) ++ return COSTS_N_INSNS (1); + + /* For other 'plus' situation, make it cost 4-byte. 
*/ + return COSTS_N_INSNS (1); +@@ -246,4 +701,84 @@ size_cost: + return COSTS_N_INSNS (4); + } + ++ ++int nds32_address_cost_size_prefer (rtx address) ++{ ++ rtx plus0, plus1; ++ enum rtx_code code; ++ ++ code = GET_CODE (address); ++ ++ switch (code) ++ { ++ case POST_MODIFY: ++ case POST_INC: ++ case POST_DEC: ++ /* We encourage that rtx contains ++ POST_MODIFY/POST_INC/POST_DEC behavior. */ ++ return 0; ++ ++ case SYMBOL_REF: ++ /* We can have gp-relative load/store for symbol_ref. ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case CONST: ++ /* It is supposed to be the pattern (const (plus symbol_ref const_int)). ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case REG: ++ /* Simply return 4-byte costs. */ ++ return COSTS_N_INSNS (1) - 1; ++ ++ case PLUS: ++ /* We do not need to check if the address is a legitimate address, ++ because this hook is never called with an invalid address. ++ But we better check the range of ++ const_int value for cost, if it exists. */ ++ plus0 = XEXP (address, 0); ++ plus1 = XEXP (address, 1); ++ ++ if (REG_P (plus0) && CONST_INT_P (plus1)) ++ { ++ /* If it is possible to be lwi333/swi333 form, ++ make it 2-byte cost. */ ++ if (satisfies_constraint_Iu03 (plus1)) ++ return (COSTS_N_INSNS (1) - 2); ++ else ++ return COSTS_N_INSNS (1) - 1; ++ } ++ ++ /* (plus (reg) (mult (reg) (const))) */ ++ if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) ++ return (COSTS_N_INSNS (1) - 1); ++ ++ /* For other 'plus' situation, make it cost 4-byte. 
*/ ++ return COSTS_N_INSNS (1); ++ ++ default: ++ break; ++ } ++ ++ return COSTS_N_INSNS (4); ++ ++} ++ ++int nds32_address_cost_impl (rtx address, ++ enum machine_mode mode ATTRIBUTE_UNUSED, ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed_p) ++{ ++ if (speed_p) ++ { ++ if (current_pass->tv_id == TV_FWPROP) ++ return nds32_address_cost_speed_fwprop (address); ++ else ++ return nds32_address_cost_speed_prefer (address); ++ } ++ else ++ return nds32_address_cost_size_prefer (address); ++} ++ + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-cprop-acc.c b/gcc/config/nds32/nds32-cprop-acc.c +new file mode 100644 +index 0000000..0852095 +--- /dev/null ++++ b/gcc/config/nds32/nds32-cprop-acc.c +@@ -0,0 +1,845 @@ ++/* Copy propagation on hard registers for accumulate style instruction. ++ Copyright (C) 2000-2014 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. 
*/
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "rtl.h"
++#include "tm_p.h"
++#include "insn-config.h"
++#include "regs.h"
++#include "addresses.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "reload.h"
++#include "hash-set.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "function.h"
++#include "recog.h"
++#include "cfgrtl.h"
++#include "flags.h"
++#include "diagnostic-core.h"
++#include "obstack.h"
++#include "tree-pass.h"
++#include "bitmap.h"
++#include "df.h"
++#include "output.h"
++#include "emit-rtl.h"
++#include <vector>
++
++/* One row of the per-move bookkeeping table: the insns whose operands
++   have to be rewritten when that move instruction is propagated.  The
++   table itself is a std::vector of these rows.  */
++
++typedef std::vector<rtx_insn *> insn_list;
++
++/* Argument bundle handed through note_uses to the callback that replaces
++   used subexpressions.  */
++
++struct replace_src_operands_data
++{
++  rtx dst_reg;
++  rtx src_reg;
++  unsigned int old_regno;
++  unsigned int new_regno;
++  rtx_insn *insn;
++};
++
++/* Tell whether REGNO, originally set in ORIG_MODE, may be accessed in
++   NEW_MODE instead.  A change to a wider mode is never allowed; otherwise
++   the answer comes from REG_CANNOT_CHANGE_MODE_P when the target defines
++   CANNOT_CHANGE_MODE_CLASS, and is true when it does not.
++   Adapted from mode_change_ok in regcprop.  */
++
++static bool
++nds32_mode_change_ok (enum machine_mode orig_mode, enum machine_mode new_mode,
++                      unsigned int regno ATTRIBUTE_UNUSED)
++{
++  const bool widening = GET_MODE_SIZE (orig_mode) < GET_MODE_SIZE (new_mode);
++
++  if (widening)
++    return false;
++
++#ifdef CANNOT_CHANGE_MODE_CLASS
++  return !REG_CANNOT_CHANGE_MODE_P (regno, orig_mode, new_mode);
++#else
++  return true;
++#endif
++}
++
++/* Register REGNO was originally set in ORIG_MODE.  It - or a copy of it -
++   was copied in COPY_MODE to COPY_REGNO, and then COPY_REGNO was accessed
++   in NEW_MODE.
++   Return a NEW_MODE rtx for REGNO if that's OK, otherwise return NULL_RTX.
++   Adapted from maybe_mode_change in regcprop.  
*/
++
++static rtx
++nds32_mode_change_reg (enum machine_mode orig_mode, enum machine_mode copy_mode,
++                       enum machine_mode new_mode, unsigned int regno,
++                       unsigned int copy_regno ATTRIBUTE_UNUSED)
++{
++  /* The copy was narrower than both the original value and the requested
++     access, so part of the requested value was never copied.  */
++  if (GET_MODE_SIZE (copy_mode) < GET_MODE_SIZE (orig_mode)
++      && GET_MODE_SIZE (copy_mode) < GET_MODE_SIZE (new_mode))
++    return NULL_RTX;
++
++  if (orig_mode == new_mode)
++    return gen_raw_REG (new_mode, regno);
++  else if (nds32_mode_change_ok (orig_mode, new_mode, regno))
++    {
++      int copy_nregs = hard_regno_nregs[copy_regno][copy_mode];
++      int use_nregs = hard_regno_nregs[copy_regno][new_mode];
++      int copy_offset
++        = GET_MODE_SIZE (copy_mode) / copy_nregs * (copy_nregs - use_nregs);
++      int offset
++        = GET_MODE_SIZE (orig_mode) - GET_MODE_SIZE (new_mode) - copy_offset;
++      int byteoffset = offset % UNITS_PER_WORD;
++      int wordoffset = offset - byteoffset;
++
++      /* Only the big-endian components of the offset select a different
++         hard register.  */
++      offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0)
++                + (BYTES_BIG_ENDIAN ? byteoffset : 0));
++      regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
++      if (HARD_REGNO_MODE_OK (regno, new_mode))
++        return gen_raw_REG (new_mode, regno);
++    }
++  return NULL_RTX;
++}
++
++/* Return true if INSN is a register-based move instruction, false
++   otherwise.  The pattern must be a single SET whose source and
++   destination are both registers in a mode accepted by can_copy_p.  */
++
++static bool
++nds32_is_reg_mov_p (rtx_insn *insn)
++{
++  rtx pat = PATTERN (insn);
++
++  if (GET_CODE (pat) != SET)
++    return false;
++
++  rtx src_reg = SET_SRC (pat);
++  rtx dst_reg = SET_DEST (pat);
++
++  return (REG_P (dst_reg) && REG_P (src_reg)
++          && can_copy_p (GET_MODE (dst_reg)));
++}
++
++
++/* Return accumulated register if INSN is an accumulate style instruction,
++   otherwise return NULL_RTX.  Only insns whose length attribute is 4 and
++   whose pattern is a single SET are considered.  */
++
++static rtx
++nds32_is_acc_insn_p (rtx_insn *insn)
++{
++  int i;
++  const operand_alternative *op_alt;
++  rtx pat;
++
++  if (get_attr_length (insn) != 4)
++    return NULL_RTX;
++
++  pat = PATTERN (insn);
++  if (GET_CODE (pat) != SET)
++    return NULL_RTX;
++
++  /* Try to get the insn data from recog_data.  */
++  recog_memoized (insn);
++  extract_constrain_insn (insn);
++  /* Transform the constraint strings into a more usable form,
++     recog_op_alt.  */
++  preprocess_constraints (insn);
++  op_alt = which_op_alt ();
++
++  /* Look for an operand that is tied to another operand by a matching
++     constraint where at least one of the two is not a pure input.  */
++  for (i = 0; i < recog_data.n_operands; ++i)
++    {
++      int matches = op_alt[i].matches;
++      int matched = op_alt[i].matched;
++      if ((matches >= 0
++           && (recog_data.operand_type[i] != OP_IN
++               || recog_data.operand_type[matches] != OP_IN))
++          || (matched >= 0
++              && (recog_data.operand_type[i] != OP_IN
++                  || recog_data.operand_type[matched] != OP_IN)))
++        return recog_data.operand[i];
++    }
++
++  return NULL_RTX;
++}
++
++/* Find the reference corresponding to the definition of the register whose
++   register number is REGNO in INSN.  Return NULL if INSN does not define
++   that register.
++   Adapted from df_find_def in df-core.  */
++
++static df_ref
++nds32_df_find_regno_def (rtx_insn *insn, unsigned int regno)
++{
++  df_ref def;
++
++  FOR_EACH_INSN_DEF (def, insn)
++    if (DF_REF_REGNO (def) == regno)
++      return def;
++
++  return NULL;
++}
++
++/* Return true if the REG in INSN is only defined by one insn whose uid
++   is DEF_UID, otherwise return false.  A use of REG with no recorded
++   reaching definition also yields false.  */
++
++static bool
++nds32_is_single_def_p (rtx_insn *insn, rtx reg, unsigned int def_uid)
++{
++  df_ref use;
++
++  FOR_EACH_INSN_USE (use, insn)
++    {
++      df_link *link;
++      unsigned int uid;
++
++      if (DF_REF_REGNO (use) >= REGNO (reg)
++          && DF_REF_REGNO (use) < END_REGNO (reg))
++        {
++          link = DF_REF_CHAIN (use);
++          /* An empty chain means no definition is known for this use;
++             treat it like the multiple-definition case instead of
++             dereferencing a null link.  */
++          if (!link
++              || link->next
++              || DF_REF_IS_ARTIFICIAL (link->ref))
++            return false;
++
++          uid = DF_REF_INSN_UID (link->ref);
++          if (uid != def_uid)
++            return false;
++        }
++    }
++
++  return true;
++}
++
++/* Return true if there is no definition of REG on any path from the insn
++   whose uid is FROM_UID (called FROM) to insn TO, otherwise return false.  
++   This function computes the reaching definitions bitmap at insn TO, and
++   checks that every definition reaching a use of REG in insn FROM is
++   present in that bitmap.  */
++
++static bool
++nds32_no_define_reg_p (rtx to, rtx reg, unsigned int from_uid)
++{
++  basic_block to_bb = BLOCK_FOR_INSN (to);
++  struct df_rd_bb_info *rd_info = DF_RD_BB_INFO (to_bb);
++  df_insn_info *from_info;
++  bitmap_head reaching;
++  rtx_insn *walk;
++  df_ref use;
++  bool all_reach = true;
++
++  /* Start from the block's reaching-definition IN set and replay the
++     block up to, but not including, TO.  */
++  bitmap_initialize (&reaching, &bitmap_default_obstack);
++  bitmap_copy (&reaching, &rd_info->in);
++  df_rd_simulate_artificial_defs_at_top (to_bb, &reaching);
++
++  walk = BB_HEAD (to_bb);
++  while (walk != to)
++    {
++      if (INSN_P (walk))
++        df_rd_simulate_one_insn (to_bb, walk, &reaching);
++      walk = NEXT_INSN (walk);
++    }
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "scan reach define:");
++      print_rtl_single (dump_file, to);
++
++      fprintf (dump_file, "bb rd in:\n");
++      dump_bitmap (dump_file, &rd_info->in);
++
++      fprintf (dump_file, "reach def:\n");
++      dump_bitmap (dump_file, &reaching);
++    }
++
++  from_info = DF_INSN_UID_GET (from_uid);
++  FOR_EACH_INSN_INFO_USE (use, from_info)
++    {
++      df_link *link;
++
++      /* Only uses that fall inside REG's hard register range matter.  */
++      if (DF_REF_REGNO (use) < REGNO (reg)
++          || DF_REF_REGNO (use) >= END_REGNO (reg))
++        continue;
++
++      for (link = DF_REF_CHAIN (use); link; link = link->next)
++        {
++          if (dump_file)
++            {
++              fprintf (dump_file, "use ID %d\n", DF_REF_ID (link->ref));
++              if (DF_REF_IS_ARTIFICIAL (link->ref))
++                fprintf (dump_file, "use ref is artificial\n");
++              else
++                {
++                  fprintf (dump_file, "use from insn:");
++                  print_rtl_single (dump_file, DF_REF_INSN (link->ref));
++                }
++            }
++
++          /* Keep scanning after a miss so the dump stays complete.  */
++          if (!bitmap_bit_p (&reaching, DF_REF_ID (link->ref)))
++            all_reach = false;
++        }
++    }
++
++  bitmap_clear (&reaching);
++  return all_reach;
++}
++
++/* Return true if the value held by REG is no longer needed before INSN
++   (i.e. REG is dead before INSN), otherwise return false.  
*/
++
++static bool
++nds32_is_dead_reg_p (rtx_insn *insn, rtx reg)
++{
++  basic_block bb = BLOCK_FOR_INSN (insn);
++  bitmap live = BITMAP_ALLOC (&reg_obstack);
++  bool result = true;
++  rtx_insn *i;
++  unsigned int rn;
++
++  /* Start from the block's live-in set and replay every insn that
++     precedes INSN, so LIVE describes the point just before INSN.  */
++  bitmap_copy (live, DF_LR_IN (bb));
++  df_simulate_initialize_forwards (bb, live);
++
++  for (i = BB_HEAD (bb); i != insn; i = NEXT_INSN (i))
++    df_simulate_one_insn_forwards (bb, i, live);
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "scan live regs:");
++      print_rtl_single (dump_file, insn);
++
++      fprintf (dump_file, "bb lr in:\n");
++      dump_bitmap (dump_file, DF_LR_IN (bb));
++
++      fprintf (dump_file, "live:\n");
++      dump_bitmap (dump_file, live);
++    }
++
++  /* REG is dead only if none of the hard registers it occupies is
++     live.  */
++  for (rn = REGNO (reg); rn < END_REGNO (reg); ++rn)
++    if (bitmap_bit_p (live, rn))
++      result = false;
++
++  BITMAP_FREE (live);
++  return result;
++}
++
++/* Return true if START can do propagation.  Notice START may be a move
++   instruction or an accumulate style instruction.
++   MOV_UID is the uid of the beginning move instruction; it is only used by
++   function nds32_no_define_reg_p.
++   DST_REG & SRC_REG are the SET_DEST and SET_SRC of a move instruction that
++   may be real or unreal, respectively.
++   INDEX selects which of the consecutive hard registers is currently being
++   considered.  Simultaneously, INDEX is the index of the row in
++   INSN_LISTS.  
*/
++
++static bool
++nds32_can_cprop_acc_1 (rtx_insn *start, unsigned int mov_uid,
++                       rtx dst_reg, rtx src_reg,
++                       unsigned int index,
++                       std::vector<insn_list> &insn_lists)
++{
++  unsigned int lead_regno = REGNO (dst_reg) + index;
++  unsigned int new_regno = REGNO (src_reg) + index;
++  df_ref def_rec;
++  df_link *link;
++
++  def_rec = nds32_df_find_regno_def (start, lead_regno);
++  gcc_assert (def_rec);
++
++  for (link = DF_REF_CHAIN (def_rec); link; link = link->next)
++    {
++      rtx *use_loc;
++      unsigned int use_regno;
++      enum machine_mode use_mode;
++      rtx_insn *use_insn;
++      rtx acc_reg, new_src;
++
++      if (DF_REF_IS_ARTIFICIAL (link->ref))
++        return false;
++
++      use_loc = DF_REF_LOC (link->ref);
++      gcc_assert (use_loc && REG_P (*use_loc));
++
++      use_regno = REGNO (*use_loc);
++      /* Do not propagate when any insn uses a register whose regno is
++         smaller than that of DST_REG.  */
++      if (use_regno < REGNO (dst_reg))
++        return false;
++
++      /* This use starts below the piece selected by INDEX; presumably it
++         was already examined by the call made for a smaller INDEX.  */
++      if (use_regno < lead_regno)
++        continue;
++
++      /* Do not propagate when the use extends past the last hard register
++         of DST_REG, because not all of the pieces of the copy came
++         from DST_REG.  */
++      if (END_REGNO (*use_loc) > END_REGNO (dst_reg))
++        return false;
++
++      use_insn = DF_REF_INSN (link->ref);
++      /* Do not propagate into call insns since call-used registers can't
++         be replaced.  */
++      if (CALL_P (use_insn))
++        return false;
++
++      /* Do not replace in asms intentionally referencing hard registers.  */
++      if (asm_noperands (PATTERN (use_insn)) >= 0
++          && use_regno == ORIGINAL_REGNO (*use_loc))
++        return false;
++
++      /* Do not propagate when the used register is not defined solely by
++         START.  */
++      if (!nds32_is_single_def_p (use_insn, *use_loc, INSN_UID (start)))
++        return false;
++
++      use_mode = GET_MODE (*use_loc);
++      new_src = nds32_mode_change_reg (GET_MODE (src_reg),
++                                       GET_MODE (dst_reg),
++                                       use_mode,
++                                       new_regno,
++                                       use_regno);
++      /* Do not propagate if we can't generate a new register with new mode.  */
++      if (!new_src)
++        return false;
++
++      /* Can not replace DST_REG with SRC_REG when SRC_REG is redefined between
++         the move instruction (MOV_UID) and the use insn of START.  */
++      if (!nds32_no_define_reg_p (use_insn, new_src, mov_uid))
++        return false;
++
++      acc_reg = nds32_is_acc_insn_p (use_insn);
++      /* Handle the accumulate style instruction whose accumulate register
++         may be replaced.
++         Also handle the AUTO_INC register that is another form of accumulated
++         register.  */
++      if ((acc_reg && rtx_equal_p (acc_reg, *use_loc))
++          || FIND_REG_INC_NOTE (use_insn, *use_loc))
++        {
++          unsigned int i, use_nregs;
++
++          /* ACC_REG can't be replaced when SRC_REG is still live, since
++             the SRC_REG can't be overwritten.  */
++          if (!nds32_is_dead_reg_p (use_insn, new_src))
++            return false;
++
++          /* Once we confirm that ACC_REG can be replaced, the unreal move
++             instruction is generated.  For example:
++               mov r0, r1               mov r0, r1
++               cmovn r0, r2, r3   ->    cmovn r1, r2, r3
++                                        mov r0, r1
++             If the unreal move instruction can do propagation, the ACC_REG
++             can be replaced.  We check it in a recursive way, once for
++             each hard register occupied by the use.  */
++          use_nregs = hard_regno_nregs [use_regno][(int) use_mode];
++          for (i = 0; i < use_nregs; ++i)
++            if (!nds32_can_cprop_acc_1 (use_insn, mov_uid,
++                                        *use_loc, new_src,
++                                        i, insn_lists))
++              return false;
++        }
++      insn_lists[index].push_back (use_insn);
++    }
++
++  return true;
++}
++
++/* Return true if MOV can do propagation, otherwise return false.
++   INSN_LISTS is used to record what insns need to replace the operands.  
*/
++
++static bool
++nds32_can_cprop_acc (rtx_insn *mov, std::vector<insn_list> &insn_lists)
++{
++  rtx pat = PATTERN (mov);
++  rtx dst_reg = SET_DEST (pat);
++  rtx src_reg = SET_SRC (pat);
++  unsigned int nregs
++    = hard_regno_nregs[REGNO (dst_reg)][(int) GET_MODE (dst_reg)];
++  unsigned int piece = 0;
++
++  /* One row per hard register occupied by the destination.  */
++  insn_lists.resize (nregs);
++
++  while (piece < nregs)
++    {
++      if (!nds32_can_cprop_acc_1 (mov, INSN_UID (mov), dst_reg, src_reg,
++                                  piece, insn_lists))
++        return false;
++      ++piece;
++    }
++
++  return true;
++}
++
++/* Walk the rtx at LOC, which may be a part of INSN, and swap every register
++   numbered OLD_REGNO for one numbered NEW_REGNO.
++   DST_REG & SRC_REG supply the modes handed to nds32_mode_change_reg.
++   Each replacement is queued with validate_change against INSN.  */
++
++static void
++nds32_replace_partial_operands (rtx *loc, rtx dst_reg, rtx src_reg,
++                                unsigned int old_regno, unsigned int new_regno,
++                                rtx_insn *insn)
++{
++  rtx x = *loc;
++
++  if (!x)
++    return;
++
++  if (REG_P (x) && REGNO (x) == old_regno)
++    {
++      rtx new_reg = nds32_mode_change_reg (GET_MODE (src_reg),
++                                           GET_MODE (dst_reg),
++                                           GET_MODE (x),
++                                           new_regno,
++                                           old_regno);
++
++      gcc_assert (new_reg);
++
++      /* Carry the register bookkeeping over to the replacement.  */
++      ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (x);
++      REG_ATTRS (new_reg) = REG_ATTRS (x);
++      REG_POINTER (new_reg) = REG_POINTER (x);
++
++      /* ??? unshare or not?  */
++      validate_change (insn, loc, new_reg, 1);
++      return;
++    }
++
++  /* Not the register we are after: descend into the sub-expressions,
++     last operand first.  */
++  enum rtx_code code = GET_CODE (x);
++  const char *fmt = GET_RTX_FORMAT (code);
++
++  for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
++    switch (fmt[i])
++      {
++      case 'e':
++        nds32_replace_partial_operands (&XEXP (x, i), dst_reg, src_reg,
++                                        old_regno, new_regno, insn);
++        break;
++
++      case 'E': /* ??? how about V?  */
++        for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
++          nds32_replace_partial_operands (&XVECEXP (x, i, j), dst_reg,
++                                          src_reg, old_regno, new_regno,
++                                          insn);
++        break;
++
++      default:
++        break;
++      }
++}
++
++/* Replace OLD_REGNO with NEW_REGNO throughout the whole pattern of INSN,
++   destinations included.  */
++
++static void
++nds32_replace_all_operands (rtx dst_reg, rtx src_reg,
++                            unsigned int old_regno, unsigned int new_regno,
++                            rtx_insn *insn)
++{
++  rtx *whole_pattern = &PATTERN (insn);
++
++  nds32_replace_partial_operands (whole_pattern, dst_reg, src_reg,
++                                  old_regno, new_regno, insn);
++}
++
++/* note_uses callback for nds32_replace_src_operands: DATA is a
++   replace_src_operands_data and LOC is one used rtx to rewrite.  */
++
++static void
++nds32_replace_src_operands_1 (rtx *loc, void *data)
++{
++  struct replace_src_operands_data *args
++    = (struct replace_src_operands_data *) data;
++
++  nds32_replace_partial_operands (loc, args->dst_reg, args->src_reg,
++                                  args->old_regno, args->new_regno,
++                                  args->insn);
++}
++
++/* Replace OLD_REGNO with NEW_REGNO in the used parts of INSN only, as
++   enumerated by note_uses, so that SET_DESTs are avoided.  */
++
++static void
++nds32_replace_src_operands (rtx dst_reg, rtx src_reg,
++                            unsigned int old_regno, unsigned int new_regno,
++                            rtx_insn *insn)
++{
++  struct replace_src_operands_data args;
++
++  args.dst_reg = dst_reg;
++  args.src_reg = src_reg;
++  args.old_regno = old_regno;
++  args.new_regno = new_regno;
++  args.insn = insn;
++
++  note_uses (&PATTERN (insn), nds32_replace_src_operands_1, &args);
++}
++
++/* Try replacing every occurrence of DST_REG (including its consecutive hard
++   registers) in each insn of INSN_LISTS with SRC_REG.  
*/
++
++static bool
++nds32_try_replace_operands (rtx dst_reg, rtx src_reg,
++                            std::vector<insn_list> &insn_lists)
++{
++  unsigned int i;
++  std::vector<rtx_insn *>::iterator ritr;
++  unsigned int old_regno, new_regno;
++
++  old_regno = REGNO (dst_reg);
++  new_regno = REGNO (src_reg);
++
++  /* Row I of INSN_LISTS holds the users of the I-th hard register of
++     DST_REG, so the register numbers advance together with the row.  */
++  for (i = 0; i < insn_lists.size (); ++i, ++old_regno, ++new_regno)
++    for (ritr = insn_lists[i].begin (); ritr != insn_lists[i].end (); ++ritr)
++      {
++        rtx_insn *insn = *ritr;
++        rtx acc_reg;
++
++        acc_reg = nds32_is_acc_insn_p (insn);
++        /* The operand returned for an accumulate style insn is not
++           guaranteed to be a register, so check before using REGNO.  */
++        if (acc_reg && REG_P (acc_reg) && REGNO (acc_reg) == old_regno)
++          {
++            /* Replace OP_OUT & OP_INOUT */
++            nds32_replace_all_operands (dst_reg, src_reg,
++                                        old_regno, new_regno, insn);
++
++          }
++        else
++          {
++            /* Replace OP_IN */
++            nds32_replace_src_operands (dst_reg, src_reg,
++                                        old_regno, new_regno, insn);
++          }
++      }
++
++  /* All replacements were queued as one change group; they are applied
++     together or not at all.  */
++  if (!apply_change_group ())
++    return false;
++
++  df_analyze ();
++  return true;
++}
++
++/* Check if each move instruction in WORK_LIST can do propagation, and
++   then try to replace operands if necessary.  Return the number of move
++   instructions whose replacements were applied.  */
++
++static int
++nds32_do_cprop_acc (auto_vec<rtx_insn *> &work_list)
++{
++  int n_replace = 0;
++  int i;
++  rtx_insn *mov;
++  std::vector<insn_list> insn_lists;
++
++  FOR_EACH_VEC_ELT (work_list, i, mov)
++    {
++      if (nds32_can_cprop_acc (mov, insn_lists))
++        {
++          if (dump_file)
++            fprintf (dump_file, "\n [CPROP_ACC] insn %d will be cprop. \n",
++                     INSN_UID (mov));
++
++          if (nds32_try_replace_operands (SET_DEST (PATTERN (mov)),
++                                          SET_SRC (PATTERN (mov)),
++                                          insn_lists))
++            n_replace++;
++        }
++      /* Start the next move with an empty table.  */
++      insn_lists.clear ();
++    }
++
++  return n_replace;
++}
++
++/* Return true if MOV meets the conditions of propagation about move
++   instruction, otherwise return false.  
*/ ++ ++static bool ++nds32_is_target_mov_p (rtx mov) ++{ ++ rtx dst = SET_DEST (PATTERN (mov)); ++ rtx src = SET_SRC (PATTERN (mov)); ++ unsigned int dst_regno, src_regno; ++ unsigned int dst_nregs, src_nregs; ++ bool dst_is_general, src_is_general; ++ ++ gcc_assert (REG_P (dst) && REG_P (src)); ++ ++ dst_regno = REGNO (dst); ++ src_regno = REGNO (src); ++ dst_nregs = hard_regno_nregs[dst_regno][GET_MODE (dst)]; ++ src_nregs = hard_regno_nregs[src_regno][GET_MODE (src)]; ++ ++ /* Do not propagate to the stack pointer, as that can leave memory accesses ++ with no scheduling dependency on the stack update. ++ Adapted from regcprop. */ ++ if (dst_regno == STACK_POINTER_REGNUM) ++ return false; ++ ++ /* Likewise with the frame pointer, if we're using one. ++ Adapted from regcprop. */ ++ if (frame_pointer_needed && dst_regno == HARD_FRAME_POINTER_REGNUM) ++ return false; ++ ++ /* Do not propagate to fixed or global registers, patterns can be relying ++ to see particular fixed register or users can expect the chosen global ++ register in asm. ++ Adapted from regcprop. */ ++ if (fixed_regs[dst_regno] || global_regs[dst_regno]) ++ return false; ++ ++ /* Make sure all the consecutive registers of SET_DEST are only defined by ++ SET_SRC. */ ++ if (dst_nregs > src_nregs) ++ return false; ++ ++ /* Narrowing on big endian will result in an invalid transformation. */ ++ if (dst_nregs < src_nregs ++ && (GET_MODE_SIZE (GET_MODE (src)) > UNITS_PER_WORD ++ ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN)) ++ return false; ++ ++ dst_is_general = in_hard_reg_set_p (reg_class_contents[GENERAL_REGS], ++ GET_MODE (dst), REGNO (dst)); ++ src_is_general = in_hard_reg_set_p (reg_class_contents[GENERAL_REGS], ++ GET_MODE (src), REGNO (src)); ++ /* Make sure SET_DEST and SET_SRC are both general registers or both not.
*/ ++ if (dst_is_general ^ src_is_general) ++ return false; ++ ++ return true; ++} ++ ++/* Collect into WORK_LIST the register moves that are the only definition ++ reaching the accumulated register used by an insn. */ ++ ++static void ++nds32_cprop_acc_find_target_mov (auto_vec<rtx_insn *> &work_list) ++{ ++ basic_block bb; ++ rtx_insn *insn; ++ rtx acc_reg; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ FOR_BB_INSNS (bb, insn) ++ if (INSN_P (insn)) ++ { ++ acc_reg = nds32_is_acc_insn_p (insn); ++ if (acc_reg) ++ { ++ unsigned int acc_regno; ++ enum machine_mode acc_mode; ++ df_ref use; ++ df_link *link; ++ rtx_insn *def_insn; ++ ++ if (!single_set (insn) || !REG_P (acc_reg)) ++ continue; ++ ++ acc_regno = REGNO (acc_reg); ++ /* Don't replace in asms intentionally referencing hard regs. */ ++ if (asm_noperands (PATTERN (insn)) >= 0 ++ && acc_regno == ORIGINAL_REGNO (acc_reg)) ++ continue; ++ ++ if (dump_file) ++ fprintf (dump_file, ++ "\n [CPROP_ACC] " ++ "RTL_UID %d is an exchangeable ACC insn. \n", ++ INSN_UID (insn)); ++ ++ use = df_find_use (insn, acc_reg); ++ gcc_assert (use); ++ link = DF_REF_CHAIN (use); ++ /* Need exactly one real reaching def; the chain may be empty. */ ++ if (!link || link->next ++ || DF_REF_IS_ARTIFICIAL (link->ref)) ++ continue; ++ ++ acc_mode = GET_MODE (acc_reg); ++ def_insn = DF_REF_INSN (link->ref); ++ if (nds32_is_reg_mov_p (def_insn)) ++ { ++ rtx *loc = DF_REF_LOC (link->ref); ++ enum machine_mode loc_mode = GET_MODE (*loc); ++ ++ /* If the move instruction can't define whole accumulated ++ register, the replacement is invalid. */ ++ if (loc_mode != acc_mode) ++ if (hard_regno_nregs[acc_regno][acc_mode] ++ > hard_regno_nregs[acc_regno][loc_mode]) ++ continue; ++ ++ if (nds32_is_target_mov_p (def_insn)) ++ work_list.safe_push (def_insn); ++ } ++ } ++ } ++} ++ ++/* Main entry point for the forward copy propagation optimization for ++ accumulate style instruction. 
*/ ++ ++static int ++nds32_cprop_acc_opt (void) ++{ ++ df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN); ++ df_note_add_problem (); ++ df_set_flags (DF_RD_PRUNE_DEAD_DEFS); ++ df_insn_rescan_all (); ++ df_analyze (); ++ ++ auto_vec<rtx_insn *> work_list; ++ ++ nds32_cprop_acc_find_target_mov (work_list); ++ if (work_list.is_empty()) ++ { ++ if (dump_file) ++ fprintf (dump_file, "\n [CPROP_ACC] The work_list is empty. \n"); ++ return 0; ++ } ++ ++ if (dump_file) ++ { ++ int i; ++ rtx_insn *mov; ++ ++ fprintf (dump_file, "\n [CPROP_ACC] The content of work_list:"); ++ FOR_EACH_VEC_ELT (work_list, i, mov) ++ fprintf (dump_file, " %d", INSN_UID (mov)); ++ fprintf (dump_file, "\n"); ++ } ++ ++ compute_bb_for_insn (); ++ ++ int n_replace = nds32_do_cprop_acc (work_list); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "\n [CPROP_ACC] Result: "); ++ if (n_replace == 0) ++ fprintf (dump_file, "No move can do cprop. \n"); ++ else ++ fprintf (dump_file, "Do cprop for %d move. \n", n_replace); ++ } ++ /* Returns 1 whenever WORK_LIST was non-empty, even if N_REPLACE is 0. */ ++ work_list.release (); ++ return 1; ++} ++ ++const pass_data pass_data_nds32_cprop_acc_opt = ++{ ++ RTL_PASS, /* type */ ++ "cprop_acc", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_cprop_acc_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_cprop_acc_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_cprop_acc_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return optimize > 0 && flag_nds32_cprop_acc; } ++ unsigned int execute (function *) { return nds32_cprop_acc_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_cprop_acc_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_cprop_acc_opt (ctxt); ++} +diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md +index 
23a9f25..7c9dfb9 100644 +--- a/gcc/config/nds32/nds32-doubleword.md ++++ b/gcc/config/nds32/nds32-doubleword.md +@@ -23,7 +23,8 @@ + ;; Move DImode/DFmode instructions. + ;; ------------------------------------------------------------- + +- ++;; Do *NOT* try to split DI/DFmode before reload since LRA seem ++;; still buggy for such behavior at least at gcc 4.8.2... + (define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] +@@ -46,149 +47,100 @@ + + + (define_insn "move_<mode>" +- [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, m") +- (match_operand:DIDF 1 "general_operand" " r, i, m, r"))] +- "" ++ [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, *r, *f") ++ (match_operand:DIDF 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, *f, *r"))] ++ "register_operand(operands[0], <MODE>mode) ++ || register_operand(operands[1], <MODE>mode)" + { +- rtx addr; +- rtx otherops[5]; +- + switch (which_alternative) + { + case 0: + return "movd44\t%0, %1"; +- + case 1: + /* reg <- const_int, we ask gcc to split instruction. */ + return "#"; +- + case 2: +- /* Refer to nds32_legitimate_address_p() in nds32.c, +- we only allow "reg", "symbol_ref", "const", and "reg + const_int" +- as address rtx for DImode/DFmode memory access. 
*/ +- addr = XEXP (operands[1], 0); +- +- otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0])); +- otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); +- otherops[2] = addr; +- +- if (REG_P (addr)) +- { +- /* (reg) <- (mem (reg)) */ +- output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops); +- } +- else if (GET_CODE (addr) == PLUS) +- { +- /* (reg) <- (mem (plus (reg) (const_int))) */ +- rtx op0 = XEXP (addr, 0); +- rtx op1 = XEXP (addr, 1); +- +- if (REG_P (op0)) +- { +- otherops[2] = op0; +- otherops[3] = op1; +- otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); +- } +- else +- { +- otherops[2] = op1; +- otherops[3] = op0; +- otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); +- } +- +- /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ +- if (REGNO (otherops[0]) != REGNO (otherops[2])) +- { +- output_asm_insn ("lwi\t%0, [%2 + (%3)]", otherops); +- output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); +- } +- else +- { +- output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); +- output_asm_insn ("lwi\t%0,[ %2 + (%3)]", otherops); +- } +- } +- else +- { +- /* (reg) <- (mem (symbol_ref ...)) +- (reg) <- (mem (const ...)) */ +- output_asm_insn ("lwi.gp\t%0, [ + %2]", otherops); +- output_asm_insn ("lwi.gp\t%1, [ + %2 + 4]", otherops); +- } +- +- /* We have already used output_asm_insn() by ourself, +- so return an empty string. */ +- return ""; +- ++ /* The memory format is (mem (reg)), ++ we can generate 'lmw.bi' instruction. */ ++ return nds32_output_double (operands, true); + case 3: +- /* Refer to nds32_legitimate_address_p() in nds32.c, +- we only allow "reg", "symbol_ref", "const", and "reg + const_int" +- as address rtx for DImode/DFmode memory access. 
*/ +- addr = XEXP (operands[0], 0); +- +- otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); +- otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); +- otherops[2] = addr; +- +- if (REG_P (addr)) +- { +- /* (mem (reg)) <- (reg) */ +- output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); +- } +- else if (GET_CODE (addr) == PLUS) +- { +- /* (mem (plus (reg) (const_int))) <- (reg) */ +- rtx op0 = XEXP (addr, 0); +- rtx op1 = XEXP (addr, 1); +- +- if (REG_P (op0)) +- { +- otherops[2] = op0; +- otherops[3] = op1; +- otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); +- } +- else +- { +- otherops[2] = op1; +- otherops[3] = op0; +- otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); +- } +- +- /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ +- if (REGNO (otherops[0]) != REGNO (otherops[2])) +- { +- output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); +- output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); +- } +- else +- { +- output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); +- output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); +- } +- } +- else +- { +- /* (mem (symbol_ref ...)) <- (reg) +- (mem (const ...)) <- (reg) */ +- output_asm_insn ("swi.gp\t%0, [ + %2]", otherops); +- output_asm_insn ("swi.gp\t%1, [ + %2 + 4]", otherops); +- } +- +- /* We have already used output_asm_insn() by ourself, +- so return an empty string. */ +- return ""; +- ++ /* We haven't 64-bit load instruction, ++ we split this pattern to two SImode pattern. */ ++ return "#"; ++ case 4: ++ /* The memory format is (mem (reg)), ++ we can generate 'smw.bi' instruction. */ ++ return nds32_output_double (operands, false); ++ case 5: ++ /* We haven't 64-bit store instruction, ++ we split this pattern to two SImode pattern. 
*/ ++ return "#"; ++ case 6: ++ return nds32_output_float_load (operands); ++ case 7: ++ return nds32_output_float_store (operands); ++ case 8: ++ return "fcpysd\t%0, %1, %1"; ++ case 9: ++ return "fmfdr\t%0, %1"; ++ case 10: ++ return "fmtdr\t%1, %0"; + default: + gcc_unreachable (); + } + } +- [(set_attr "type" "move,move,move,move") +- (set_attr "length" " 4, 16, 8, 8")]) ++ [(set_attr "type" "alu,alu,load,load,store,store,fload,fstore,fcpy,fmfdr,fmtdr") ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "!TARGET_16_BIT") ++ (const_int 4) ++ (const_int 2)) ++ ;; Alternative 1 ++ (const_int 16) ++ ;; Alternative 2 ++ (const_int 4) ++ ;; Alternative 3 ++ (const_int 8) ++ ;; Alternative 4 ++ (const_int 4) ++ ;; Alternative 5 ++ (const_int 8) ++ ;; Alternative 6 ++ (const_int 4) ++ ;; Alternative 7 ++ (const_int 4) ++ ;; Alternative 8 ++ (const_int 4) ++ ;; Alternative 9 ++ (const_int 4) ++ ;; Alternative 10 ++ (const_int 4) ++ ]) ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) ++ ++;; Split move_di pattern when the hard register is odd. 
++(define_split ++ [(set (match_operand:DIDF 0 "register_operand" "") ++ (match_operand:DIDF 1 "register_operand" ""))] ++ "(NDS32_IS_GPR_REGNUM (REGNO (operands[0])) ++ && ((REGNO (operands[0]) & 0x1) == 1)) ++ || (NDS32_IS_GPR_REGNUM (REGNO (operands[1])) ++ && ((REGNO (operands[1]) & 0x1) == 1))" ++ [(set (match_dup 2) (match_dup 3)) ++ (set (match_dup 4) (match_dup 5))] ++ { ++ operands[2] = gen_lowpart (SImode, operands[0]); ++ operands[4] = gen_highpart (SImode, operands[0]); ++ operands[3] = gen_lowpart (SImode, operands[1]); ++ operands[5] = gen_highpart (SImode, operands[1]); ++ } ++) + + (define_split + [(set (match_operand:DIDF 0 "register_operand" "") + (match_operand:DIDF 1 "const_double_operand" ""))] +- "reload_completed" ++ "flag_pic || reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + { +@@ -207,7 +159,12 @@ + /* Actually we would like to create move behavior by ourself. + So that movsi expander could have chance to split large constant. 
*/ + emit_move_insn (operands[2], operands[3]); +- emit_move_insn (operands[4], operands[5]); ++ ++ unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); ++ if ((UINTVAL (operands[3]) & mask) == (UINTVAL (operands[5]) & mask)) ++ emit_move_insn (operands[4], operands[2]); ++ else ++ emit_move_insn (operands[4], operands[5]); + DONE; + }) + +@@ -217,7 +174,9 @@ + [(set (match_operand:DIDF 0 "register_operand" "") + (match_operand:DIDF 1 "register_operand" ""))] + "reload_completed +- && (TARGET_ISA_V2 || !TARGET_16_BIT)" ++ && (TARGET_ISA_V2 || !TARGET_16_BIT) ++ && NDS32_IS_GPR_REGNUM (REGNO (operands[0])) ++ && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + { +@@ -239,6 +198,28 @@ + } + }) + ++(define_split ++ [(set (match_operand:DIDF 0 "nds32_general_register_operand" "") ++ (match_operand:DIDF 1 "memory_operand" ""))] ++ "reload_completed ++ && nds32_split_double_word_load_store_p (operands, true)" ++ [(set (match_dup 2) (match_dup 3)) ++ (set (match_dup 4) (match_dup 5))] ++{ ++ nds32_spilt_doubleword (operands, true); ++}) ++ ++(define_split ++ [(set (match_operand:DIDF 0 "memory_operand" "") ++ (match_operand:DIDF 1 "nds32_general_register_operand" ""))] ++ "reload_completed ++ && nds32_split_double_word_load_store_p (operands, false)" ++ [(set (match_dup 2) (match_dup 3)) ++ (set (match_dup 4) (match_dup 5))] ++{ ++ nds32_spilt_doubleword (operands, false); ++}) ++ + ;; ------------------------------------------------------------- + ;; Boolean DImode instructions. + ;; ------------------------------------------------------------- +diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md +new file mode 100644 +index 0000000..6ec2137 +--- /dev/null ++++ b/gcc/config/nds32/nds32-dspext.md +@@ -0,0 +1,5280 @@ ++;; Machine description of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. 
++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_expand "mov<mode>" ++ [(set (match_operand:VQIHI 0 "general_operand" "") ++ (match_operand:VQIHI 1 "general_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ /* Need to force register if mem <- !reg. */ ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (<MODE>mode, operands[1]); ++ ++ /* If operands[1] is a large constant and cannot be performed ++ by a single instruction, we need to split it. */ ++ if (GET_CODE (operands[1]) == CONST_VECTOR ++ && !satisfies_constraint_CVs2 (operands[1]) ++ && !satisfies_constraint_CVhi (operands[1])) ++ { ++ HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]); ++ rtx tmp_rtx; ++ ++ tmp_rtx = can_create_pseudo_p () ++ ? 
gen_reg_rtx (SImode) ++ : simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0); ++ ++ emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode)); ++ convert_move (operands[0], tmp_rtx, false); ++ DONE; ++ } ++ ++ if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1])) ++ { ++ if (nds32_tls_referenced_p (operands [1])) ++ { ++ nds32_expand_tls_move (operands); ++ DONE; ++ } ++ else if (flag_pic) ++ { ++ nds32_expand_pic_move (operands); ++ DONE; ++ } ++ } ++}) ++ ++(define_insn "*mov<mode>" ++ [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$ l,$ l,$ l,$ d, d, r,$ d, r, r, r, *f, *f, r, *f, Q, A") ++ (match_operand:VQIHI 1 "nds32_vmove_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f, r, *f, Q, *f, r"))] ++ "NDS32_EXT_DSP_P () ++ && (register_operand(operands[0], <MODE>mode) ++ || register_operand(operands[1], <MODE>mode))" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "mov55\t%0, %1"; ++ case 1: ++ return "ori\t%0, %1, 0"; ++ case 2: ++ case 3: ++ case 4: ++ case 5: ++ return nds32_output_16bit_store (operands, <byte>); ++ case 6: ++ return nds32_output_32bit_store (operands, <byte>); ++ case 7: ++ case 8: ++ case 9: ++ case 10: ++ case 11: ++ return nds32_output_16bit_load (operands, <byte>); ++ case 12: ++ return nds32_output_32bit_load (operands, <byte>); ++ case 13: ++ return "movpi45\t%0, %1"; ++ case 14: ++ return "movi55\t%0, %1"; ++ case 15: ++ return "movi\t%0, %1"; ++ case 16: ++ return "sethi\t%0, hi20(%1)"; ++ case 17: ++ if (TARGET_FPU_SINGLE) ++ return "fcpyss\t%0, %1, %1"; ++ else ++ return "#"; ++ case 18: ++ return "fmtsr\t%1, %0"; ++ case 19: ++ return "fmfsr\t%0, %1"; ++ case 20: ++ return nds32_output_float_load (operands); ++ case 21: ++ return nds32_output_float_store (operands); ++ case 22: ++ return "mtusr\t%1, %0"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" 
"alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore,alu") ++ (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4") ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1")]) ++ ++(define_expand "movv2si" ++ [(set (match_operand:V2SI 0 "general_operand" "") ++ (match_operand:V2SI 1 "general_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ /* Need to force register if mem <- !reg. */ ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (V2SImode, operands[1]); ++}) ++ ++(define_insn "*movv2si" ++ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, r, f") ++ (match_operand:V2SI 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, f, r"))] ++ "NDS32_EXT_DSP_P () ++ && (register_operand(operands[0], V2SImode) ++ || register_operand(operands[1], V2SImode))" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "movd44\t%0, %1"; ++ case 1: ++ /* reg <- const_int, we ask gcc to split instruction. */ ++ return "#"; ++ case 2: ++ /* The memory format is (mem (reg)), ++ we can generate 'lmw.bi' instruction. */ ++ return nds32_output_double (operands, true); ++ case 3: ++ /* We haven't 64-bit load instruction, ++ we split this pattern to two SImode pattern. */ ++ return "#"; ++ case 4: ++ /* The memory format is (mem (reg)), ++ we can generate 'smw.bi' instruction. */ ++ return nds32_output_double (operands, false); ++ case 5: ++ /* We haven't 64-bit store instruction, ++ we split this pattern to two SImode pattern. 
*/ ++ return "#"; ++ case 6: ++ return nds32_output_float_load (operands); ++ case 7: ++ return nds32_output_float_store (operands); ++ case 8: ++ return "fcpysd\t%0, %1, %1"; ++ case 9: ++ return "fmfdr\t%0, %1"; ++ case 10: ++ return "fmtdr\t%1, %0"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown") ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "!TARGET_16_BIT") ++ (const_int 4) ++ (const_int 2)) ++ ;; Alternative 1 ++ (const_int 16) ++ ;; Alternative 2 ++ (const_int 4) ++ ;; Alternative 3 ++ (const_int 8) ++ ;; Alternative 4 ++ (const_int 4) ++ ;; Alternative 5 ++ (const_int 8) ++ ;; Alternative 6 ++ (const_int 4) ++ ;; Alternative 7 ++ (const_int 4) ++ ;; Alternative 8 ++ (const_int 4) ++ ;; Alternative 9 ++ (const_int 4) ++ ;; Alternative 10 ++ (const_int 4) ++ ]) ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) ++ ++(define_expand "movmisalign<mode>" ++ [(set (match_operand:VQIHI 0 "general_operand" "") ++ (match_operand:VQIHI 1 "general_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ rtx addr; ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (<MODE>mode, operands[1]); ++ ++ if (MEM_P (operands[0])) ++ { ++ addr = force_reg (Pmode, XEXP (operands[0], 0)); ++ emit_insn (gen_unaligned_store<mode> (addr, operands[1])); ++ } ++ else ++ { ++ addr = force_reg (Pmode, XEXP (operands[1], 0)); ++ emit_insn (gen_unaligned_load<mode> (operands[0], addr)); ++ } ++ DONE; ++}) ++ ++(define_expand "unaligned_load<mode>" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_load (operands, <MODE>mode); ++ else ++ emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1]))); ++ DONE; ++}) ++ 
++(define_insn "unaligned_load_w<mode>" ++ [(set (match_operand:VQIHI 0 "register_operand" "= r") ++ (unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ return nds32_output_lmw_single_word (operands); ++} ++ [(set_attr "type" "load") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_store<mode>" ++ [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r")) ++ (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_store (operands, <MODE>mode); ++ else ++ emit_insn (gen_unaligned_store_w<mode> (gen_rtx_MEM (<MODE>mode, operands[0]), operands[1])); ++ DONE; ++}) ++ ++(define_insn "unaligned_store_w<mode>" ++ [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand" "=Umw") ++ (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ return nds32_output_smw_single_word (operands); ++} ++ [(set_attr "type" "store") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "<uk>add<mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>add<bits> %0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "<uk>adddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (all_plus:DI (match_operand:DI 1 "register_operand" " r") ++ (match_operand:DI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>add64 %0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "raddv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (ashiftrt:V4HI ++ (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) 
++ (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "radd8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++ ++(define_insn "uraddv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (lshiftrt:V4HI ++ (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "uradd8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "raddv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (ashiftrt:V2SI ++ (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "radd16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "uraddv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (lshiftrt:V2SI ++ (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "uradd16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "radddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (ashiftrt:TI ++ (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "radd64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++ ++(define_insn "uradddi3" ++ [(set (match_operand:DI 0 
"register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "uradd64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "<uk>sub<mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>sub<bits> %0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "<uk>subdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (all_minus:DI (match_operand:DI 1 "register_operand" " r") ++ (match_operand:DI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>sub64 %0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "rsubv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (ashiftrt:V4HI ++ (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "rsub8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "ursubv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (lshiftrt:V4HI ++ (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ursub8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rsubv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (ashiftrt:V2SI ++ (minus:V2SI 
(sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "rsub16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "ursubv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (lshiftrt:V2SI ++ (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ursub16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rsubdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (ashiftrt:TI ++ (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "rsub64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "ursubdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ursub64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4")]) ++ ++(define_expand "cras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "cras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ 
(match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "cras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "cras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "cras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "kcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "kcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 1) ++ 
(parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "kcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "kcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "kcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "ukcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "ukcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "ukcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "ukcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ 
(vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "ukcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "crsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "crsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "crsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "crsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ 
(match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "crsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "kcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "kcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "kcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "kcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "kcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "ukcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ 
(match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "ukcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "ukcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "ukcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "ukcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "rcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "rcras16_1_le" ++ 
[(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "rcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "urcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2])); ++ 
DONE; ++}) ++ ++(define_insn "urcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "urcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "urcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "urcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "rcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_rcrsa16_1_le 
(operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "rcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "rcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "urcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], 
operands[2])); ++ else ++ emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "urcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "urcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "urcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "urcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "<shift>v2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "") ++ (shifts:V2HI (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:SI 2 "nds32_rimm4u_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if 
(operands[2] == const0_rtx) ++ { ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++ } ++}) ++ ++(define_insn "*ashlv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ slli16\t%0, %1, %2 ++ sll16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "kslli16" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (ss_ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ kslli16\t%0, %1, %2 ++ ksll16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "*ashrv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srai16\t%0, %1, %2 ++ sra16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "sra16_round" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] ++ UNSPEC_ROUND))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srai16.u\t%0, %1, %2 ++ sra16.u\t%0, %1, %2" ++ [(set_attr "type" "daluround,daluround") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "*lshrv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srli16\t%0, %1, %2 ++ srl16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "srl16_round" ++ [(set 
(match_operand:V2HI 0 "register_operand" "= r, r") ++ (unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] ++ UNSPEC_ROUND))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srli16.u\t%0, %1, %2 ++ srl16.u\t%0, %1, %2" ++ [(set_attr "type" "daluround,daluround") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "kslra16" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (if_then_else:V2HI ++ (lt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 0)) ++ (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") ++ (neg:SI (match_dup 2))) ++ (ashift:V2HI (match_dup 1) ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "kslra16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "kslra16_round" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (if_then_else:V2HI ++ (lt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 0)) ++ (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") ++ (neg:SI (match_dup 2)))] ++ UNSPEC_ROUND) ++ (ashift:V2HI (match_dup 1) ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "kslra16.u\t%0, %1, %2" ++ [(set_attr "type" "daluround") ++ (set_attr "length" "4")]) ++ ++(define_insn "cmpeq<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "cmpeq<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "scmplt<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "scmplt<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "scmple<bits>" ++ 
[(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "scmple<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "ucmplt<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "ucmplt<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "ucmple<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "ucmple<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "sclip16" ++ [(set (match_operand:V2HI 0 "register_operand" "= r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] ++ UNSPEC_CLIPS))] ++ "NDS32_EXT_DSP_P ()" ++ "sclip16\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")]) ++ ++(define_insn "uclip16" ++ [(set (match_operand:V2HI 0 "register_operand" "= r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] ++ UNSPEC_CLIP))] ++ "NDS32_EXT_DSP_P ()" ++ "uclip16\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")]) ++ ++(define_insn "khm16" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:V2HI 2 "register_operand" " r")] ++ UNSPEC_KHM))] ++ "NDS32_EXT_DSP_P ()" ++ "khm16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ 
++;; khmx16: V2HI operands 1 and 2 are combined through the opaque
++;; UNSPEC_KHMX into V2HI operand 0 by a single 4-byte "khmx16" instruction.
++(define_insn "khmx16"
++  [(set (match_operand:V2HI 0 "register_operand" "=r")
++        (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r")
++                      (match_operand:V2HI 2 "register_operand" " r")]
++                     UNSPEC_KHMX))]
++  "NDS32_EXT_DSP_P ()"
++  "khmx16\t%0, %1, %2"
++  [(set_attr "type" "dmul")
++   (set_attr "length" "4")])
++
++;; vec_set for V4QI: store QI operand 1 into element operands[2] of the
++;; vector in operand 0.  The element index is converted into a one-hot
++;; vec_merge selector (1 << index) and handed to vec_setv4qi_internal,
++;; with operand 0 serving as both the old vector and the destination.
++(define_expand "vec_setv4qi"
++  [(match_operand:V4QI 0 "register_operand" "")
++   (match_operand:QI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  HOST_WIDE_INT pos = INTVAL (operands[2]);
++  /* A V4QI only has elements 0..3.  Index 4 would yield selector 16, which
++     is none of the Iv01/Iv02/Iv04/Iv08 alternatives of
++     vec_setv4qi_internal, and a negative index would be an undefined shift
++     count.  Same range as the check in the "insb" expander.  */
++  if (pos < 0 || pos > 3)
++    gcc_unreachable ();
++  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;
++  emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1],
++                                       operands[0], GEN_INT (elem)));
++  DONE;
++})
++
++;; insb: operand 0 = vector operand 1 with the byte selected by constant
++;; operand 3 (0..3) replaced by the low byte of SI operand 2.
++(define_expand "insb"
++  [(match_operand:V4QI 0 "register_operand" "")
++   (match_operand:V4QI 1 "register_operand" "")
++   (match_operand:SI 2 "register_operand" "")
++   (match_operand:SI 3 "const_int_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0)
++    gcc_unreachable ();
++
++  rtx src = gen_reg_rtx (QImode);
++
++  /* Narrow the SI source to QImode (unsignedp == false).  */
++  convert_move (src, operands[2], false);
++
++  HOST_WIDE_INT selector_index;
++  /* Big endian needs the index reversed.  */
++  if (TARGET_BIG_ENDIAN)
++    selector_index = 4 - INTVAL (operands[3]) - 1;
++  else
++    selector_index = INTVAL (operands[3]);
++  rtx selector = gen_int_mode (1 << selector_index, SImode);
++  emit_insn (gen_vec_setv4qi_internal (operands[0], src,
++                                       operands[1], selector));
++  DONE;
++})
++
++;; Bit-field insert into an SI register.  Only 8-bit wide fields are
++;; accepted (the shape insvsi_internal below describes); any other width
++;; FAILs.
++(define_expand "insvsi"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "")
++                         (match_operand:SI 1 "const_int_operand" "")
++                         (match_operand:SI 2 "nds32_insv_operand" ""))
++        (match_operand:SI 3 "register_operand" ""))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (INTVAL (operands[1]) != 8)
++    FAIL;
++}
++  [(set_attr "type" "dinsb")
++   (set_attr "length" "4")])
++
++
++;; 8-bit insert of SI operand 2 into operand 0 at bit position operand 1,
++;; emitted as "insb" (the position is printed with the %v modifier).
++(define_insn "insvsi_internal"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
++                         (const_int 8)
++                         (match_operand:SI 1 "nds32_insv_operand" "i"))
++        (match_operand:SI 2 "register_operand" "r"))]
++  "NDS32_EXT_DSP_P ()"
++  "insb\t%0, %2, %v1"
++  [(set_attr "type" "dinsb")
++   (set_attr "length" "4")])
++
++;; Same as insvsi_internal, but the inserted value is a zero-extended QI
++;; register.
++(define_insn "insvsiqi_internal"
++  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
++                         (const_int 8)
++                         (match_operand:SI 1 "nds32_insv_operand" "i"))
++        (zero_extend:SI (match_operand:QI 2 "register_operand" "r")))]
++  "NDS32_EXT_DSP_P ()"
++  "insb\t%0, %2, %v1"
++  [(set_attr "type" "dinsb")
++   (set_attr "length" "4")])
++
++;; Intermediate pattern used to synthesize insvsiqi_internal.
++;; v0 = ((v1 & 0xff) << 8)
++;; Split before reload into a shift left by 8 followed by an AND.
++(define_insn_and_split "and0xff_s8"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
++                           (const_int 8))
++                (const_int 65280)))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (SImode);
++  emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode)));
++  /* The low 8 bits are already zero after the shift, so masking with
++     0xffff keeps exactly the bits the 0xff00 (65280) mask would.  */
++  emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode)));
++  DONE;
++})
++
++;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) & 0xff0000)
++;; Operand 1 is tied to operand 0.  Split before reload into: copy v1 to a
++;; fresh pseudo, insert v2 at bit position 16 with insvsi_internal, copy
++;; the pseudo to v0.
++(define_insn_and_split "insbsi2"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++        (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
++                        (const_int -16711681))
++                (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
++                                   (const_int 16))
++                        (const_int 16711680))))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (SImode);
++  emit_move_insn (tmp, operands[1]);
++  emit_insn (gen_insvsi_internal (tmp, gen_int_mode (16, SImode), operands[2]));
++  emit_move_insn (operands[0], tmp);
++  DONE;
++})
++
++;; v0 = (v1 & 0xff00ffff) | v2
++;; Split before reload into a separate AND and IOR.
++(define_insn_and_split "ior_and0xff00ffff_reg"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++        (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
++                        (const_int -16711681))
++                (match_operand:SI 2 "register_operand" "r")))]
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  "#"
++  "NDS32_EXT_DSP_P () && !reload_completed"
++  [(const_int 1)]
++{
++  rtx tmp = gen_reg_rtx (SImode);
++  emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode)));
++  emit_insn (gen_iorsi3 (operands[0], tmp, operands[2]));
++  DONE;
++})
++
++;; Replace one byte of V4QI operand 2 (tied to operand 0) with QI operand 1.
++;; Operand 3 is the one-hot vec_merge selector; alternative N corresponds to
++;; selector bit N.  The insb position is N on little endian and 3 - N on big
++;; endian.
++(define_insn "vec_setv4qi_internal"
++  [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r")
++        (vec_merge:V4QI
++          (vec_duplicate:V4QI
++            (match_operand:QI 1 "register_operand" " r, r, r, r"))
++          (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0")
++          (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      const char *pats[] = { "insb\t%0, %1, 3",
++                             "insb\t%0, %1, 2",
++                             "insb\t%0, %1, 1",
++                             "insb\t%0, %1, 0" };
++      return pats[which_alternative];
++    }
++  else
++    {
++      const char *pats[] = { "insb\t%0, %1, 0",
++                             "insb\t%0, %1, 1",
++                             "insb\t%0, %1, 2",
++                             "insb\t%0, %1, 3" };
++      return pats[which_alternative];
++    }
++}
++  [(set_attr "type" "dinsb")
++   (set_attr
"length" "4")])
++
++;; Little-endian only: replace one byte of V4QI operand 2 (tied to operand 0)
++;; with element 0 of V4QI operand 1.  Operand 3 is the one-hot vec_merge
++;; selector; alternative N emits "insb" with position N.
++(define_insn "vec_setv4qi_internal_vec"
++  [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r")
++        (vec_merge:V4QI
++          (vec_duplicate:V4QI
++            (vec_select:QI
++              (match_operand:V4QI 1 "register_operand" " r, r, r, r")
++              (parallel [(const_int 0)])))
++          (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0")
++          (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   insb\t%0, %1, 0
++   insb\t%0, %1, 1
++   insb\t%0, %1, 2
++   insb\t%0, %1, 3"
++  [(set_attr "type" "dinsb")
++   (set_attr "length" "4")])
++
++;; Little-endian only: element 0 of operand 1 in element 0, zero elsewhere
++;; (selector 1 takes element 0 from the duplicated byte, the rest from the
++;; zero vector).  Emitted as zeb33 (2 bytes) or zeb (4 bytes).
++(define_insn "vec_mergev4qi_and_cv0_1"
++  [(set (match_operand:V4QI 0 "register_operand" "=$l,r")
++        (vec_merge:V4QI
++          (vec_duplicate:V4QI
++            (vec_select:QI
++              (match_operand:V4QI 1 "register_operand" " l,r")
++              (parallel [(const_int 0)])))
++          (const_vector:V4QI [
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)])
++          (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type" "alu,alu")
++   (set_attr "length" " 2, 4")])
++
++;; Operand-swapped form of vec_mergev4qi_and_cv0_1.  The zero vector is the
++;; FIRST vec_merge operand here, so the selector must take elements 1..3
++;; from it (mask 14) and leave only element 0 to the duplicated byte.  A
++;; mask of 2 would also keep the byte in elements 2 and 3, which is not the
++;; value the _1 pattern describes for the same zeb/zeb33 instructions.
++(define_insn "vec_mergev4qi_and_cv0_2"
++  [(set (match_operand:V4QI 0 "register_operand" "=$l,r")
++        (vec_merge:V4QI
++          (const_vector:V4QI [
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)])
++          (vec_duplicate:V4QI
++            (vec_select:QI
++              (match_operand:V4QI 1 "register_operand" " l,r")
++              (parallel [(const_int 0)])))
++          (const_int 14)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type" "alu,alu")
++   (set_attr "length" " 2, 4")])
++
++;; Little-endian only: QI operand 1 in element 0, zero elsewhere.
++(define_insn "vec_mergeqi_and_cv0_1"
++  [(set (match_operand:V4QI 0 "register_operand" "=$l,r")
++        (vec_merge:V4QI
++          (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r"))
++          (const_vector:V4QI [
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)])
++          (const_int 1)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type" "alu,alu")
++   (set_attr "length" " 2, 4")])
++
++;; Operand-swapped form of vec_mergeqi_and_cv0_1.  As in
++;; vec_mergev4qi_and_cv0_2, the zero vector comes first, so elements 1..3
++;; must be selected from it: mask 14, not 2.
++(define_insn "vec_mergeqi_and_cv0_2"
++  [(set (match_operand:V4QI 0 "register_operand" "=$l,r")
++        (vec_merge:V4QI
++          (const_vector:V4QI [
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)
++            (const_int 0)])
++          (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r"))
++          (const_int 14)))]
++  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
++  "@
++   zeb33\t%0, %1
++   zeb\t%0, %1"
++  [(set_attr "type" "alu,alu")
++   (set_attr "length" " 2, 4")])
++
++;; vec_set for V2HI: store HI operand 1 into element operands[2] of the
++;; vector in operand 0.  The element index is converted into a one-hot
++;; vec_merge selector (1 << index) and handed to vec_setv2hi_internal,
++;; with operand 0 serving as both the old vector and the destination.
++(define_expand "vec_setv2hi"
++  [(match_operand:V2HI 0 "register_operand" "")
++   (match_operand:HI 1 "register_operand" "")
++   (match_operand:SI 2 "immediate_operand" "")]
++  "NDS32_EXT_DSP_P ()"
++{
++  HOST_WIDE_INT pos = INTVAL (operands[2]);
++  /* A V2HI only has elements 0 and 1.  Index 2 would yield selector 4,
++     which is neither the Iv01 nor the Iv02 alternative of
++     vec_setv2hi_internal, and a negative index would be an undefined shift
++     count.  */
++  if (pos < 0 || pos > 1)
++    gcc_unreachable ();
++  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos;
++  emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1],
++                                       operands[0], GEN_INT (elem)));
++  DONE;
++})
++
++;; Replace one halfword of V2HI operand 2 with HI operand 1.  Operand 3 is
++;; the one-hot vec_merge selector (alternative 0: bit 0, alternative 1:
++;; bit 1).  The instruction and operand order chosen for each alternative
++;; are swapped between little and big endian.
++(define_insn "vec_setv2hi_internal"
++  [(set (match_operand:V2HI 0 "register_operand" "= r, r")
++        (vec_merge:V2HI
++          (vec_duplicate:V2HI
++            (match_operand:HI 1 "register_operand" " r, r"))
++          (match_operand:V2HI 2 "register_operand" " r, r")
++          (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
++  "NDS32_EXT_DSP_P ()"
++{
++  if (TARGET_BIG_ENDIAN)
++    {
++      const char *pats[] = { "pkbb16\t%0, %1, %2",
++                             "pktb16\t%0, %2, %1" };
++      return pats[which_alternative];
++    }
++  else
++    {
++      const char *pats[] = { "pktb16\t%0, %2, %1",
++                             "pkbb16\t%0, %1, %2" };
++      return pats[which_alternative];
++    }
++}
++  [(set_attr "type" "dpack")
++   (set_attr "length" "4")])
++
++;; Little-endian only: element 0 of operand 1 in element 0, zero in
++;; element 1 (selector 1 takes element 0 from the duplicated halfword).
++(define_insn "vec_mergev2hi_and_cv0_1"
++  [(set (match_operand:V2HI 0 "register_operand" "=$l,r")
++        (vec_merge:V2HI
++          (vec_duplicate:V2HI
++            (vec_select:HI
++              (match_operand:V2HI 1 "register_operand" " l,r")
++              (parallel [(const_int 0)])))
++          (const_vector:V2HI [
++            (const_int 0)
++            (const_int 0)])
++          (const_int 1)))]
++
"NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergev2hi_and_cv0_2" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " l,r") ++ (parallel [(const_int 0)]))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergehi_and_cv0_1" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergehi_and_cv0_2" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_expand "pkbb" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1), GEN_INT (1))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (0), GEN_INT (0))); ++ } ++ DONE; ++}) ++ ++(define_insn "pkbbsi_1" ++ [(set 
(match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 65535)) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkbbsi_2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16)) ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 65535))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkbbsi_3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkbbsi_4" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16)) ++ (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff) ++(define_insn "pktbsi_1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int -65536)) ++ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pktbsi_2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int -65536)) ++ (and:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 65535))))] ++ 
"NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "pktbsi_3" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") ++ (const_int 16 ) ++ (const_int 0)) ++ (match_operand:SI 1 "register_operand" " r"))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pktbsi_4" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") ++ (const_int 16 ) ++ (const_int 0)) ++ (zero_extend:SI (match_operand:HI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkttsi" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" " r") ++ (const_int -65536)) ++ (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++ "pktt16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "pkbt" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1), GEN_INT (0))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (0), GEN_INT (1))); ++ } ++ DONE; ++}) ++ ++(define_expand "pktt" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (0), GEN_INT (0))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), 
GEN_INT (1), GEN_INT (1))); ++ } ++ DONE; ++}) ++ ++(define_expand "pktb" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (0), GEN_INT (1))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (1), GEN_INT (0))); ++ } ++ DONE; ++}) ++ ++(define_insn "vec_mergerr" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (match_operand:HI 1 "register_operand" " r, r")) ++ (vec_duplicate:V2HI ++ (match_operand:HI 2 "register_operand" " r, r")) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ pkbb16\t%0, %2, %1 ++ pkbb16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "vec_merge" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (vec_merge:V2HI ++ (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "pktb16\t%0, %1, %2", ++ "pktb16\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "pktb16\t%0, %2, %1", ++ "pktb16\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergerv" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (match_operand:HI 1 "register_operand" " r, r, r, r")) ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 
"nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ pkbb16\t%0, %2, %1 ++ pktb16\t%0, %2, %1 ++ pkbb16\t%0, %1, %2 ++ pkbt16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergevr" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) ++ (vec_duplicate:V2HI ++ (match_operand:HI 2 "register_operand" " r, r, r, r")) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ pkbb16\t%0, %2, %1 ++ pkbt16\t%0, %2, %1 ++ pkbb16\t%0, %1, %2 ++ pktb16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergevv" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r, r, r, r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r, r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")]))) ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r, r, r, r, r") ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")]))) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "pktt16\t%0, %1, %2", ++ "pktb16\t%0, %1, %2", ++ "pkbb16\t%0, %1, %2", ++ "pkbt16\t%0, %1, %2", ++ "pktt16\t%0, %2, %1", ++ "pkbt16\t%0, %2, %1", ++ "pkbb16\t%0, %2, %1", ++ "pktb16\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } 
++ else ++ { ++ const char *pats[] = { "pkbb16\t%0, %2, %1", ++ "pktb16\t%0, %2, %1", ++ "pktt16\t%0, %2, %1", ++ "pkbt16\t%0, %2, %1", ++ "pkbb16\t%0, %1, %2", ++ "pkbt16\t%0, %1, %2", ++ "pktt16\t%0, %1, %2", ++ "pktb16\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "vec_extractv4qi" ++ [(set (match_operand:QI 0 "register_operand" "") ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" "") ++ (parallel [(match_operand:SI 2 "const_int_operand" "")])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ if (INTVAL (operands[2]) != 0 ++ && INTVAL (operands[2]) != 1 ++ && INTVAL (operands[2]) != 2 ++ && INTVAL (operands[2]) != 3) ++ gcc_unreachable (); ++ ++ if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) ++ FAIL; ++}) ++ ++(define_insn "vec_extractv4qi0" ++ [(set (match_operand:QI 0 "register_operand" "=l,r,r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "zeb33\t%0, %1"; ++ case 1: ++ return "zeb\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load (operands, 1); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_extractv4qi0_ze" ++ [(set (match_operand:SI 0 "register_operand" "=l,r,r") ++ (zero_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "zeb33\t%0, %1"; ++ case 1: ++ return "zeb\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load (operands, 1); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_extractv4qi0_se" ++ [(set (match_operand:SI 0 "register_operand" 
"=l,r,r") ++ (sign_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seb33\t%0, %1"; ++ case 1: ++ return "seb\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_se (operands, 1); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qi1" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_rotrv4qi_1 (tmp, operands[1])); ++ emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qi2" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_rotrv4qi_2 (tmp, operands[1])); ++ emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qi3" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_rotrv4qi_3 (tmp, 
operands[1])); ++ emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_extractv4qi3_se" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (sign_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " 0,r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 24 ++ srai\t%0, %1, 24" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv4qi3_ze" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (zero_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " 0,r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srli45\t%0, 24 ++ srli\t%0, %1, 24" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn_and_split "vec_extractv4qihi0" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi0 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qihi1" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi1 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ 
[(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi2 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qihi3" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi3 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_expand "vec_extractv2hi" ++ [(set (match_operand:HI 0 "register_operand" "") ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" "") ++ (parallel [(match_operand:SI 2 "const_int_operand" "")])))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (INTVAL (operands[2]) != 0 ++ && INTVAL (operands[2]) != 1) ++ gcc_unreachable (); ++ ++ if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) ++ FAIL; ++}) ++ ++(define_insn "vec_extractv2hi0" ++ [(set (match_operand:HI 0 "register_operand" "=$l,r,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seh33\t%0, %1"; ++ case 1: ++ return "seh\t%0, %1"; ++ case 2: 
++ return nds32_output_32bit_load_se (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "vec_extractv2hi0_ze" ++ [(set (match_operand:SI 0 "register_operand" "=$l, r,$ l, *r") ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l, r, U33, m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "zeh33\t%0, %1"; ++ case 1: ++ return "zeh\t%0, %1"; ++ case 2: ++ return nds32_output_16bit_load (operands, 2); ++ case 3: ++ return nds32_output_32bit_load (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load,load") ++ (set_attr "length" " 2, 4, 2, 4")]) ++ ++(define_insn "vec_extractv2hi0_se" ++ [(set (match_operand:SI 0 "register_operand" "=$l, r, r") ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seh33\t%0, %1"; ++ case 1: ++ return "seh\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_se (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "vec_extractv2hi0_be" ++ [(set (match_operand:HI 0 "register_operand" "=$d,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 0)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 16 ++ srai\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1" ++ [(set (match_operand:HI 0 "register_operand" "=$d,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ 
srai45\t%0, 16 ++ srai\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1_se" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 16 ++ srai\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1_ze" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srli45\t%0, 16 ++ srli\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1_be" ++ [(set (match_operand:HI 0 "register_operand" "=$l,r,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 1)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seh33\t%0, %1"; ++ case 1: ++ return "seh\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_se (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "<su>mul16" ++ [(set (match_operand:V2SI 0 "register_operand" "=r") ++ (mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r")) ++ (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mul16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mulx16" ++ [(set (match_operand:V2SI 0 "register_operand" "=r") ++ (vec_merge:V2SI ++ (vec_duplicate:V2SI ++ (mult:SI ++ (extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ 
(extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))))) ++ (vec_duplicate:V2SI ++ (mult:SI ++ (extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mulx16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv2hi_1" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_select:V2HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1) (const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv2hi_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_select:V2HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_1" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 8" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_1_be" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 8" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_2" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel 
[(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_2_be" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 24" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_3_be" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 24" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "v4qi_dup_10" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "pkbb\t%0, %1, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "v4qi_dup_32" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "pktt\t%0, %1, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ 
++(define_expand "vec_unpacks_lo_v4qi" ++ [(match_operand:V2HI 0 "register_operand" "=r") ++ (match_operand:V4QI 1 "register_operand" " r")] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ emit_insn (gen_sunpkd810 (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "sunpkd810" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd810_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd810_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd810_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd810_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ 
"NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd810_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 2)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "sunpkd820" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd820_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd820_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd820_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 2)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn 
"<zs>unpkd820_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd820_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "sunpkd830" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd830_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd830_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd830_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ 
(vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd830_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd830_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "sunpkd831" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd831_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd831_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ 
(vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd831_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd831_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 2)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd831_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "zunpkd810" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1])); ++ else ++ 
emit_insn (gen_zunpkd810_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "zunpkd820" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd820_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "zunpkd830" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd830_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "zunpkd831" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd831_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "smbb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1))); ++ else ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (0), GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smbt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (0))); ++ else ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (0), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_expand "smtt" ++ [(match_operand:SI 0 
"register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (0), GEN_INT (0))); ++ else ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "mulhisi3v" ++ [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smtt\t%0, %1, %2", ++ "smbt\t%0, %2, %1", ++ "smbb\t%0, %1, %2", ++ "smbt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smbb\t%0, %1, %2", ++ "smbt\t%0, %1, %2", ++ "smtt\t%0, %1, %2", ++ "smbt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmabb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (1), GEN_INT (1), ++ operands[1])); ++ else ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (0), GEN_INT (0), ++ operands[1])); ++ DONE; ++}) ++ ++(define_expand "kmabt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 
"register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (1), GEN_INT (0), ++ operands[1])); ++ else ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (0), GEN_INT (1), ++ operands[1])); ++ DONE; ++}) ++ ++(define_expand "kmatt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (0), GEN_INT (0), ++ operands[1])); ++ else ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (1), GEN_INT (1), ++ operands[1])); ++ DONE; ++}) ++ ++(define_insn "kma_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) ++ (match_operand:SI 5 "register_operand" " 0, 0, 0, 0")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "kmatt\t%0, %1, %2", ++ "kmabt\t%0, %2, %1", ++ "kmabb\t%0, %1, %2", ++ "kmabt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "kmabb\t%0, %1, %2", ++ "kmabt\t%0, %1, %2", ++ "kmatt\t%0, %1, %2", ++ "kmabt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smds" ++ [(match_operand:SI 0 "register_operand" "") 
++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smds_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_smds_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "smds_le" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smds_be" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smdrs" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_smdrs_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "smdrs_le" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 
r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smdrs_be" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smxdsv" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++ ++(define_expand "smxdsv_le" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smxdsv_be" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ 
(sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_insn "smal1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal4" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 
"register_operand" " r") ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal5" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal6" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal7" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal8" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel 
[(const_int 1)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; We need this dummy pattern for smal ++(define_insn_and_split "extendsidi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ rtx high_part_dst, low_part_dst; ++ ++ low_part_dst = nds32_di_low_part_subreg (operands[0]); ++ high_part_dst = nds32_di_high_part_subreg (operands[0]); ++ ++ emit_move_insn (low_part_dst, operands[1]); ++ emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++;; We need this dummy pattern for usmar64/usmsr64 ++(define_insn_and_split "zero_extendsidi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ rtx high_part_dst, low_part_dst; ++ ++ low_part_dst = nds32_di_low_part_subreg (operands[0]); ++ high_part_dst = nds32_di_high_part_subreg (operands[0]); ++ ++ emit_move_insn (low_part_dst, operands[1]); ++ emit_move_insn (high_part_dst, const0_rtx); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "extendhidi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ rtx high_part_dst, low_part_dst; ++ ++ low_part_dst = nds32_di_low_part_subreg (operands[0]); ++ high_part_dst = nds32_di_high_part_subreg (operands[0]); ++ ++ ++ emit_insn (gen_extendhisi2 (low_part_dst, operands[1])); ++ emit_insn (gen_ashrsi3 
(high_part_dst, low_part_dst, GEN_INT (31))); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI (match_operand:QI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "sunpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "smulsi3_highpart" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "smmul\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "smmul_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [(mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))] ++ UNSPEC_ROUND) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "smmul.u\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmac" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmac\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmac_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [(mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] ++ UNSPEC_ROUND) ++ (const_int 
32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmac.u\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmsb" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmsb\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmsb_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [(mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] ++ UNSPEC_ROUND) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmsb.u\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kwmmul" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (ss_mult:DI ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) ++ (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2))) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "kwmmul\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "kwmmul_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [ ++ (ss_mult:DI ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) ++ (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))] ++ UNSPEC_ROUND) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "kwmmul.u\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "smmwb" ++ 
[(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); ++ else ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smmwt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); ++ else ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "smulhisi3_highpart_1" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smmwt\t%0, %1, %2", ++ "smmwb\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smmwb\t%0, %1, %2", ++ "smmwt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "smulhisi3_highpart_2" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r, r"))) ++ (const_int 16))))] 
++ "NDS32_EXT_DSP_P ()" ++{ /* Operand 1 is the V2HI halfword source and operand 2 the SI word here, the reverse of smulhisi3_highpart_1, so print them swapped to keep the SI word in the first source slot. */ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smmwt\t%0, %2, %1", ++ "smmwb\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smmwb\t%0, %2, %1", ++ "smmwt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "smmwb_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); ++ else ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smmwt_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); ++ else ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "smmw_round_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI ++ [(mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] ++ UNSPEC_ROUND) ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smmwt.u\t%0, %1, %2", ++ "smmwb.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smmwb.u\t%0, %1, %2", ++ "smmwt.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" 
"dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmmawb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ else ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ DONE; ++}) ++ ++(define_expand "kmmawt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ else ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ DONE; ++}) ++ ++(define_insn "kmmaw_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (ss_plus:SI ++ (match_operand:SI 4 "register_operand" " 0, 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) ++ (const_int 16)))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "kmmawt\t%0, %1, %2", ++ "kmmawb\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "kmmawb\t%0, %1, %2", ++ "kmmawt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmmawb_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ 
(match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ /* The accumulator operands[1] is passed last; the halfword index handed to kmmaw_round_internal flips with endianness. */ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ else ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmmawt_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ else ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "kmmaw_round_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (ss_plus:SI ++ (match_operand:SI 4 "register_operand" " 0, 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI ++ [(mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] ++ UNSPEC_ROUND) ++ (const_int 16)))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "kmmawt.u\t%0, %1, %2", ++ "kmmawb.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "kmmawb.u\t%0, %1, %2", ++ "kmmawt.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smalbb" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 
"register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (1), GEN_INT (1))); ++ else ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (0), GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smalbt" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (1), GEN_INT (0))); ++ else ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (0), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_expand "smaltt" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (0), GEN_INT (0))); ++ else ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (1), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "smaddhidi" ++ [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") ++ (plus:DI ++ (match_operand:DI 3 "register_operand" " 0, 0, 0, 0") ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, 
Iv01, Iv01, Iv00")]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1", ++ "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2", ++ "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smaddhidi2" ++ [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") ++ (plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) ++ (match_operand:DI 3 "register_operand" " 0, 0, 0, 0")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1", ++ "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2", ++ "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smalda1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_expand 
"smalds1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_insn "smalda1_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smalda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smalds1_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smalds\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smalda1_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") 
++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smalda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smalds1_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smalds\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smaldrs3" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_insn "smaldrs3_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ 
(sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smaldrs\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smaldrs3_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smaldrs\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smalxda1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_expand "smalxds1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 
"register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_insn "smalxd<add_sub>1_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smalxd<add_sub>\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "smalxd<add_sub>1_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smalxd<add_sub>\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smslda1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI 
(vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))))) ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smslda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smslxda1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))))) ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smslxda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; mada for synthesizing smalda ++(define_insn_and_split "mada1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () && 
!reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx result0 = gen_reg_rtx (SImode); ++ rtx result1 = gen_reg_rtx (SImode); ++ emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], ++ operands[3], operands[4])); ++ emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], ++ operands[5], operands[6])); ++ emit_insn (gen_addsi3 (operands[0], result0, result1)); ++ DONE; ++}) ++ ++(define_insn_and_split "mada2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx result0 = gen_reg_rtx (SImode); ++ rtx result1 = gen_reg_rtx (SImode); ++ emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], ++ operands[3], operands[4])); ++ emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], ++ operands[6], operands[5])); ++ emit_insn (gen_addsi3 (operands[0], result0, result1)); ++ DONE; ++}) ++ ++;; sms for synthesizing smalds ++(define_insn_and_split "sms1" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(match_operand:SI 4 
"nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () ++ && (!reload_completed ++ || !nds32_need_split_sms_p (operands[3], operands[4], ++ operands[5], operands[6]))" ++ ++{ ++ return nds32_output_sms (operands[3], operands[4], ++ operands[5], operands[6]); ++} ++ "NDS32_EXT_DSP_P () ++ && !reload_completed ++ && nds32_need_split_sms_p (operands[3], operands[4], ++ operands[5], operands[6])" ++ [(const_int 1)] ++{ ++ nds32_split_sms (operands[0], operands[1], operands[2], ++ operands[3], operands[4], ++ operands[5], operands[6]); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "sms2" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () ++ && (!reload_completed ++ || !nds32_need_split_sms_p (operands[3], operands[4], ++ operands[6], operands[5]))" ++{ ++ return nds32_output_sms (operands[3], operands[4], ++ operands[6], operands[5]); ++} ++ "NDS32_EXT_DSP_P () ++ && !reload_completed ++ && nds32_need_split_sms_p (operands[3], operands[4], ++ operands[6], operands[5])" ++ [(const_int 1)] ++{ ++ nds32_split_sms (operands[0], operands[1], 
operands[2], ++ operands[3], operands[4], ++ operands[6], operands[5]); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmda\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmxda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmxda\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmada" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P 
()" ++ "kmada\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmada2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmada\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmaxda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmaxda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmads" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI 
(vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmads\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmadrs" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmadrs\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmaxds" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmaxds\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmsda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI 
(vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmsda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmsxda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmsxda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; smax[8|16] and umax[8|16] ++(define_insn "<opcode><mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<opcode><bits>\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++;; smin[8|16] and umin[8|16] ++(define_insn "<opcode><mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<opcode><bits>\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "<opcode><mode>3_bb" ++ [(set (match_operand:<VELT> 0 "register_operand" "=r") ++ (sumin_max:<VELT> (vec_select:<VELT> ++ (match_operand:VQIHI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:<VELT> ++ (match_operand:VQIHI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))))] ++ 
"NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<opcode><bits>\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode><mode>3_tt" ++ [(set (match_operand:<VELT> 0 "register_operand" "=r") ++ (sumin_max:<VELT> (vec_select:<VELT> ++ (match_operand:VQIHI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:<VELT> ++ (match_operand:VQIHI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ rtx tmp = gen_reg_rtx (<MODE>mode); ++ emit_insn (gen_<opcode><mode>3 (tmp, operands[1], operands[2])); ++ emit_insn (gen_rotr<mode>_1 (tmp, tmp)); ++ emit_move_insn (operands[0], simplify_gen_subreg (<VELT>mode, tmp, <MODE>mode, 0)); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode>v4qi3_22" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (sumin_max:QI (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])) ++ (vec_select:QI ++ (match_operand:V4QI 2 "register_operand" " r") ++ (parallel [(const_int 2)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2])); ++ emit_insn (gen_rotrv4qi_2 (tmp, tmp)); ++ emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode>v4qi3_33" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (sumin_max:QI (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])) ++ (vec_select:QI ++ (match_operand:V4QI 2 "register_operand" " r") ++ (parallel [(const_int 3)]))))] ++ 
"NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2])); ++ emit_insn (gen_rotrv4qi_3 (tmp, tmp)); ++ emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode>v2hi3_bbtt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (sumin_max:HI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (sumin_max:HI (vec_select:HI ++ (match_dup:V2HI 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup:HI 2) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ emit_insn (gen_<opcode>v2hi3 (operands[0], operands[1], operands[2])); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_expand "abs<mode>2" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv" ++{ ++}) ++ ++(define_insn "kabs<mode>2" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "kabs<bits>\t%0, %1" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (extend:DI ++ (match_operand:SI 3 
"register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (mult:DI ++ (extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (extend:DI ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (extend:DI ++ (mult:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_4" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (extend:DI ++ (mult:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>msr64" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>msr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>msr64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (extend:DI ++ (mult:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ 
"<su>msr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; kmar64, kmsr64, ukmar64 and ukmsr64 ++(define_insn "kmar64_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ss_plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (sign_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmar64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ss_plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "kmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmsr64" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ss_minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (sign_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmsr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "ukmar64_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (us_plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (zero_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (zero_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "ukmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "ukmar64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (us_plus:DI ++ (mult:DI ++ (zero_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (zero_extend:DI ++ (match_operand:SI 3 "register_operand" " r"))) ++ 
(match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "ukmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "ukmsr64" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (us_minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (zero_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (zero_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "ukmsr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r")) ++ (and:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (not:SI (match_dup 3)))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %1, %2, %3" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (not:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %1, %3, %2" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (match_operand:SI 3 "register_operand" " r") ++ (not:SI (match_dup 1)))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %2, %3, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick4" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (not:SI (match_dup 1)) ++ 
(match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %2, %3, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick5" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (not:SI (match_operand:SI 2 "register_operand" " r"))) ++ (and:SI ++ (match_operand:SI 3 "register_operand" " r") ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %1, %2" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick6" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (not:SI (match_operand:SI 1 "register_operand" " r")) ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (match_operand:SI 3 "register_operand" " r") ++ (match_dup 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %2, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick7" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (not:SI (match_operand:SI 2 "register_operand" " r"))) ++ (and:SI ++ (match_dup 2) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %1, %2" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick8" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (not:SI (match_operand:SI 1 "register_operand" " r")) ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (match_dup 1) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %2, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "sraiu" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))] ++ UNSPEC_ROUND))] 
++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srai.u\t%0, %1, %2 ++ sra.u\t%0, %1, %2" ++ [(set_attr "type" "daluround") ++ (set_attr "length" "4")]) ++ ++(define_insn "kssl" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (ss_ashift:SI (match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ kslli\t%0, %1, %2 ++ ksll\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "kslraw_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (if_then_else:SI ++ (lt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 0)) ++ (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r") ++ (neg:SI (match_dup 2)))] ++ UNSPEC_ROUND) ++ (ss_ashift:SI (match_dup 1) ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "kslraw.u\t%0, %1, %2" ++ [(set_attr "type" "daluround") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<shift>di3" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (shift_rotate:DI (match_operand:DI 1 "register_operand" "") ++ (match_operand:SI 2 "nds32_rimm6u_operand" "")))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ if (REGNO (operands[0]) == REGNO (operands[1])) ++ { ++ rtx tmp = gen_reg_rtx (DImode); ++ nds32_split_<code>di3 (tmp, operands[1], operands[2]); ++ emit_move_insn (operands[0], tmp); ++ } ++ else ++ nds32_split_<code>di3 (operands[0], operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_insn "sclip32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))] ++ "NDS32_EXT_DSP_P ()" ++ "sclip32\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "uclip32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 
"register_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))] ++ "NDS32_EXT_DSP_P ()" ++ "uclip32\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "bitrev" ++ [(set (match_operand:SI 0 "register_operand" "=r, r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")] ++ UNSPEC_BITREV))] ++ "" ++ "@ ++ bitrev\t%0, %1, %2 ++ bitrevi\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")] ++) ++ ++;; wext, wexti ++(define_insn "<su>wext" ++ [(set (match_operand:SI 0 "register_operand" "=r, r") ++ (truncate:SI ++ (shiftrt:DI ++ (match_operand:DI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " r,Iu05"))))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ wext\t%0, %1, %2 ++ wexti\t%0, %1, %2" ++ [(set_attr "type" "dwext") ++ (set_attr "length" "4")]) ++ ++;; 32-bit add/sub instruction: raddw and rsubw. ++(define_insn "r<opcode>si3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (ashiftrt:DI ++ (plus_minus:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "r<opcode>w\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++;; 32-bit add/sub instruction: uraddw and ursubw. 
++(define_insn "ur<opcode>si3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (plus_minus:DI ++ (zero_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ur<opcode>w\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) +diff --git a/gcc/config/nds32/nds32-e8.md b/gcc/config/nds32/nds32-e8.md +new file mode 100644 +index 0000000..1f24b5c +--- /dev/null ++++ b/gcc/config/nds32/nds32-e8.md +@@ -0,0 +1,329 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define E8 pipeline settings. 
++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_e8_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; II - Instruction Issue / Address Generation ++;; EX - Instruction Execution ++;; EXD - Pseudo Stage / Load Data Completion ++ ++(define_cpu_unit "e8_ii" "nds32_e8_machine") ++(define_cpu_unit "e8_ex" "nds32_e8_machine") ++ ++(define_insn_reservation "nds_e8_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ii+e8_ex, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*2, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple")
++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*3, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*4, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*5, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*6, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*7, e8_ex") ++ ++(define_insn_reservation "nds_e8_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*11, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ii+e8_ex, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_3" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*2, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*3, e8_ex") ++ ++(define_insn_reservation 
"nds_e8_store_multiple_5" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*4, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*5, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*6, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*7, e8_ex") ++ ++(define_insn_reservation "nds_e8_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*11, e8_ex") ++ ++(define_insn_reservation "nds_e8_mul_fast" 1 ++ (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "e8"))) ++ "e8_ii, e8_ex") ++ ++(define_insn_reservation "nds_e8_mul_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "e8"))) ++ "e8_ii, e8_ex*16") ++ ++(define_insn_reservation "nds_e8_mac_fast" 1 ++ (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "e8"))) ++ "e8_ii, e8_ii+e8_ex, e8_ex") ++ ++(define_insn_reservation "nds_e8_mac_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "e8"))) ++ "e8_ii, (e8_ii+e8_ex)*16, e8_ex") ++ ++(define_insn_reservation "nds_e8_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, (e8_ii+e8_ex)*36, e8_ex") ++ ++(define_insn_reservation "nds_e8_branch" 1 ++ 
(and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "e8")) ++ "e8_ii, e8_ex") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD ++;; Load data from the memory and produce the loaded data. The result is ++;; ready at EXD. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at EXD. ++;; ADDR_OUT ++;; Most load/store instructions can produce an address output if updating ++;; the base register is required. The result is ready at EX, which is ++;; produced by ALU. ++;; ALU, MOVD44, MUL, MAC ++;; The result is ready at EX. ++;; DIV_Rs ++;; A division instruction saves the quotient result to Rt and saves the ++;; remainder result to Rs. The instruction is separated into two micro- ++;; operations. The first micro-operation writes to Rt, and the second ++;; one writes to Rs. Each of the results is ready at EX. ++;; ++;; Consumers (RHS) ++;; ALU, MUL, DIV ++;; Require operands at EX. ++;; ADDR_IN_MOP(N) ++;; N denotes the address input is required by the N-th micro-operation. ++;; Such operand is required at II. ++;; ST ++;; A store instruction requires its data at EX. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at EX. ++;; BR_COND ++;; If a branch instruction is conditional, its input data is required at EX.
++ ++;; LD -> ADDR_IN_MOP(1) ++(define_bypass 2 ++ "nds_e8_load" ++ "nds_e8_branch,\ ++ nds_e8_load, nds_e8_store,\ ++ nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ ++ nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ ++ nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ ++ nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ ++ nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ ++ nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" ++ "nds32_e8_load_to_ii_p" ++) ++ ++;; LD -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_e8_load" ++ "nds_e8_alu, ++ nds_e8_mul_fast, nds_e8_mul_slow,\ ++ nds_e8_mac_fast, nds_e8_mac_slow,\ ++ nds_e8_div,\ ++ nds_e8_branch,\ ++ nds_e8_store,\ ++ nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ ++ nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ ++ nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" ++ "nds32_e8_load_to_ex_p" ++) ++ ++;; ALU, MOVD44, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1) ++(define_bypass 2 ++ "nds_e8_alu, ++ nds_e8_mul_fast, nds_e8_mul_slow,\ ++ nds_e8_mac_fast, nds_e8_mac_slow,\ ++ nds_e8_div,\ ++ nds_e8_load, nds_e8_store,\ ++ nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ ++ nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ ++ nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ ++ nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ ++ nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ ++ nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" ++ "nds_e8_branch,\ ++ nds_e8_load, nds_e8_store,\ ++ nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ ++ nds_e8_load_multiple_4,nds_e8_load_multiple_5, 
nds_e8_load_multiple_6,\ ++ nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ ++ nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ ++ nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ ++ nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" ++ "nds32_e8_ex_to_ii_p" ++) ++ ++;; LMW(N, N) -> ADDR_IN_MOP(1) ++(define_bypass 2 ++ "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ ++ nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ ++ nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12" ++ "nds_e8_branch,\ ++ nds_e8_load, nds_e8_store,\ ++ nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ ++ nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ ++ nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ ++ nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ ++ nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ ++ nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" ++ "nds32_e8_last_load_to_ii_p" ++) ++ ++;; LMW(N, N) -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ ++ nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ ++ nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12" ++ "nds_e8_alu, ++ nds_e8_mul_fast, nds_e8_mul_slow,\ ++ nds_e8_mac_fast, nds_e8_mac_slow,\ ++ nds_e8_div,\ ++ nds_e8_branch,\ ++ nds_e8_store,\ ++ nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ ++ nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ ++ nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" ++ "nds32_e8_last_load_to_ex_p" ++) +diff --git a/gcc/config/nds32/nds32-elf.opt b/gcc/config/nds32/nds32-elf.opt +new 
file mode 100644 +index 0000000..afe6aad +--- /dev/null ++++ b/gcc/config/nds32/nds32-elf.opt +@@ -0,0 +1,16 @@ ++mcmodel= ++Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM) ++Specify the address generation strategy for code model. ++ ++Enum ++Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) ++Known cmodel types (for use with the -mcmodel= option): ++ ++EnumValue ++Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) +diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c +index f8b2738..6525915 100644 +--- a/gcc/config/nds32/nds32-fp-as-gp.c ++++ b/gcc/config/nds32/nds32-fp-as-gp.c +@@ -1,4 +1,4 @@ +-/* The fp-as-gp pass of Andes NDS32 cpu for GNU compiler ++/* fp-as-gp pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2016 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + +@@ -24,19 +24,280 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. 
*/ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" ++#include "ira.h" ++#include "ira-int.h" ++#include "tree-pass.h" + + /* ------------------------------------------------------------------------ */ + ++/* A helper function to check if this function should contain prologue. */ ++static bool ++nds32_have_prologue_p (void) ++{ ++ int i; ++ ++ for (i = 0; i < 28; i++) ++ if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) ++ return true; ++ ++ return (flag_pic ++ || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) ++ || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); ++} ++ ++static int ++nds32_get_symbol_count (void) ++{ ++ int symbol_count = 0; ++ rtx_insn *insn; ++ basic_block bb; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ /* Counting the insn number which the addressing mode is symbol. */ ++ if (single_set (insn) && nds32_symbol_load_store_p (insn)) ++ { ++ rtx pattern = PATTERN (insn); ++ rtx mem; ++ gcc_assert (GET_CODE (pattern) == SET); ++ if (GET_CODE (SET_SRC (pattern)) == REG ) ++ mem = SET_DEST (pattern); ++ else ++ mem = SET_SRC (pattern); ++ ++ /* We have only lwi37 and swi37 for fp-as-gp optimization, ++ so don't count any other than SImode. ++ MEM for QImode and HImode will wrap by ZERO_EXTEND ++ or SIGN_EXTEND */ ++ if (GET_CODE (mem) == MEM) ++ symbol_count++; ++ } ++ } ++ } ++ ++ return symbol_count; ++} ++ + /* Function to determine whether it is worth to do fp_as_gp optimization. +- Return 0: It is NOT worth to do fp_as_gp optimization. +- Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. ++ Return false: It is NOT worth to do fp_as_gp optimization. ++ Return true: It is APPROXIMATELY worth to do fp_as_gp optimization. + Note that if it is worth to do fp_as_gp optimization, + we MUST set FP_REGNUM ever live in this function. */ +-int ++static bool + nds32_fp_as_gp_check_available (void) + { +- /* By default we return 0. 
*/ +- return 0; ++ basic_block bb; ++ basic_block exit_bb; ++ edge_iterator ei; ++ edge e; ++ bool first_exit_blocks_p; ++ ++ /* If there exists ANY of following conditions, ++ we DO NOT perform fp_as_gp optimization: ++ 1. TARGET_FORBID_FP_AS_GP is set ++ regardless of the TARGET_FORCE_FP_AS_GP. ++ 2. User explicitly uses 'naked'/'no_prologue' attribute. ++ We use nds32_naked_function_p() to help such checking. ++ 3. Not optimize for size. ++ 4. Need frame pointer. ++ 5. If $fp is already required to be saved, ++ it means $fp is already choosen by register allocator. ++ Thus we better not to use it for fp_as_gp optimization. ++ 6. This function is a vararg function. ++ DO NOT apply fp_as_gp optimization on this function ++ because it may change and break stack frame. ++ 7. The epilogue is empty. ++ This happens when the function uses exit() ++ or its attribute is no_return. ++ In that case, compiler will not expand epilogue ++ so that we have no chance to output .omit_fp_end directive. */ ++ if (TARGET_FORBID_FP_AS_GP ++ || nds32_naked_function_p (current_function_decl) ++ || !optimize_size ++ || frame_pointer_needed ++ || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) ++ || (cfun->stdarg == 1) ++ || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) ++ return false; ++ ++ /* Disable fp_as_gp if there is any infinite loop since the fp may ++ reuse in infinite loops by register rename. ++ For check infinite loops we should make sure exit_bb is post dominate ++ all other basic blocks if there is no infinite loops. */ ++ first_exit_blocks_p = true; ++ exit_bb = NULL; ++ ++ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) ++ { ++ /* More than one exit block also do not perform fp_as_gp optimization. */ ++ if (!first_exit_blocks_p) ++ return false; ++ ++ exit_bb = e->src; ++ first_exit_blocks_p = false; ++ } ++ ++ /* Not found exit_bb? just abort fp_as_gp! 
*/ ++ if (!exit_bb) ++ return false; ++ ++ /* Each bb should post dominate by exit_bb if there is no infinite loop! */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ if (!dominated_by_p (CDI_POST_DOMINATORS, ++ bb, ++ exit_bb)) ++ return false; ++ } ++ ++ /* Now we can check the possibility of using fp_as_gp optimization. */ ++ if (TARGET_FORCE_FP_AS_GP) ++ { ++ /* User explicitly issues -mforce-fp-as-gp option. */ ++ return true; ++ } ++ else ++ { ++ /* In the following we are going to evaluate whether ++ it is worth to do fp_as_gp optimization. */ ++ bool good_gain = false; ++ int symbol_count; ++ ++ int threshold; ++ ++ /* We check if there already requires prologue. ++ Note that $gp will be saved in prologue for PIC code generation. ++ After that, we can set threshold by the existence of prologue. ++ Each fp-implied instruction will gain 2-byte code size ++ from gp-aware instruction, so we have following heuristics. */ ++ if (flag_pic ++ || nds32_have_prologue_p ()) ++ { ++ /* Have-prologue: ++ Compiler already intends to generate prologue content, ++ so the fp_as_gp optimization will only insert ++ 'la $fp,_FP_BASE_' instruction, which will be ++ converted into 4-byte instruction at link time. ++ The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ ++ threshold = 3; ++ } ++ else ++ { ++ /* None-prologue: ++ Compiler originally does not generate prologue content, ++ so the fp_as_gp optimization will NOT ONLY insert ++ 'la $fp,_FP_BASE' instruction, but also causes ++ push/pop instructions. ++ If we are using v3push (push25/pop25), ++ the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; ++ If we are using normal push (smw/lmw), ++ the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ ++ threshold = 5 + (TARGET_V3PUSH ? 0 : 2); ++ } ++ ++ symbol_count = nds32_get_symbol_count (); ++ ++ if (symbol_count >= threshold) ++ good_gain = true; ++ ++ /* Enable fp_as_gp optimization when potential gain is good enough. 
*/ ++ return good_gain; ++ } ++} ++ ++static unsigned int ++nds32_fp_as_gp (void) ++{ ++ bool fp_as_gp_p; ++ calculate_dominance_info (CDI_POST_DOMINATORS); ++ fp_as_gp_p = nds32_fp_as_gp_check_available (); ++ ++ /* Here is a hack to IRA for enable/disable a hard register per function. ++ We *MUST* review this way after migrate gcc 4.9! */ ++ if (fp_as_gp_p) { ++ SET_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); ++ df_set_regs_ever_live (FP_REGNUM, 1); ++ } else { ++ CLEAR_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); ++ } ++ ++ cfun->machine->fp_as_gp_p = fp_as_gp_p; ++ ++ free_dominance_info (CDI_POST_DOMINATORS); ++ return 1; ++} ++ ++const pass_data pass_data_nds32_fp_as_gp = ++{ ++ RTL_PASS, /* type */ ++ "fp_as_gp", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0 /* todo_flags_finish */ ++}; ++ ++class pass_nds32_fp_as_gp : public rtl_opt_pass ++{ ++public: ++ pass_nds32_fp_as_gp (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) ++ { ++ return !TARGET_LINUX_ABI ++ && TARGET_16_BIT ++ && optimize_size; ++ } ++ unsigned int execute (function *) { return nds32_fp_as_gp (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_fp_as_gp (gcc::context *ctxt) ++{ ++ return new pass_nds32_fp_as_gp (ctxt); + } + + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-fpu.md b/gcc/config/nds32/nds32-fpu.md +new file mode 100644 +index 0000000..11eabd5 +--- /dev/null ++++ b/gcc/config/nds32/nds32-fpu.md +@@ -0,0 +1,503 @@ ++;; Machine description of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. 
++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++;;SFmode moves ++ ++(define_expand "movsf" ++ [(set (match_operand:SF 0 "general_operand" "") ++ (match_operand:SF 1 "general_operand" ""))] ++ "" ++{ ++ /* Need to force register if mem <- !reg. */ ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (SFmode, operands[1]); ++ if (CONST_DOUBLE_P (operands[1]) ++ && !satisfies_constraint_Cs20 (operands[1])) ++ { ++ const REAL_VALUE_TYPE *r; ++ unsigned long l; ++ ++ r = CONST_DOUBLE_REAL_VALUE (operands[1]); ++ REAL_VALUE_TO_TARGET_SINGLE (*r, l); ++ ++ emit_move_insn (operands[0], gen_rtx_HIGH (SFmode, operands[1])); ++ ++ if ((l & 0xFFF) != 0) ++ emit_insn (gen_movsf_lo (operands[0], operands[0], operands[1])); ++ DONE; ++ } ++}) ++ ++(define_insn "movsf_lo" ++ [(set (match_operand:SF 0 "register_operand" "=r") ++ (lo_sum:SF (match_operand:SF 1 "register_operand" "r") ++ (match_operand:SF 2 "immediate_operand" "i")))] ++ "" ++ "ori\t%0, %1, lo12(%2)" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*movsf" ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, f, *f, *r, f, Q, r, r, r") ++ (match_operand:SF 1 "general_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, m, f, *r, *f, Q, f,Cs05,Cs20, Chig"))] ++ "(register_operand(operands[0], SFmode) ++ || 
register_operand(operands[1], SFmode))" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "mov55\t%0, %1"; ++ case 1: ++ return "ori\t%0, %1, 0"; ++ case 2: ++ case 3: ++ case 4: ++ case 5: ++ return nds32_output_16bit_store (operands, 4); ++ case 6: ++ return nds32_output_32bit_store (operands, 4); ++ case 7: ++ case 8: ++ case 9: ++ case 10: ++ return nds32_output_16bit_load (operands, 4); ++ case 11: ++ return nds32_output_32bit_load (operands, 4); ++ case 12: ++ if (TARGET_FPU_SINGLE) ++ return "fcpyss\t%0, %1, %1"; ++ else ++ return "#"; ++ case 13: ++ return "fmtsr\t%1, %0"; ++ case 14: ++ return "fmfsr\t%0, %1"; ++ case 15: ++ return nds32_output_float_load (operands); ++ case 16: ++ return nds32_output_float_store (operands); ++ case 17: ++ return "movi55\t%0, %1"; ++ case 18: ++ return "movi\t%0, %1"; ++ case 19: ++ return "sethi\t%0, %1"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,fcpy,fmtsr,fmfsr,fload,fstore,alu,alu,alu") ++ (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4") ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1, v1, v1")]) ++ ++;; Conditional Move Instructions ++ ++(define_expand "mov<mode>cc" ++ [(set (match_operand:ANYF 0 "register_operand" "") ++ (if_then_else:ANYF (match_operand 1 "nds32_float_comparison_operator" "") ++ (match_operand:ANYF 2 "register_operand" "") ++ (match_operand:ANYF 3 "register_operand" "")))] ++ "" ++{ ++ if (nds32_cond_move_p (operands[1])) ++ { ++ /* Operands[1] condition code is UNORDERED or ORDERED, and ++ sub-operands[1] MODE isn't SFmode or SFmode, return FAIL ++ for gcc, because we don't using slt compare instruction ++ to generate UNORDERED and ORDERED condition. 
*/ ++ FAIL; ++ } ++ else ++ nds32_expand_float_movcc (operands); ++}) ++ ++(define_insn "fcmov<mode>_eq" ++ [(set (match_operand:ANYF 0 "register_operand" "=f, f") ++ (if_then_else:ANYF (eq (match_operand:SI 1 "register_operand" "f, f") ++ (const_int 0)) ++ (match_operand:ANYF 2 "register_operand" "f, 0") ++ (match_operand:ANYF 3 "register_operand" "0, f")))] ++ "" ++ "@ ++ fcmovz<size>\t%0,%2,%1 ++ fcmovn<size>\t%0,%3,%1" ++ [(set_attr "type" "fcmov") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fcmov<mode>_ne" ++ [(set (match_operand:ANYF 0 "register_operand" "=f, f") ++ (if_then_else:ANYF (ne (match_operand:SI 1 "register_operand" "f, f") ++ (const_int 0)) ++ (match_operand:ANYF 2 "register_operand" "f, 0") ++ (match_operand:ANYF 3 "register_operand" "0, f")))] ++ "" ++ "@ ++ fcmovn<size>\t%0,%2,%1 ++ fcmovz<size>\t%0,%3,%1" ++ [(set_attr "type" "fcmov") ++ (set_attr "length" "4")] ++) ++ ++;; Arithmetic instructions. ++ ++(define_insn "add<mode>3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (plus:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fadd<size>\t %0, %1, %2" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "sub<mode>3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (minus:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fsub<size>\t %0, %1, %2" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++;; Multiplication insns. 
++ ++(define_insn "mul<mode>3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (mult:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fmul<size>\t %0, %1, %2" ++ [(set_attr "type" "fmul<size>") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fma<mode>4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "0")))] ++ "TARGET_EXT_FPU_FMA" ++ "fmadd<size>\t%0, %1, %2" ++ [(set_attr "type" "fmac<size>") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fnma<mode>4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (match_operand:ANYF 3 "register_operand" "0")))] ++ "TARGET_EXT_FPU_FMA" ++ "fmsub<size>\t%0, %1, %2" ++ [(set_attr "type" "fmac<size>") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fms<mode>4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))] ++ "TARGET_EXT_FPU_FMA" ++ "fnmsub<size>\t%0, %1, %2" ++ [(set_attr "type" "fmac<size>") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fnms<mode>4" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) ++ (match_operand:ANYF 2 "register_operand" "f") ++ (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))] ++ "TARGET_EXT_FPU_FMA" ++ "fnmadd<size>\t%0, %1, %2" ++ [(set_attr "type" "fmac<size>") ++ (set_attr "length" "4")] ++) ++ ++;; Div Instructions. 
++ ++(define_insn "div<mode>3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (div:ANYF (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ "fdiv<size>\t %0, %1, %2" ++ [(set_attr "type" "fdiv<size>") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "sqrt<mode>2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] ++ "" ++ "fsqrt<size>\t %0, %1" ++ [(set_attr "type" "fsqrt<size>") ++ (set_attr "length" "4")] ++) ++ ++;; Conditional Branch patterns ++ ++(define_expand "cstore<mode>4" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (match_operator:SI 1 "nds32_float_comparison_operator" ++ [(match_operand:ANYF 2 "register_operand" "") ++ (match_operand:ANYF 3 "register_operand" "")]))] ++ "" ++{ ++ nds32_expand_float_cstore (operands); ++ DONE; ++}) ++ ++(define_expand "cbranch<mode>4" ++ [(set (pc) ++ (if_then_else (match_operator 0 "nds32_float_comparison_operator" ++ [(match_operand:ANYF 1 "register_operand" "") ++ (match_operand:ANYF 2 "register_operand" "")]) ++ (label_ref (match_operand 3 "" "")) ++ (pc)))] ++ "" ++{ ++ nds32_expand_float_cbranch (operands); ++ DONE; ++}) ++ ++;; Copysign Instructions. 
++ ++(define_insn "copysignsf3" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (unspec:SF [(match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f")] ++ UNSPEC_COPYSIGN))] ++ "TARGET_FPU_SINGLE" ++ "fcpyss\t%0,%1,%2" ++ [(set_attr "type" "fcpy") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "copysigndf3" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (unspec:DF [(match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f")] ++ UNSPEC_COPYSIGN))] ++ "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE" ++ "fcpysd\t%0,%1,%2" ++ [(set_attr "type" "fcpy") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*ncopysign<mode>3" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (neg:ANYF (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")] ++ UNSPEC_COPYSIGN)))] ++ "" ++ "fcpyns<size>\t%0,%1,%2" ++ [(set_attr "type" "fcpy") ++ (set_attr "length" "4")] ++) ++ ++;; Absolute Instructions ++ ++(define_insn "abssf2" ++ [(set (match_operand:SF 0 "register_operand" "=f, r") ++ (abs:SF (match_operand:SF 1 "register_operand" "f, r")))] ++ "TARGET_FPU_SINGLE || TARGET_EXT_PERF" ++ "@ ++ fabss\t%0, %1 ++ bclr\t%0, %1, 31" ++ [(set_attr "type" "fabs,alu") ++ (set_attr "length" "4") ++ (set_attr "feature" "fpu,pe1")] ++) ++ ++(define_insn "absdf2" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (abs:DF (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_FPU_DOUBLE" ++ "fabsd\t%0, %1" ++ [(set_attr "type" "fabs") ++ (set_attr "length" "4")] ++) ++ ++;; Negation Instructions ++ ++(define_insn "*negsf2" ++ [(set (match_operand:SF 0 "register_operand" "=f, r") ++ (neg:SF (match_operand:SF 1 "register_operand" "f, r")))] ++ "TARGET_FPU_SINGLE || TARGET_EXT_PERF" ++ "@ ++ fcpynss\t%0, %1, %1 ++ btgl\t%0, %1, 31" ++ [(set_attr "type" "fcpy,alu") ++ (set_attr "length" "4") ++ (set_attr "feature" "fpu,pe1")] ++) ++ ++(define_insn "*negdf2" ++ 
[(set (match_operand:DF 0 "register_operand" "=f") ++ (neg:DF (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_FPU_DOUBLE" ++ "fcpynsd\t%0, %1, %1" ++ [(set_attr "type" "fcpy") ++ (set_attr "length" "4")] ++) ++ ++;; Data Format Conversion Instructions ++ ++(define_insn "floatunssi<mode>2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (unsigned_float:ANYF (match_operand:SI 1 "register_operand" "f")))] ++ "" ++ "fui2<size>\t %0, %1" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "floatsi<mode>2" ++ [(set (match_operand:ANYF 0 "register_operand" "=f") ++ (float:ANYF (match_operand:SI 1 "register_operand" "f")))] ++ "" ++ "fsi2<size>\t %0, %1" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fixuns_trunc<mode>si2" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (unsigned_fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))] ++ "" ++ "f<size>2ui.z\t %0, %1" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "fix_trunc<mode>si2" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))] ++ "" ++ "f<size>2si.z\t %0, %1" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "extendsfdf2" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] ++ "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE" ++ "fs2d\t%0, %1" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "truncdfsf2" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] ++ "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE" ++ "fd2s\t%0, %1" ++ [(set_attr "type" "falu") ++ (set_attr "length" "4")] ++) ++ ++;; Compare Instructions ++ ++(define_insn "cmp<mode>_eq" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (eq:SI (match_operand:ANYF 1 
"register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++ { ++ if (NDS32_EXT_FPU_DOT_E) ++ return "fcmpeq<size>.e %0, %1, %2"; ++ else ++ return "fcmpeq<size>\t%0, %1, %2"; ++ } ++ [(set_attr "type" "fcmp") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "cmp<mode>_lt" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (lt:SI (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++{ ++ if (NDS32_EXT_FPU_DOT_E) ++ return "fcmplt<size>.e %0, %1, %2"; ++ else ++ return "fcmplt<size>\t%0, %1, %2"; ++} ++ [(set_attr "type" "fcmp") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "cmp<mode>_le" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (le:SI (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++{ ++ if (NDS32_EXT_FPU_DOT_E) ++ return "fcmple<size>.e %0, %1, %2"; ++ else ++ return "fcmple<size>\t%0, %1, %2"; ++} ++ [(set_attr "type" "fcmp") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "cmp<mode>_un" ++ [(set (match_operand:SI 0 "register_operand" "=f") ++ (unordered:SI (match_operand:ANYF 1 "register_operand" "f") ++ (match_operand:ANYF 2 "register_operand" "f")))] ++ "" ++{ ++ if (NDS32_EXT_FPU_DOT_E) ++ return "fcmpun<size>.e %0, %1, %2"; ++ else ++ return "fcmpun<size>\t%0, %1, %2"; ++} ++ [(set_attr "type" "fcmp") ++ (set_attr "length" "4")] ++) ++ ++(define_split ++ [(set (match_operand:SF 0 "register_operand" "") ++ (match_operand:SF 1 "register_operand" ""))] ++ "!TARGET_FPU_SINGLE ++ && NDS32_IS_FPR_REGNUM (REGNO (operands[0])) ++ && NDS32_IS_FPR_REGNUM (REGNO (operands[1]))" ++ [(set (match_dup 2) (match_dup 1)) ++ (set (match_dup 0) (match_dup 2))] ++{ ++ operands[2] = gen_rtx_REG (SFmode, TA_REGNUM); ++}) ++ ++(define_split ++ [(set (match_operand:SF 0 "register_operand" "") ++ (match_operand:SF 1 "const_double_operand" ""))] ++ "!satisfies_constraint_Cs20 (operands[1]) ++ && !satisfies_constraint_Chig 
(operands[1])" ++ [(set (match_dup 0) (high:SF (match_dup 1))) ++ (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))]) ++;; ---------------------------------------------------------------------------- +diff --git a/gcc/config/nds32/nds32-gcse.c b/gcc/config/nds32/nds32-gcse.c +new file mode 100644 +index 0000000..301981d +--- /dev/null ++++ b/gcc/config/nds32/nds32-gcse.c +@@ -0,0 +1,670 @@ ++/* Global CSE pass of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). 
*/ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" ++#include "cpplib.h" ++#include "params.h" ++#include "tree-pass.h" ++#include "dbgcnt.h" ++#include "df.h" ++#include "reload.h" ++ ++/* ------------------------------------------------------------------------ */ ++ ++struct expr ++{ ++ /* The expression. */ ++ rtx expr; ++ ++ /* The same hash for this entry. */ ++ hashval_t hash; ++ ++ struct occr *antic_occr; ++ /* The number of antic_occr. */ ++ unsigned int count; ++}; ++ ++struct occr ++{ ++ /* Next occurrence of this expression. */ ++ struct occr *next; ++ /* The insn that computes the expression. */ ++ rtx_insn *insn; ++ /* Nonzero if this [anticipatable] occurrence has been deleted. */ ++ char deleted_p; ++}; ++ ++struct reg_avail_info ++{ ++ basic_block last_bb; ++ int first_set; ++ int first_use; ++}; ++ ++/* Hashtable helpers. */ ++ ++struct expr_hasher : nofree_ptr_hash <expr> ++{ ++ static inline hashval_t hash (const expr *); ++ static inline bool equal (const expr *, const expr *); ++}; ++ ++/* Callback for hashtab. ++ Return the hash value for expression EXP. We don't actually hash ++ here, we just return the cached hash value. */ ++ ++inline hashval_t ++expr_hasher::hash (const expr *exp) ++{ ++ return exp->hash; ++} ++ ++/* Callback for hashtab. ++ Return nonzero if exp1 is equivalent to exp2. 
*/ ++ ++inline bool ++expr_hasher::equal (const expr *exp1, const expr *exp2) ++{ ++ int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true); ++ ++ gcc_assert (!equiv_p || exp1->hash == exp2->hash); ++ return equiv_p; ++} ++ ++static hashval_t ++hash_expr (rtx x, int *do_not_record_p) ++{ ++ *do_not_record_p = 0; ++ return hash_rtx (x, GET_MODE (x), do_not_record_p, ++ NULL, /*have_reg_qty=*/false); ++} ++ ++ ++/* Helpers for memory allocation/freeing. */ ++static void alloc_mem (void); ++static void free_mem (void); ++static void compute_hash_table (void); ++/* Scan the pattern of INSN and add an entry to the hash TABLE. ++ After reload we are interested in loads/stores only. */ ++static void hash_scan_set (rtx_insn *); ++static void insert_expr_in_table (rtx, rtx_insn *); ++static void dump_hash_table (FILE *); ++ ++static struct obstack expr_obstack; ++/* The table itself. */ ++static hash_table <expr_hasher> *expr_table; ++static struct reg_avail_info *reg_avail_info; ++static sbitmap *hoist_vbein; ++static sbitmap *hoist_vbeout; ++ ++/* Allocate memory for the CUID mapping array and register/memory ++ tracking tables. */ ++ ++static void ++alloc_mem (void) ++{ ++ /* Allocate the available expressions hash table. We don't want to ++ make the hash table too small, but unnecessarily making it too large ++ also doesn't help. The i/4 is a gcse.c relic, and seems like a ++ reasonable choice. */ ++ expr_table = new hash_table<expr_hasher> (MAX (get_max_insn_count () / 4, ++ 13)); ++ ++ /* We allocate everything on obstacks because we often can roll back ++ the whole obstack to some point. Freeing obstacks is very fast. */ ++ gcc_obstack_init (&expr_obstack); ++} ++ ++/* Free memory allocated by alloc_mem. */ ++ ++static void ++free_mem (void) ++{ ++ delete expr_table; ++ expr_table = NULL; ++ ++ obstack_free (&expr_obstack, NULL); ++} ++ ++ ++/* Dump all expressions and occurrences that are currently in the ++ expression hash table to FILE. 
*/ ++ ++/* This helper is called via htab_traverse. */ ++int ++nds32_dump_expr_hash_table_entry (expr **slot, FILE *file) ++{ ++ struct expr *exprs = *slot; ++ struct occr *occr; ++ ++ fprintf (file, "expr: "); ++ print_rtl (file, exprs->expr); ++ fprintf (file,"\nhashcode: %u\n", exprs->hash); ++ fprintf (file,"list of occurrences:\n"); ++ occr = exprs->antic_occr; ++ while (occr) ++ { ++ rtx_insn *insn = occr->insn; ++ print_rtl_single (file, insn); ++ fprintf (file, "\n"); ++ occr = occr->next; ++ } ++ fprintf (file, "\n"); ++ return 1; ++} ++ ++static void ++dump_hash_table (FILE *file) ++{ ++ fprintf (file, "\n\nexpression hash table\n"); ++ fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n", ++ (long) expr_table->size (), ++ (long) expr_table->elements (), ++ expr_table->collisions ()); ++ if (expr_table->elements () > 0) ++ { ++ fprintf (file, "\n\ntable entries:\n"); ++ expr_table->traverse <FILE *, nds32_dump_expr_hash_table_entry> (file); ++ } ++ fprintf (file, "\n"); ++} ++ ++/* Insert expression X in INSN in the hash TABLE. ++ If it is already present, record it as the last occurrence in INSN's ++ basic block. */ ++ ++static void ++insert_expr_in_table (rtx x, rtx_insn *insn) ++{ ++ int do_not_record_p; ++ hashval_t hash; ++ struct expr *cur_expr, **slot; ++ struct occr *antic_occr, *last_occr = NULL; ++ ++ hash = hash_expr (x, &do_not_record_p); ++ ++ /* Do not insert expression in the table if it contains volatile operands, ++ or if hash_expr determines the expression is something we don't want ++ to or can't handle. */ ++ if (do_not_record_p) ++ return; ++ ++ /* We anticipate that redundant expressions are rare, so for convenience ++ allocate a new hash table element here already and set its fields. ++ If we don't do this, we need a hack with a static struct expr. Anyway, ++ obstack_free is really fast and one more obstack_alloc doesn't hurt if ++ we're going to see more expressions later on. 
*/ ++ cur_expr = (struct expr *) obstack_alloc (&expr_obstack, ++ sizeof (struct expr)); ++ cur_expr->expr = x; ++ cur_expr->hash = hash; ++ cur_expr->antic_occr = NULL; ++ ++ slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT); ++ ++ if (! (*slot)) ++ /* The expression isn't found, so insert it. */ ++ *slot = cur_expr; ++ else ++ { ++ /* The expression is already in the table, so roll back the ++ obstack and use the existing table entry. */ ++ obstack_free (&expr_obstack, cur_expr); ++ cur_expr = *slot; ++ } ++ ++ /* Search for another occurrence in the same basic block. */ ++ antic_occr = cur_expr->antic_occr; ++ cur_expr->count++; ++ while (antic_occr ++ && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn)) ++ { ++ /* If an occurrence isn't found, save a pointer to the end of ++ the list. */ ++ last_occr = antic_occr; ++ antic_occr = antic_occr->next; ++ } ++ ++ if (antic_occr) ++ /* Found another instance of the expression in the same basic block. ++ Prefer this occurrence to the currently recorded one. We want ++ the last one in the block and the block is scanned from start ++ to end. */ ++ antic_occr->insn = insn; ++ else ++ { ++ /* First occurrence of this expression in this basic block. */ ++ antic_occr = (struct occr *) obstack_alloc (&expr_obstack, ++ sizeof (struct occr)); ++ ++ /* First occurrence of this expression in any block? */ ++ if (cur_expr->antic_occr == NULL) ++ cur_expr->antic_occr = antic_occr; ++ else ++ last_occr->next = antic_occr; ++ ++ antic_occr->insn = insn; ++ antic_occr->next = NULL; ++ antic_occr->deleted_p = 0; ++ } ++} ++ ++/* Check whether this instruction is supported format. */ ++ ++static void ++hash_scan_set (rtx_insn *insn) ++{ ++ rtx pat = PATTERN (insn); ++ rtx src = SET_SRC (pat); ++ rtx dest = SET_DEST (pat); ++ int regno; ++ struct reg_avail_info *info; ++ ++ /* Don't mess with jumps and nops. */ ++ if (JUMP_P (insn) || set_noop_p (pat)) ++ return; ++ ++ /* TODO: support more format. 
*/ ++ ++ /* Only consider locally anticipatable intructions currently. */ ++ if (REG_P (dest) && REGNO (dest) <= SP_REGNUM) ++ { ++ regno = REGNO (dest); ++ info = &reg_avail_info[regno]; ++ ++ if (BLOCK_FOR_INSN (insn) == info->last_bb ++ && info->first_set == DF_INSN_LUID (insn) ++ && info->first_use >= info->first_set) ++ { ++ /* Only support immediate input currently because ++ this is bugzilla case. */ ++ if (CONST_INT_P (src) || CONST_DOUBLE_P (src)) ++ insert_expr_in_table (PATTERN (insn), insn); ++ } ++ } ++} ++ ++/* Record register first use information for REGNO in INSN. ++ ++ first_use records the first place in the block where the register ++ is used and is used to compute "anticipatability". ++ ++ last_bb records the block for which first_use is valid, ++ as a quick test to invalidate them. */ ++ ++static void ++record_first_reg_use_info (rtx_insn *insn, int regno) ++{ ++ struct reg_avail_info *info = &reg_avail_info[regno]; ++ int luid = DF_INSN_LUID (insn); ++ ++ if (info->last_bb != BLOCK_FOR_INSN (insn)) ++ { ++ info->last_bb = BLOCK_FOR_INSN (insn); ++ info->first_use = luid; ++ /* Set the value to record the using is former than setting. */ ++ info->first_set = luid + 1; ++ } ++} ++ ++/* Called from compute_hash_table via note_stores to handle one ++ SET or CLOBBER in an insn. DATA is really the instruction in which ++ the SET is taking place. */ ++ ++static void ++record_first_use_info (rtx *dest, void *data) ++{ ++ rtx_insn *last_set_insn = static_cast<rtx_insn*> (data); ++ int i, j; ++ enum rtx_code code; ++ const char *fmt; ++ rtx x = *dest; ++ ++ if (x == 0) ++ return; ++ ++ code = GET_CODE (x); ++ if (REG_P (x) && REGNO (x) <= SP_REGNUM) ++ { ++ record_first_reg_use_info (last_set_insn, REGNO (x)); ++ /* DF and DI mode may use two registers.
*/ ++ if (GET_MODE_SIZE (GET_MODE (x)) == 8) ++ record_first_reg_use_info (last_set_insn, REGNO (x) + 1); ++ } ++ ++ for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) ++ { ++ if (fmt[i] == 'e') ++ record_first_use_info (&XEXP (x, i), data); ++ else if (fmt[i] == 'E') ++ for (j = 0; j < XVECLEN (x, i); j++) ++ record_first_use_info (&XVECEXP (x, i, j), data); ++ } ++} ++ ++/* Record register first/block set information for REGNO in INSN. ++ ++ first_set records the first place in the block where the register ++ is set and is used to compute "anticipatability". ++ ++ last_bb records the block for which first_set is valid, ++ as a quick test to invalidate them. */ ++ ++static void ++record_first_reg_set_info (rtx_insn *insn, int regno) ++{ ++ struct reg_avail_info *info = &reg_avail_info[regno]; ++ int luid = DF_INSN_LUID (insn); ++ ++ if (info->last_bb != BLOCK_FOR_INSN (insn)) ++ { ++ info->last_bb = BLOCK_FOR_INSN (insn); ++ info->first_set = luid; ++ /* Set the value to record the using is later than setting. */ ++ info->first_use = luid + 1; ++ } ++} ++ ++/* Called from compute_hash_table via note_stores to handle one ++ SET or CLOBBER in an insn. DATA is really the instruction in which ++ the SET is taking place. */ ++ ++static void ++record_first_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data) ++{ ++ rtx_insn *last_set_insn = static_cast<rtx_insn *> (data); ++ ++ if (GET_CODE (dest) == SUBREG) ++ dest = SUBREG_REG (dest); ++ ++ if (REG_P (dest) && REGNO (dest) <= SP_REGNUM) ++ { ++ record_first_reg_set_info (last_set_insn, REGNO (dest)); ++ if (GET_MODE_SIZE (GET_MODE (dest)) == 8) ++ record_first_reg_set_info (last_set_insn, REGNO (dest) + 1); ++ } ++} ++ ++/* Build hash table for supported format instructions. ++ Only consider if the instruction is anticipatable in the basic block here. ++ We postpone the def-use check until hoisting.
*/ ++ ++static void ++compute_hash_table (void) ++{ ++ basic_block bb; ++ int i; ++ ++ /* We only take care hard registers. */ ++ reg_avail_info = ++ (struct reg_avail_info *) xmalloc (sizeof (struct reg_avail_info) * ++ (SP_REGNUM + 1)); ++ ++ for (i = 0; i < 32; i++) ++ reg_avail_info[i].last_bb = NULL; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ rtx_insn *insn; ++ ++ /* Do not hoist instrucion from block which has more ++ than one predecessor. */ ++ if (EDGE_COUNT (bb->preds) > 1) ++ continue; ++ ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ ++ /* Construct a caller save register barrier. We cannot hoist the ++ instruction over a function call which sets caller save ++ registers. */ ++ if (CALL_P (insn)) ++ { ++ for (i = 0; i <= SP_REGNUM; i++) ++ if (call_used_regs[i]) ++ record_first_reg_use_info (insn, i); ++ } ++ ++ note_uses (&PATTERN (insn), record_first_use_info, insn); ++ note_stores (PATTERN (insn), record_first_set_info, insn); ++ } ++ ++ /* Build the hash table. */ ++ FOR_BB_INSNS (bb, insn) ++ if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET) ++ hash_scan_set (insn); ++ } ++} ++ ++/* Hoist instructions in this slot if possible. */ ++int ++nds32_find_gcse_expr_table (expr **slot, void *data ATTRIBUTE_UNUSED) ++{ ++ struct expr *exprs = *slot; ++ struct occr *occr; ++ rtx_insn *insn = NULL; ++ rtx_insn *last_insn; ++ basic_block bb; ++ edge e; ++ unsigned ix; ++ unsigned emit_done; ++ unsigned cover, regno; ++ df_ref use; ++ enum machine_mode mode; ++ ++ if (exprs->count < 2) ++ return 1; ++ ++ bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun)); ++ bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun)); ++ ++ /* Set the bit for this slot. */ ++ occr = exprs->antic_occr; ++ while (occr) ++ { ++ insn = occr->insn; ++ bb = BLOCK_FOR_INSN (insn); ++ if (!occr->deleted_p) ++ bitmap_set_bit (hoist_vbein[bb->index], 0); ++ occr = occr->next; ++ } ++ ++ /* Try to hoist code for each basic block. 
*/ ++ FOR_EACH_BB_REVERSE_FN (bb, cfun) ++ { ++ if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) ++ bitmap_intersection_of_succs (hoist_vbeout[bb->index], hoist_vbein, bb); ++ ++ if (bitmap_bit_p (hoist_vbeout[bb->index], 0) ++ && EDGE_COUNT (bb->succs) > 1) ++ { ++ emit_done = 0; ++ cover = FALSE; ++ for (e = NULL, ix = 0; ix < EDGE_COUNT (bb->succs); ix++) ++ { ++ e = EDGE_SUCC (bb, ix); ++ if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) ++ continue; ++ occr = exprs->antic_occr; ++ while (occr) ++ { ++ insn = occr->insn; ++ if (!occr->deleted_p && e->dest == BLOCK_FOR_INSN (insn)) ++ break; ++ occr = occr->next; ++ } ++ ++ gcc_assert (insn != NULL); ++ ++ if (!emit_done) ++ { ++ last_insn = BB_END (bb); ++ /* Check the defined register is not used by the last ++ instruction of the previos block.*/ ++ regno = REGNO (SET_DEST (PATTERN (insn))); ++ mode = GET_MODE (SET_DEST (PATTERN (insn))); ++ FOR_EACH_INSN_USE (use, last_insn) ++ { ++ if (DF_REF_REGNO (use) == regno ++ || regno_clobbered_p (regno, last_insn, mode, 2)) ++ { ++ cover = TRUE; ++ break; ++ } ++ } ++ ++ /* TODO: support more format. */ ++ if (cover) ++ break; ++ else if (JUMP_P (last_insn)) ++ { ++ emit_insn_before_noloc (PATTERN (insn), last_insn, bb); ++ emit_done = TRUE; ++ } ++ else ++ break; ++ } ++ ++ if (emit_done) ++ { ++ delete_insn (insn); ++ occr->deleted_p = TRUE; ++ } ++ } ++ } ++ } ++ return 1; ++} ++ ++static int ++hoist_code (void) ++{ ++ hoist_vbein = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1); ++ hoist_vbeout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1); ++ ++ expr_table->traverse <void *, nds32_find_gcse_expr_table> (NULL); ++ ++ sbitmap_vector_free (hoist_vbein); ++ sbitmap_vector_free (hoist_vbeout); ++ ++ return 0; ++} ++ ++ ++static unsigned int ++nds32_gcse_opt (void) ++{ ++ ++ if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1) ++ return 0; ++ /* Allocate memory for this pass. ++ Also computes and initializes the insns' CUIDs. 
*/ ++ alloc_mem (); ++ ++ df_chain_add_problem (DF_DU_CHAIN); ++ df_insn_rescan_all (); ++ df_analyze (); ++ ++ compute_hash_table (); ++ ++ if (dump_file) ++ dump_hash_table (dump_file); ++ ++ hoist_code (); ++ ++ df_insn_rescan_all (); ++ free_mem (); ++ return 0; ++} ++ ++const pass_data pass_data_nds32_gcse_opt = ++{ ++ RTL_PASS, /* type */ ++ "gcse_opt", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_gcse_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_gcse_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_gcse_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return TARGET_GCSE_OPT; } ++ unsigned int execute (function *) { return nds32_gcse_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_gcse_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_gcse_opt (ctxt); ++} ++ ++/* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-graywolf.md b/gcc/config/nds32/nds32-graywolf.md +new file mode 100644 +index 0000000..f9ddbd8 +--- /dev/null ++++ b/gcc/config/nds32/nds32-graywolf.md +@@ -0,0 +1,471 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++;; ------------------------------------------------------------------------ ++;; Define Graywolf pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_graywolf_machine") ++ ++(define_cpu_unit "gw_ii_0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_ii_1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_ex_p0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_mm_p0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_wb_p0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_ex_p1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_mm_p1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_wb_p1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_iq_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_rf_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e1_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e2_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e3_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e4_p2" "nds32_graywolf_machine") ++ ++(define_reservation "gw_ii" "gw_ii_0 | gw_ii_1") ++(define_reservation "gw_ex" "gw_ex_p0 | gw_ex_p1") ++(define_reservation "gw_mm" "gw_mm_p0 | gw_mm_p1") ++(define_reservation "gw_wb" "gw_wb_p0 | gw_wb_p1") ++ ++(define_reservation "gw_ii_all" "gw_ii_0 + gw_ii_1") ++ ++(define_insn_reservation "nds_gw_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ 
++(define_insn_reservation "nds_gw_alu" 1 ++ (and (and (eq_attr "type" "alu") ++ (match_test "!nds32::movd44_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_movd44" 1 ++ (and (and (eq_attr "type" "alu") ++ (match_test "nds32::movd44_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex*2, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex*3, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_pbsada" 1 ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex*3, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_load" 1 ++ (and (and (eq_attr "type" "load") ++ (match_test "!nds32::post_update_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_2w" 1 ++ (and (and (eq_attr "type" "load") ++ (match_test "nds32::post_update_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store" 1 ++ (and (and (eq_attr "type" "store") ++ (match_test "!nds32::store_offset_reg_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_3r" 1 ++ (and (and (eq_attr "type" "store") ++ (match_test "nds32::store_offset_reg_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, 
gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_2" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation 
"nds_gw_store_multiple_2" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_3" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_5" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_mul_fast1" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation 
"nds_gw_mul_fast2" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_0, gw_ex_p0*2, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mul_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mac_fast1" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mac_fast2" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_all, gw_ex_p0*2, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mac_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_div" 1 ++ (and (and (eq_attr "type" "div") ++ (match_test "!nds32::divmod_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_div_2w" 1 ++ (and (and (eq_attr "type" "div") ++ (match_test "nds32::divmod_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_alu" 1 ++ (and (eq_attr "type" "dalu") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_dsp_alu64" 1 ++ (and (eq_attr "type" "dalu64") ++ (eq_attr "pipeline_model" "graywolf")) ++ 
"gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_alu_round" 1 ++ (and (eq_attr "type" "daluround") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_cmp" 1 ++ (and (eq_attr "type" "dcmp") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_clip" 1 ++ (and (eq_attr "type" "dclip") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_mul" 1 ++ (and (eq_attr "type" "dmul") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_mac" 1 ++ (and (eq_attr "type" "dmac") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_insb" 1 ++ (and (eq_attr "type" "dinsb") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_pack" 1 ++ (and (eq_attr "type" "dpack") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_bpick" 1 ++ (and (eq_attr "type" "dbpick") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_wext" 1 ++ (and (eq_attr "type" "dwext") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_fpu_alu" 4 ++ (and (eq_attr "type" "falu") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_muls" 4 ++ (and (eq_attr "type" "fmuls") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation 
"nds_gw_fpu_muld" 4 ++ (and (eq_attr "type" "fmuld") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_macs" 4 ++ (and (eq_attr "type" "fmacs") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*3, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_macd" 4 ++ (and (eq_attr "type" "fmacd") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*4, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_divs" 4 ++ (and (ior (eq_attr "type" "fdivs") ++ (eq_attr "type" "fsqrts")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*14, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_divd" 4 ++ (and (ior (eq_attr "type" "fdivd") ++ (eq_attr "type" "fsqrtd")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*28, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fast_alu" 2 ++ (and (ior (eq_attr "type" "fcmp") ++ (ior (eq_attr "type" "fabs") ++ (ior (eq_attr "type" "fcpy") ++ (eq_attr "type" "fcmov")))) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmtsr" 1 ++ (and (eq_attr "type" "fmtsr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmtdr" 1 ++ (and (eq_attr "type" "fmtdr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmfsr" 1 ++ (and (eq_attr "type" "fmfsr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ 
++(define_insn_reservation "nds_gw_fpu_fmfdr" 1 ++ (and (eq_attr "type" "fmfdr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_load" 3 ++ (and (eq_attr "type" "fload") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_store" 1 ++ (and (eq_attr "type" "fstore") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++;; FPU_ADDR_OUT -> FPU_ADDR_IN ++;; Main pipeline rules don't need this because those default latency is 1. ++(define_bypass 1 ++ "nds_gw_fpu_load, nds_gw_fpu_store" ++ "nds_gw_fpu_load, nds_gw_fpu_store" ++ "nds32_gw_ex_to_ex_p" ++) ++ ++;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_gw_load, nds_gw_load_2w,\ ++ nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ ++ nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ ++ nds_gw_div, nds_gw_div_2w,\ ++ nds_gw_dsp_alu64, nds_gw_dsp_mul, nds_gw_dsp_mac,\ ++ nds_gw_dsp_alu_round, nds_gw_dsp_bpick, nds_gw_dsp_wext" ++ "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\ ++ nds_gw_pbsad, nds_gw_pbsada,\ ++ nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ ++ nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ ++ nds_gw_branch,\ ++ nds_gw_div, nds_gw_div_2w,\ ++ nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\ ++ nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ ++ nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ ++ nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\ ++ 
nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\ ++ nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\ ++ nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\ ++ nds_gw_mmu,\ ++ nds_gw_dsp_alu, nds_gw_dsp_alu_round,\ ++ nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\ ++ nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\ ++ nds_gw_dsp_wext, nds_gw_dsp_bpick" ++ "nds32_gw_mm_to_ex_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ ++ nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ ++ nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12" ++ "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\ ++ nds_gw_pbsad, nds_gw_pbsada,\ ++ nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ ++ nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ ++ nds_gw_branch,\ ++ nds_gw_div, nds_gw_div_2w,\ ++ nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\ ++ nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ ++ nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ ++ nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\ ++ nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\ ++ nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\ ++ nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\ ++ nds_gw_mmu,\ ++ nds_gw_dsp_alu, nds_gw_dsp_alu_round,\ ++ nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\ ++ nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\ ++ nds_gw_dsp_wext, nds_gw_dsp_bpick" ++ "nds32_gw_last_load_to_ex_p" ++) +diff --git a/gcc/config/nds32/nds32-intrinsic.c 
b/gcc/config/nds32/nds32-intrinsic.c +index fabf262..7547fb1 100644 +--- a/gcc/config/nds32/nds32-intrinsic.c ++++ b/gcc/config/nds32/nds32-intrinsic.c +@@ -24,210 +24,1867 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" +-#include "rtl.h" + #include "tree.h" +-#include "optabs.h" /* For GEN_FCN. */ +-#include "diagnostic-core.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" + #include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" + #include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" + #include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" + + /* ------------------------------------------------------------------------ */ + +-/* Function to expand builtin function for +- '[(unspec_volatile [(reg)])]'. */ ++/* Read the requested argument from the EXP given by INDEX. ++ Return the value as an rtx. */ ++static rtx ++nds32_read_argument (tree exp, unsigned int index) ++{ ++ return expand_normal (CALL_EXPR_ARG (exp, index)); ++} ++ ++/* Return a legitimate rtx for instruction ICODE's return value. Use TARGET ++ if it's not null, has the right mode, and satisfies operand 0's ++ predicate. 
*/ ++static rtx ++nds32_legitimize_target (enum insn_code icode, rtx target) ++{ ++ enum machine_mode mode = insn_data[icode].operand[0].mode; ++ ++ if (! target ++ || GET_MODE (target) != mode ++ || ! (*insn_data[icode].operand[0].predicate) (target, mode)) ++ return gen_reg_rtx (mode); ++ else ++ return target; ++} ++ ++/* Given that ARG is being passed as operand OPNUM to instruction ICODE, ++ check whether ARG satisfies the operand's constraints. If it doesn't, ++ copy ARG to a temporary register and return that. Otherwise return ARG ++ itself. */ + static rtx +-nds32_expand_builtin_null_ftype_reg (enum insn_code icode, +- tree exp, rtx target) ++nds32_legitimize_argument (enum insn_code icode, int opnum, rtx arg) ++{ ++ enum machine_mode mode = insn_data[icode].operand[opnum].mode; ++ ++ if ((*insn_data[icode].operand[opnum].predicate) (arg, mode)) ++ return arg; ++ else if (VECTOR_MODE_P (mode) && CONST_INT_P (arg)) ++ { ++ /* Handle CONST_INT covert to CONST_VECTOR. */ ++ int nunits = GET_MODE_NUNITS (mode); ++ int i, shift = 0; ++ rtvec v = rtvec_alloc (nunits); ++ int val = INTVAL (arg); ++ enum machine_mode val_mode = (mode == V4QImode) ? QImode : HImode; ++ int shift_acc = (val_mode == QImode) ? 8 : 16; ++ int mask = (val_mode == QImode) ? 0xff : 0xffff; ++ int tmp_val = val; ++ ++ if (TARGET_BIG_ENDIAN) ++ for (i = 0; i < nunits; i++) ++ { ++ tmp_val = (val >> shift) & mask; ++ RTVEC_ELT (v, nunits - i - 1) = gen_int_mode (tmp_val, val_mode); ++ shift += shift_acc; ++ } ++ else ++ for (i = 0; i < nunits; i++) ++ { ++ tmp_val = (val >> shift) & mask; ++ RTVEC_ELT (v, i) = gen_int_mode (tmp_val, val_mode); ++ shift += shift_acc; ++ } ++ ++ return copy_to_mode_reg (mode, gen_rtx_CONST_VECTOR (mode, v)); ++ } ++ else ++ { ++ rtx tmp_rtx = gen_reg_rtx (mode); ++ convert_move (tmp_rtx, arg, false); ++ return tmp_rtx; ++ } ++} ++ ++/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE. 
++ The instruction should require a constant operand of some sort. The ++ function prints an error if OPVAL is not valid. */ ++static int ++nds32_check_constant_argument (enum insn_code icode, int opnum, rtx opval, ++ const char *name) + { +- /* Mapping: +- ops[0] <--> value0 <--> arg0 */ +- struct expand_operand ops[1]; +- tree arg0; +- rtx value0; ++ if (GET_CODE (opval) != CONST_INT) ++ { ++ error ("invalid argument to built-in function %s", name); ++ return false; ++ } ++ if (! (*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode)) ++ { ++ error ("constant argument out of range for %s", name); ++ ++ return false; ++ } ++ return true; ++} + +- /* Grab the incoming arguments and extract its rtx. */ +- arg0 = CALL_EXPR_ARG (exp, 0); +- value0 = expand_normal (arg0); ++/* Expand builtins that return target. */ ++static rtx ++nds32_expand_noarg_builtin (enum insn_code icode, rtx target) ++{ ++ rtx pat; + +- /* Create operands. */ +- create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0))); ++ target = nds32_legitimize_target (icode, target); + +- /* Emit new instruction. */ +- if (!maybe_expand_insn (icode, 1, ops)) +- error ("invalid argument to built-in function"); ++ /* Emit and return the new instruction. */ ++ pat = GEN_FCN (icode) (target); ++ if (! pat) ++ return NULL_RTX; + ++ emit_insn (pat); + return target; + } + +-/* Function to expand builtin function for +- '[(set (reg) (unspec_volatile [(imm)]))]'. */ ++/* Expand builtins that take one operand. */ + static rtx +-nds32_expand_builtin_reg_ftype_imm (enum insn_code icode, +- tree exp, rtx target) ++nds32_expand_unop_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p) + { +- /* Mapping: +- ops[0] <--> target <--> exp +- ops[1] <--> value0 <--> arg0 */ +- struct expand_operand ops[2]; +- tree arg0; +- rtx value0; ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ int op0_num = return_p ? 
1 : 0; ++ ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); + +- /* Grab the incoming arguments and extract its rtx. */ +- arg0 = CALL_EXPR_ARG (exp, 0); +- value0 = expand_normal (arg0); ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); + +- /* Create operands. */ +- create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (exp))); +- create_input_operand (&ops[1], value0, TYPE_MODE (TREE_TYPE (arg0))); ++ /* Emit and return the new instruction. */ ++ if (return_p) ++ pat = GEN_FCN (icode) (target, op0); ++ else ++ pat = GEN_FCN (icode) (op0); + +- /* Emit new instruction. */ +- if (!maybe_expand_insn (icode, 2, ops)) +- error ("invalid argument to built-in function"); ++ if (! pat) ++ return NULL_RTX; + ++ emit_insn (pat); + return target; + } + +-/* Function to expand builtin function for +- '[(unspec_volatile [(reg) (imm)])]' pattern. */ ++/* Expand builtins that take one operands and the first is immediate. */ + static rtx +-nds32_expand_builtin_null_ftype_reg_imm (enum insn_code icode, +- tree exp, rtx target) +-{ +- /* Mapping: +- ops[0] <--> value0 <--> arg0 +- ops[1] <--> value1 <--> arg1 */ +- struct expand_operand ops[2]; +- tree arg0, arg1; +- rtx value0, value1; +- +- /* Grab the incoming arguments and extract its rtx. */ +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- value0 = expand_normal (arg0); +- value1 = expand_normal (arg1); +- +- /* Create operands. */ +- create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0))); +- create_input_operand (&ops[1], value1, TYPE_MODE (TREE_TYPE (arg1))); +- +- /* Emit new instruction. */ +- if (!maybe_expand_insn (icode, 2, ops)) +- error ("invalid argument to built-in function"); ++nds32_expand_unopimm_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p, const char *name) ++{ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ int op0_num = return_p ? 
1 : 0; ++ ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); ++ ++ if (!nds32_check_constant_argument (icode, op0_num, op0, name)) ++ return NULL_RTX; ++ ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); + ++ /* Emit and return the new instruction. */ ++ if (return_p) ++ pat = GEN_FCN (icode) (target, op0); ++ else ++ pat = GEN_FCN (icode) (op0); ++ ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_insn (pat); + return target; + } + +-/* ------------------------------------------------------------------------ */ ++/* Expand builtins that take two operands. */ ++static rtx ++nds32_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p) ++{ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ int op0_num = return_p ? 1 : 0; ++ int op1_num = return_p ? 2 : 1; + +-void +-nds32_init_builtins_impl (void) ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); ++ ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); ++ op1 = nds32_legitimize_argument (icode, op1_num, op1); ++ ++ /* Emit and return the new instruction. */ ++ if (return_p) ++ pat = GEN_FCN (icode) (target, op0, op1); ++ else ++ pat = GEN_FCN (icode) (op0, op1); ++ ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Expand builtins that take two operands and the second is immediate. */ ++static rtx ++nds32_expand_binopimm_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p, const char *name) + { +- tree pointer_type_node = build_pointer_type (integer_type_node); ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ int op0_num = return_p ? 1 : 0; ++ int op1_num = return_p ? 
2 : 1; + +- tree void_ftype_void = build_function_type (void_type_node, +- void_list_node); ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); + +- tree void_ftype_pint = build_function_type_list (void_type_node, +- pointer_type_node, +- NULL_TREE); ++ if (!nds32_check_constant_argument (icode, op1_num, op1, name)) ++ return NULL_RTX; + +- tree int_ftype_int = build_function_type_list (integer_type_node, +- integer_type_node, +- NULL_TREE); ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); ++ op1 = nds32_legitimize_argument (icode, op1_num, op1); + +- tree void_ftype_int_int = build_function_type_list (void_type_node, +- integer_type_node, +- integer_type_node, +- NULL_TREE); ++ /* Emit and return the new instruction. */ ++ if (return_p) ++ pat = GEN_FCN (icode) (target, op0, op1); ++ else ++ pat = GEN_FCN (icode) (op0, op1); + +- /* Cache. */ +- add_builtin_function ("__builtin_nds32_isync", void_ftype_pint, +- NDS32_BUILTIN_ISYNC, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_nds32_isb", void_ftype_void, +- NDS32_BUILTIN_ISB, +- BUILT_IN_MD, NULL, NULL_TREE); ++ if (! pat) ++ return NULL_RTX; + +- /* Register Transfer. */ +- add_builtin_function ("__builtin_nds32_mfsr", int_ftype_int, +- NDS32_BUILTIN_MFSR, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_nds32_mfusr", int_ftype_int, +- NDS32_BUILTIN_MFUSR, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_nds32_mtsr", void_ftype_int_int, +- NDS32_BUILTIN_MTSR, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_nds32_mtusr", void_ftype_int_int, +- NDS32_BUILTIN_MTUSR, +- BUILT_IN_MD, NULL, NULL_TREE); ++ emit_insn (pat); ++ return target; ++} + +- /* Interrupt. 
*/ +- add_builtin_function ("__builtin_nds32_setgie_en", void_ftype_void, +- NDS32_BUILTIN_SETGIE_EN, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_nds32_setgie_dis", void_ftype_void, +- NDS32_BUILTIN_SETGIE_DIS, +- BUILT_IN_MD, NULL, NULL_TREE); ++/* Expand builtins that take three operands. */ ++static rtx ++nds32_expand_triop_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p) ++{ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ rtx op2 = nds32_read_argument (exp, 2); ++ int op0_num = return_p ? 1 : 0; ++ int op1_num = return_p ? 2 : 1; ++ int op2_num = return_p ? 3 : 2; ++ ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); ++ ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); ++ op1 = nds32_legitimize_argument (icode, op1_num, op1); ++ op2 = nds32_legitimize_argument (icode, op2_num, op2); ++ ++ /* Emit and return the new instruction. */ ++ if (return_p) ++ pat = GEN_FCN (icode) (target, op0, op1, op2); ++ else ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Expand builtins that take three operands and the third is immediate. */ ++static rtx ++nds32_expand_triopimm_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p, const char *name) ++{ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ rtx op2 = nds32_read_argument (exp, 2); ++ int op0_num = return_p ? 1 : 0; ++ int op1_num = return_p ? 2 : 1; ++ int op2_num = return_p ? 
3 : 2; ++ ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); ++ ++ if (!nds32_check_constant_argument (icode, op2_num, op2, name)) ++ return NULL_RTX; ++ ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); ++ op1 = nds32_legitimize_argument (icode, op1_num, op1); ++ op2 = nds32_legitimize_argument (icode, op2_num, op2); ++ ++ /* Emit and return the new instruction. */ ++ if (return_p) ++ pat = GEN_FCN (icode) (target, op0, op1, op2); ++ else ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Expand builtins for load. */ ++static rtx ++nds32_expand_builtin_load (enum insn_code icode, tree exp, rtx target) ++{ ++ /* Load address format is [$ra + $rb], ++ but input arguments not enough, ++ so we need another temp register as $rb. ++ Generating assembly code: ++ movi $temp, 0 ++ llw $rt, [$ra + $temp] */ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode); ++ ++ target = nds32_legitimize_target (icode, target); ++ op0 = nds32_legitimize_argument (icode, 1, op0); ++ ++ /* Emit and return the new instruction. */ ++ pat = GEN_FCN (icode) (target, op0, addr_helper); ++ if (!pat) ++ return NULL_RTX; ++ ++ emit_move_insn (addr_helper, GEN_INT (0)); ++ emit_insn (pat); ++ return target; ++} ++ ++/* Expand builtins for store. */ ++static rtx ++nds32_expand_builtin_store (enum insn_code icode, tree exp, rtx target) ++{ ++ /* Store address format is [$ra + $rb], ++ but input arguments not enough, ++ so we need another temp register as $rb. 
++ Generating assembly code: ++ movi $temp, 0 ++ store $rt, [$ra + $temp] */ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode); ++ ++ op0 = nds32_legitimize_argument (icode, 0, op0); ++ op1 = nds32_legitimize_argument (icode, 2, op1); ++ ++ /* Emit and return the new instruction. */ ++ pat = GEN_FCN (icode) (op0, addr_helper, op1); ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_move_insn (addr_helper, GEN_INT (0)); ++ emit_insn (pat); ++ return target; ++} ++ ++/* Expand cctl builtins. */ ++static rtx ++nds32_expand_cctl_builtin (enum insn_code icode, tree exp, rtx target, ++ bool return_p, const char *name) ++{ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ int op0_num = return_p ? 1 : 0; ++ int op1_num = return_p ? 2 : 1; ++ ++ if (return_p) ++ target = nds32_legitimize_target (icode, target); ++ ++ if (!nds32_check_constant_argument (icode, op0_num, op0, name)) ++ return NULL_RTX; ++ ++ op0 = nds32_legitimize_argument (icode, op0_num, op0); ++ op1 = nds32_legitimize_argument (icode, op1_num, op1); ++ ++ /* Emit and return the new instruction. */ ++ if (icode == CODE_FOR_cctl_idx_write) ++ { ++ /* cctl_idx_write is three argument, ++ so create operand2 for cctl_idx_write pattern. */ ++ rtx op2 = nds32_read_argument (exp, 2); ++ op2 = nds32_legitimize_argument (icode, 2, op2); ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ } ++ else if (return_p) ++ pat = GEN_FCN (icode) (target, op0, op1); ++ else ++ pat = GEN_FCN (icode) (op0, op1); ++ ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Expand scw builtins. */ ++static rtx ++nds32_expand_scw_builtin (enum insn_code icode, tree exp, rtx target) ++{ ++ /* SCW address format is [$ra + $rb], but input arguments not enough, ++ so we need another temp register as $rb. 
++ Generating assembly code: ++ movi $temp, 0 ++ scw $rt, [$ra + $temp] */ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode); ++ ++ target = nds32_legitimize_target (icode, target); ++ op0 = nds32_legitimize_argument (icode, 1, op0); ++ op1 = nds32_legitimize_argument (icode, 2, op1); ++ ++ /* Emit and return the new instruction. */ ++ pat = GEN_FCN (icode) (target, op0, addr_helper, target); ++ ++ if (!pat) ++ return NULL_RTX; ++ ++ emit_move_insn (addr_helper, GEN_INT (0)); ++ emit_move_insn (target, op1); ++ emit_insn (pat); ++ return target; + } + ++/* Expand set int priority builtins. */ ++static rtx ++nds32_expand_priority_builtin (enum insn_code icode, tree exp, rtx target, ++ const char *name) ++{ ++ rtx pat; ++ rtx op0 = nds32_read_argument (exp, 0); ++ rtx op1 = nds32_read_argument (exp, 1); ++ ++ /* set_int_priority intrinsic function that two arguments are immediate, ++ so check whether auguments are immedite. */ ++ ++ if (!nds32_check_constant_argument (icode, 0, op0, name)) ++ return NULL_RTX; ++ ++ if (!nds32_check_constant_argument (icode, 1, op1, name)) ++ return NULL_RTX; ++ ++ op0 = nds32_legitimize_argument (icode, 0, op0); ++ op1 = nds32_legitimize_argument (icode, 1, op1); ++ ++ /* Emit and return the new instruction. */ ++ pat = GEN_FCN (icode) (op0, op1); ++ ++ if (! pat) ++ return NULL_RTX; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++struct builtin_description ++{ ++ const enum insn_code icode; ++ const char *name; ++ enum nds32_builtins code; ++ bool return_p; ++}; ++ ++#define NDS32_BUILTIN(code, string, builtin) \ ++ { CODE_FOR_##code, "__nds32__" string, \ ++ NDS32_BUILTIN_##builtin, true }, ++ ++#define NDS32_NO_TARGET_BUILTIN(code, string, builtin) \ ++ { CODE_FOR_##code, "__nds32__" string, \ ++ NDS32_BUILTIN_##builtin, false }, ++ ++/* Intrinsics that no argument, and that return value. 
*/ ++static struct builtin_description bdesc_noarg[] = ++{ ++ NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG) ++ NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR) ++ NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV) ++ NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP) ++ NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS) ++ NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int", ++ GET_ALL_PENDING_INT) ++ NDS32_BUILTIN(unspec_unaligned_feature, "unaligned_feature", ++ UNALIGNED_FEATURE) ++ NDS32_NO_TARGET_BUILTIN(unspec_enable_unaligned, "enable_unaligned", ++ ENABLE_UNALIGNED) ++ NDS32_NO_TARGET_BUILTIN(unspec_disable_unaligned, "disable_unaligned", ++ DISABLE_UNALIGNED) ++}; ++ ++/* Intrinsics that take just one argument. */ ++static struct builtin_description bdesc_1arg[] = ++{ ++ NDS32_BUILTIN(unspec_ssabssi2, "abs", ABS) ++ NDS32_BUILTIN(clzsi2, "clz", CLZ) ++ NDS32_BUILTIN(unspec_clo, "clo", CLO) ++ NDS32_BUILTIN(unspec_wsbh, "wsbh", WSBH) ++ NDS32_BUILTIN(unspec_tlbop_pb, "tlbop_pb",TLBOP_PB) ++ NDS32_BUILTIN(unaligned_load_hw, "unaligned_load_hw", UALOAD_HW) ++ NDS32_BUILTIN(unaligned_loadsi, "unaligned_load_w", UALOAD_W) ++ NDS32_BUILTIN(unaligned_loaddi, "unaligned_load_dw", UALOAD_DW) ++ NDS32_NO_TARGET_BUILTIN(unspec_volatile_isync, "isync", ISYNC) ++ NDS32_NO_TARGET_BUILTIN(unspec_fmtcsr, "fmtcsr", FMTCSR) ++ NDS32_NO_TARGET_BUILTIN(unspec_jr_itoff, "jr_itoff", JR_ITOFF) ++ NDS32_NO_TARGET_BUILTIN(unspec_jr_toff, "jr_toff", JR_TOFF) ++ NDS32_NO_TARGET_BUILTIN(unspec_jral_ton, "jral_ton", JRAL_TON) ++ NDS32_NO_TARGET_BUILTIN(unspec_ret_toff, "ret_toff", RET_TOFF) ++ NDS32_NO_TARGET_BUILTIN(unspec_jral_iton, "jral_iton",JRAL_ITON) ++ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_trd, "tlbop_trd", TLBOP_TRD) ++ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_twr, "tlbop_twr", TLBOP_TWR) ++ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwr, "tlbop_rwr", TLBOP_RWR) ++ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwlk, "tlbop_rwlk", TLBOP_RWLK) ++ 
NDS32_NO_TARGET_BUILTIN(unspec_tlbop_unlk, "tlbop_unlk", TLBOP_UNLK) ++ NDS32_NO_TARGET_BUILTIN(unspec_tlbop_inv, "tlbop_inv", TLBOP_INV) ++ NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF) ++ NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp, ++ "set_current_sp", SET_CURRENT_SP) ++ NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16) ++ NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16) ++ NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8) ++ NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8) ++ NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810) ++ NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810) ++ NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820) ++ NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820) ++ NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830) ++ NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830) ++ NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831) ++ NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831) ++ NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810) ++ NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810) ++ NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820) ++ NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820) ++ NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830) ++ NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830) ++ NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831) ++ NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831) ++ NDS32_BUILTIN(unspec_kabs, "kabs", KABS) ++ NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16) ++ NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16) ++ NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8) ++ NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8) ++}; ++ ++/* Intrinsics that take just one argument. and the argument is immediate. 
*/ ++static struct builtin_description bdesc_1argimm[] = ++{ ++ NDS32_BUILTIN(unspec_volatile_mfsr, "mfsr", MFSR) ++ NDS32_BUILTIN(unspec_volatile_mfusr, "mfsr", MFUSR) ++ NDS32_BUILTIN(unspec_get_pending_int, "get_pending_int", GET_PENDING_INT) ++ NDS32_BUILTIN(unspec_get_int_priority, "get_int_priority", GET_INT_PRIORITY) ++ NDS32_NO_TARGET_BUILTIN(unspec_trap, "trap", TRAP) ++ NDS32_NO_TARGET_BUILTIN(unspec_break, "break", BREAK) ++ NDS32_NO_TARGET_BUILTIN(unspec_syscall, "syscall", SYSCALL) ++ NDS32_NO_TARGET_BUILTIN(unspec_enable_int, "enable_int", ENABLE_INT) ++ NDS32_NO_TARGET_BUILTIN(unspec_disable_int, "disable_int", DISABLE_INT) ++ NDS32_NO_TARGET_BUILTIN(unspec_clr_pending_hwint, "clr_pending_hwint", ++ CLR_PENDING_HWINT) ++ NDS32_NO_TARGET_BUILTIN(unspec_set_trig_level, "set_trig_level", ++ SET_TRIG_LEVEL) ++ NDS32_NO_TARGET_BUILTIN(unspec_set_trig_edge, "set_trig_edge", ++ SET_TRIG_EDGE) ++ NDS32_BUILTIN(unspec_get_trig_type, "get_trig_type", GET_TRIG_TYPE) ++}; ++ ++/* Intrinsics that take two arguments. 
*/ ++static struct builtin_description bdesc_2arg[] = ++{ ++ NDS32_BUILTIN(unspec_fcpynss, "fcpynss", FCPYNSS) ++ NDS32_BUILTIN(unspec_fcpyss, "fcpyss", FCPYSS) ++ NDS32_BUILTIN(unspec_fcpynsd, "fcpynsd", FCPYNSD) ++ NDS32_BUILTIN(unspec_fcpysd, "fcpysd", FCPYSD) ++ NDS32_BUILTIN(unspec_ave, "ave", AVE) ++ NDS32_BUILTIN(unspec_pbsad, "pbsad", PBSAD) ++ NDS32_BUILTIN(unspec_ffb, "ffb", FFB) ++ NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM) ++ NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM) ++ NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW) ++ NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH) ++ NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW) ++ NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH) ++ NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB) ++ NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB) ++ NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT) ++ NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT) ++ NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB) ++ NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB) ++ NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT) ++ NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT) ++ NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB) ++ NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB) ++ NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT) ++ NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT) ++ NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB) ++ NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB) ++ NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT) ++ NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT) ++ NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW) ++ NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U) ++ NDS32_BUILTIN(rotrsi3, "rotr", ROTR) ++ NDS32_BUILTIN(unspec_sva, "sva", SVA) ++ NDS32_BUILTIN(unspec_svs, "svs", SVS) ++ NDS32_NO_TARGET_BUILTIN(mtsr_isb, "mtsr_isb", MTSR_ISB) ++ NDS32_NO_TARGET_BUILTIN(mtsr_dsb, "mtsr_dsb", MTSR_DSB) ++ NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtsr, "mtsr", MTSR) ++ NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtusr, "mtusr", MTUSR) ++ NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, 
"unaligned_store_hw", UASTORE_HW) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW) ++ NDS32_BUILTIN(addv2hi3, "add16", ADD16) ++ NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16) ++ NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16) ++ NDS32_BUILTIN(raddv2hi3, "radd16", RADD16) ++ NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16) ++ NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16) ++ NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16) ++ NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16) ++ NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16) ++ NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16) ++ NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16) ++ NDS32_BUILTIN(subv2hi3, "sub16", SUB16) ++ NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16) ++ NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16) ++ NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16) ++ NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16) ++ NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16) ++ NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16) ++ NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16) ++ NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16) ++ NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16) ++ NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16) ++ NDS32_BUILTIN(cras16_1, "cras16", CRAS16) ++ NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16) ++ NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16) ++ NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16) ++ NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16) ++ NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16) ++ NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16) ++ NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16) ++ NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16) ++ NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16) ++ NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16) ++ NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16) ++ NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16) ++ NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16) ++ 
NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16) ++ NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16) ++ NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16) ++ NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16) ++ NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16) ++ NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16) ++ NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16) ++ NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16) ++ NDS32_BUILTIN(addv4qi3, "add8", ADD8) ++ NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8) ++ NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8) ++ NDS32_BUILTIN(raddv4qi3, "radd8", RADD8) ++ NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8) ++ NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8) ++ NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8) ++ NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8) ++ NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8) ++ NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8) ++ NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8) ++ NDS32_BUILTIN(subv4qi3, "sub8", SUB8) ++ NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8) ++ NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8) ++ NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8) ++ NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8) ++ NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8) ++ NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8) ++ NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8) ++ NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8) ++ NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8) ++ NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8) ++ NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16) ++ NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16) ++ NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U) ++ NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U) ++ NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16) ++ NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16) ++ NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) ++ NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) ++ NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16) ++ NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16) ++ NDS32_BUILTIN(kslli16, "ksll16", KSLL16) ++ NDS32_BUILTIN(kslli16, 
"v_ksll16", V_KSLL16) ++ NDS32_BUILTIN(kslra16, "kslra16", KSLRA16) ++ NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16) ++ NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U) ++ NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U) ++ NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16) ++ NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16) ++ NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16) ++ NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16) ++ NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16) ++ NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16) ++ NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16) ++ NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16) ++ NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16) ++ NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16) ++ NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16) ++ NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8) ++ NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8) ++ NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8) ++ NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8) ++ NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8) ++ NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8) ++ NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8) ++ NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8) ++ NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8) ++ NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8) ++ NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8) ++ NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16) ++ NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16) ++ NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16) ++ NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16) ++ NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16) ++ NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16) ++ NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16) ++ NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16) ++ NDS32_BUILTIN(khm16, "khm16", KHM16) ++ NDS32_BUILTIN(khm16, "v_khm16", V_KHM16) ++ NDS32_BUILTIN(khmx16, "khmx16", KHMX16) ++ NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16) ++ NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8) ++ NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8) ++ NDS32_BUILTIN(uminv4qi3, 
"umin8", UMIN8) ++ NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8) ++ NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8) ++ NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8) ++ NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8) ++ NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8) ++ NDS32_BUILTIN(raddsi3, "raddw", RADDW) ++ NDS32_BUILTIN(uraddsi3, "uraddw", URADDW) ++ NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW) ++ NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW) ++ NDS32_BUILTIN(sraiu, "sra_u", SRA_U) ++ NDS32_BUILTIN(kssl, "ksll", KSLL) ++ NDS32_BUILTIN(pkbb, "pkbb16", PKBB16) ++ NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16) ++ NDS32_BUILTIN(pkbt, "pkbt16", PKBT16) ++ NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16) ++ NDS32_BUILTIN(pktb, "pktb16", PKTB16) ++ NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16) ++ NDS32_BUILTIN(pktt, "pktt16", PKTT16) ++ NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16) ++ NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL) ++ NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U) ++ NDS32_BUILTIN(smmwb, "smmwb", SMMWB) ++ NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB) ++ NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U) ++ NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U) ++ NDS32_BUILTIN(smmwt, "smmwt", SMMWT) ++ NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT) ++ NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U) ++ NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U) ++ NDS32_BUILTIN(smbb, "smbb", SMBB) ++ NDS32_BUILTIN(smbb, "v_smbb", V_SMBB) ++ NDS32_BUILTIN(smbt, "smbt", SMBT) ++ NDS32_BUILTIN(smbt, "v_smbt", V_SMBT) ++ NDS32_BUILTIN(smtt, "smtt", SMTT) ++ NDS32_BUILTIN(smtt, "v_smtt", V_SMTT) ++ NDS32_BUILTIN(kmda, "kmda", KMDA) ++ NDS32_BUILTIN(kmda, "v_kmda", V_KMDA) ++ NDS32_BUILTIN(kmxda, "kmxda", KMXDA) ++ NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA) ++ NDS32_BUILTIN(smds, "smds", SMDS) ++ NDS32_BUILTIN(smds, "v_smds", V_SMDS) ++ NDS32_BUILTIN(smdrs, "smdrs", SMDRS) ++ NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS) ++ NDS32_BUILTIN(smxdsv, "smxds", SMXDS) ++ NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS) ++ NDS32_BUILTIN(smal1, "smal", 
SMAL) ++ NDS32_BUILTIN(smal1, "v_smal", V_SMAL) ++ NDS32_BUILTIN(bitrev, "bitrev", BITREV) ++ NDS32_BUILTIN(wext, "wext", WEXT) ++ NDS32_BUILTIN(adddi3, "sadd64", SADD64) ++ NDS32_BUILTIN(adddi3, "uadd64", UADD64) ++ NDS32_BUILTIN(radddi3, "radd64", RADD64) ++ NDS32_BUILTIN(uradddi3, "uradd64", URADD64) ++ NDS32_BUILTIN(kadddi3, "kadd64", KADD64) ++ NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64) ++ NDS32_BUILTIN(subdi3, "ssub64", SSUB64) ++ NDS32_BUILTIN(subdi3, "usub64", USUB64) ++ NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64) ++ NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64) ++ NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64) ++ NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64) ++ NDS32_BUILTIN(smul16, "smul16", SMUL16) ++ NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16) ++ NDS32_BUILTIN(smulx16, "smulx16", SMULX16) ++ NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16) ++ NDS32_BUILTIN(umul16, "umul16", UMUL16) ++ NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16) ++ NDS32_BUILTIN(umulx16, "umulx16", UMULX16) ++ NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16) ++ NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL) ++ NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, ++ "put_unaligned_u16x2", UASTORE_U16) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, ++ "put_unaligned_s16x2", UASTORE_S16) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8) ++}; ++ ++/* Two-argument intrinsics with an immediate second argument. 
*/ ++static struct builtin_description bdesc_2argimm[] = ++{ ++ NDS32_BUILTIN(unspec_bclr, "bclr", BCLR) ++ NDS32_BUILTIN(unspec_bset, "bset", BSET) ++ NDS32_BUILTIN(unspec_btgl, "btgl", BTGL) ++ NDS32_BUILTIN(unspec_btst, "btst", BTST) ++ NDS32_BUILTIN(unspec_clip, "clip", CLIP) ++ NDS32_BUILTIN(unspec_clips, "clips", CLIPS) ++ NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ) ++ NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ) ++ NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16) ++ NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16) ++ NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) ++ NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) ++ NDS32_BUILTIN(kslli16, "ksll16", KSLL16) ++ NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) ++ NDS32_BUILTIN(sclip16, "sclip16", SCLIP16) ++ NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16) ++ NDS32_BUILTIN(uclip16, "uclip16", UCLIP16) ++ NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16) ++ NDS32_BUILTIN(sraiu, "sra_u", SRA_U) ++ NDS32_BUILTIN(kssl, "ksll", KSLL) ++ NDS32_BUILTIN(bitrev, "bitrev", BITREV) ++ NDS32_BUILTIN(wext, "wext", WEXT) ++ NDS32_BUILTIN(uclip32, "uclip32", UCLIP32) ++ NDS32_BUILTIN(sclip32, "sclip32", SCLIP32) ++}; ++ ++/* Intrinsics that take three arguments. 
*/ ++static struct builtin_description bdesc_3arg[] = ++{ ++ NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA) ++ NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE) ++ NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP) ++ NDS32_BUILTIN(kmabb, "kmabb", KMABB) ++ NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB) ++ NDS32_BUILTIN(kmabt, "kmabt", KMABT) ++ NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT) ++ NDS32_BUILTIN(kmatt, "kmatt", KMATT) ++ NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT) ++ NDS32_BUILTIN(kmada, "kmada", KMADA) ++ NDS32_BUILTIN(kmada, "v_kmada", V_KMADA) ++ NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA) ++ NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA) ++ NDS32_BUILTIN(kmads, "kmads", KMADS) ++ NDS32_BUILTIN(kmads, "v_kmads", V_KMADS) ++ NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS) ++ NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS) ++ NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS) ++ NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS) ++ NDS32_BUILTIN(kmsda, "kmsda", KMSDA) ++ NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA) ++ NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA) ++ NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA) ++ NDS32_BUILTIN(bpick1, "bpick", BPICK) ++ NDS32_BUILTIN(smar64_1, "smar64", SMAR64) ++ NDS32_BUILTIN(smsr64, "smsr64", SMSR64) ++ NDS32_BUILTIN(umar64_1, "umar64", UMAR64) ++ NDS32_BUILTIN(umsr64, "umsr64", UMSR64) ++ NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64) ++ NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64) ++ NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64) ++ NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64) ++ NDS32_BUILTIN(smalbb, "smalbb", SMALBB) ++ NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB) ++ NDS32_BUILTIN(smalbt, "smalbt", SMALBT) ++ NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT) ++ NDS32_BUILTIN(smaltt, "smaltt", SMALTT) ++ NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT) ++ NDS32_BUILTIN(smalda1, "smalda", SMALDA) ++ NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA) ++ NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA) ++ NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA) ++ NDS32_BUILTIN(smalds1, "smalds", SMALDS) ++ NDS32_BUILTIN(smalds1, 
"v_smalds", V_SMALDS) ++ NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS) ++ NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS) ++ NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS) ++ NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS) ++ NDS32_BUILTIN(smslda1, "smslda", SMSLDA) ++ NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA) ++ NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA) ++ NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA) ++ NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB) ++ NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB) ++ NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U) ++ NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U) ++ NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT) ++ NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT) ++ NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U) ++ NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U) ++ NDS32_BUILTIN(kmmac, "kmmac", KMMAC) ++ NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U) ++ NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB) ++ NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U) ++}; ++ ++/* Three-argument intrinsics with an immediate third argument. */ ++static struct builtin_description bdesc_3argimm[] = ++{ ++ NDS32_NO_TARGET_BUILTIN(prefetch_qw, "prefetch_qw", DPREF_QW) ++ NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW) ++ NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W) ++ NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW) ++ NDS32_BUILTIN(insb, "insb", INSB) ++}; ++ ++/* Intrinsics that load a value. */ ++static struct builtin_description bdesc_load[] = ++{ ++ NDS32_BUILTIN(unspec_volatile_llw, "llw", LLW) ++ NDS32_BUILTIN(unspec_lwup, "lwup", LWUP) ++ NDS32_BUILTIN(unspec_lbup, "lbup", LBUP) ++}; ++ ++/* Intrinsics that store a value. 
*/ ++static struct builtin_description bdesc_store[] = ++{ ++ NDS32_BUILTIN(unspec_swup, "swup", SWUP) ++ NDS32_BUILTIN(unspec_sbup, "sbup", SBUP) ++}; ++ ++static struct builtin_description bdesc_cctl[] = ++{ ++ NDS32_BUILTIN(cctl_idx_read, "cctl_idx_read", CCTL_IDX_READ) ++ NDS32_NO_TARGET_BUILTIN(cctl_idx_write, "cctl_idx_write", CCTL_IDX_WRITE) ++ NDS32_NO_TARGET_BUILTIN(cctl_va_lck, "cctl_va_lck", CCTL_VA_LCK) ++ NDS32_NO_TARGET_BUILTIN(cctl_idx_wbinval, ++ "cctl_idx_wbinval", CCTL_IDX_WBINVAL) ++ NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_l1, ++ "cctl_va_wbinval_l1", CCTL_VA_WBINVAL_L1) ++ NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_la, ++ "cctl_va_wbinval_la", CCTL_VA_WBINVAL_LA) ++}; + + rtx + nds32_expand_builtin_impl (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, ++ enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) + { + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ unsigned int fcode = DECL_FUNCTION_CODE (fndecl); ++ unsigned i; ++ struct builtin_description *d; ++ ++ if (!NDS32_EXT_DSP_P () ++ && fcode > NDS32_BUILTIN_DSP_BEGIN ++ && fcode < NDS32_BUILTIN_DSP_END) ++ error ("don't support DSP extension instructions"); ++ ++ switch (fcode) ++ { ++ /* FPU Register Transfer. */ ++ case NDS32_BUILTIN_FMFCFG: ++ case NDS32_BUILTIN_FMFCSR: ++ case NDS32_BUILTIN_FMTCSR: ++ case NDS32_BUILTIN_FCPYNSS: ++ case NDS32_BUILTIN_FCPYSS: ++ /* Both v3s and v3f toolchains define TARGET_FPU_SINGLE. */ ++ if (!TARGET_FPU_SINGLE) ++ { ++ error ("this builtin function is only available " ++ "on the v3s or v3f toolchain"); ++ return NULL_RTX; ++ } ++ break; ++ ++ /* FPU Register Transfer. */ ++ case NDS32_BUILTIN_FCPYNSD: ++ case NDS32_BUILTIN_FCPYSD: ++ /* Only v3f toolchain defines TARGET_FPU_DOUBLE. 
*/ ++ if (!TARGET_FPU_DOUBLE) ++ { ++ error ("this builtin function is only available " ++ "on the v3f toolchain"); ++ return NULL_RTX; ++ } ++ break; ++ ++ /* Load and Store */ ++ case NDS32_BUILTIN_LLW: ++ case NDS32_BUILTIN_LWUP: ++ case NDS32_BUILTIN_LBUP: ++ case NDS32_BUILTIN_SCW: ++ case NDS32_BUILTIN_SWUP: ++ case NDS32_BUILTIN_SBUP: ++ if (TARGET_ISA_V3M) ++ { ++ error ("this builtin function not support " ++ "on the v3m toolchain"); ++ return NULL_RTX; ++ } ++ break; ++ ++ /* Performance Extension */ ++ case NDS32_BUILTIN_ABS: ++ case NDS32_BUILTIN_AVE: ++ case NDS32_BUILTIN_BCLR: ++ case NDS32_BUILTIN_BSET: ++ case NDS32_BUILTIN_BTGL: ++ case NDS32_BUILTIN_BTST: ++ case NDS32_BUILTIN_CLIP: ++ case NDS32_BUILTIN_CLIPS: ++ case NDS32_BUILTIN_CLZ: ++ case NDS32_BUILTIN_CLO: ++ if (!TARGET_EXT_PERF) ++ { ++ error ("don't support performance extension instructions"); ++ return NULL_RTX; ++ } ++ break; ++ ++ /* Performance Extension 2 */ ++ case NDS32_BUILTIN_PBSAD: ++ case NDS32_BUILTIN_PBSADA: ++ case NDS32_BUILTIN_BSE: ++ case NDS32_BUILTIN_BSP: ++ if (!TARGET_EXT_PERF2) ++ { ++ error ("don't support performance extension " ++ "version 2 instructions"); ++ return NULL_RTX; ++ } ++ break; + +- int fcode = DECL_FUNCTION_CODE (fndecl); ++ /* String Extension */ ++ case NDS32_BUILTIN_FFB: ++ case NDS32_BUILTIN_FFMISM: ++ case NDS32_BUILTIN_FLMISM: ++ if (!TARGET_EXT_STRING) ++ { ++ error ("don't support string extension instructions"); ++ return NULL_RTX; ++ } ++ break; + ++ default: ++ break; ++ } ++ ++ /* Since there are no result and operands, we can simply emit this rtx. */ + switch (fcode) + { +- /* Cache. */ +- case NDS32_BUILTIN_ISYNC: +- return nds32_expand_builtin_null_ftype_reg +- (CODE_FOR_unspec_volatile_isync, exp, target); + case NDS32_BUILTIN_ISB: +- /* Since there are no result and operands for isb instruciton, +- we can simply emit this rtx. */ + emit_insn (gen_unspec_volatile_isb ()); + return target; +- +- /* Register Transfer. 
*/ +- case NDS32_BUILTIN_MFSR: +- return nds32_expand_builtin_reg_ftype_imm +- (CODE_FOR_unspec_volatile_mfsr, exp, target); +- case NDS32_BUILTIN_MFUSR: +- return nds32_expand_builtin_reg_ftype_imm +- (CODE_FOR_unspec_volatile_mfusr, exp, target); +- case NDS32_BUILTIN_MTSR: +- return nds32_expand_builtin_null_ftype_reg_imm +- (CODE_FOR_unspec_volatile_mtsr, exp, target); +- case NDS32_BUILTIN_MTUSR: +- return nds32_expand_builtin_null_ftype_reg_imm +- (CODE_FOR_unspec_volatile_mtusr, exp, target); +- +- /* Interrupt. */ ++ case NDS32_BUILTIN_DSB: ++ emit_insn (gen_unspec_dsb ()); ++ return target; ++ case NDS32_BUILTIN_MSYNC_ALL: ++ emit_insn (gen_unspec_msync_all ()); ++ return target; ++ case NDS32_BUILTIN_MSYNC_STORE: ++ emit_insn (gen_unspec_msync_store ()); ++ return target; + case NDS32_BUILTIN_SETGIE_EN: +- /* Since there are no result and operands for setgie.e instruciton, +- we can simply emit this rtx. */ + emit_insn (gen_unspec_volatile_setgie_en ()); ++ emit_insn (gen_unspec_dsb ()); + return target; + case NDS32_BUILTIN_SETGIE_DIS: +- /* Since there are no result and operands for setgie.d instruciton, +- we can simply emit this rtx. 
*/ + emit_insn (gen_unspec_volatile_setgie_dis ()); ++ emit_insn (gen_unspec_dsb ()); ++ return target; ++ case NDS32_BUILTIN_GIE_DIS: ++ emit_insn (gen_unspec_volatile_setgie_dis ()); ++ emit_insn (gen_unspec_dsb ()); ++ return target; ++ case NDS32_BUILTIN_GIE_EN: ++ emit_insn (gen_unspec_volatile_setgie_en ()); ++ emit_insn (gen_unspec_dsb ()); ++ return target; ++ case NDS32_BUILTIN_SET_PENDING_SWINT: ++ emit_insn (gen_unspec_set_pending_swint ()); ++ return target; ++ case NDS32_BUILTIN_CLR_PENDING_SWINT: ++ emit_insn (gen_unspec_clr_pending_swint ()); ++ return target; ++ case NDS32_BUILTIN_CCTL_L1D_INVALALL: ++ emit_insn (gen_cctl_l1d_invalall()); ++ return target; ++ case NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL: ++ emit_insn (gen_cctl_l1d_wball_alvl()); ++ return target; ++ case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL: ++ emit_insn (gen_cctl_l1d_wball_one_lvl()); ++ return target; ++ case NDS32_BUILTIN_CLROV: ++ emit_insn (gen_unspec_volatile_clrov ()); ++ return target; ++ case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT: ++ emit_insn (gen_unspec_standby_no_wake_grant ()); ++ return target; ++ case NDS32_BUILTIN_STANDBY_WAKE_GRANT: ++ emit_insn (gen_unspec_standby_wake_grant ()); ++ return target; ++ case NDS32_BUILTIN_STANDBY_WAKE_DONE: ++ emit_insn (gen_unspec_standby_wait_done ()); ++ return target; ++ case NDS32_BUILTIN_SETEND_BIG: ++ emit_insn (gen_unspec_setend_big ()); ++ return target; ++ case NDS32_BUILTIN_SETEND_LITTLE: ++ emit_insn (gen_unspec_setend_little ()); ++ return target; ++ case NDS32_BUILTIN_NOP: ++ emit_insn (gen_unspec_nop ()); ++ return target; ++ case NDS32_BUILTIN_SCHE_BARRIER: ++ emit_insn (gen_blockage ()); ++ return target; ++ case NDS32_BUILTIN_TLBOP_FLUA: ++ emit_insn (gen_unspec_tlbop_flua ()); ++ return target; ++ /* Instruction sequence protection */ ++ case NDS32_BUILTIN_SIGNATURE_BEGIN: ++ emit_insn (gen_unspec_signature_begin ()); ++ return target; ++ case NDS32_BUILTIN_SIGNATURE_END: ++ emit_insn (gen_unspec_signature_end ()); ++ 
return target; ++ case NDS32_BUILTIN_SCW: ++ return nds32_expand_scw_builtin (CODE_FOR_unspec_volatile_scw, ++ exp, target); ++ case NDS32_BUILTIN_SET_INT_PRIORITY: ++ return nds32_expand_priority_builtin (CODE_FOR_unspec_set_int_priority, ++ exp, target, ++ "__nds32__set_int_priority"); ++ case NDS32_BUILTIN_NO_HWLOOP: ++ emit_insn (gen_no_hwloop ()); + return target; +- + default: +- gcc_unreachable (); ++ break; + } + ++ /* Expand groups of builtins. */ ++ for (i = 0, d = bdesc_noarg; i < ARRAY_SIZE (bdesc_noarg); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_noarg_builtin (d->icode, target); ++ ++ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_unop_builtin (d->icode, exp, target, d->return_p); ++ ++ for (i = 0, d = bdesc_1argimm; i < ARRAY_SIZE (bdesc_1argimm); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_unopimm_builtin (d->icode, exp, target, ++ d->return_p, d->name); ++ ++ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_binop_builtin (d->icode, exp, target, d->return_p); ++ ++ for (i = 0, d = bdesc_2argimm; i < ARRAY_SIZE (bdesc_2argimm); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_binopimm_builtin (d->icode, exp, target, ++ d->return_p, d->name); ++ ++ for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_triop_builtin (d->icode, exp, target, d->return_p); ++ ++ for (i = 0, d = bdesc_3argimm; i < ARRAY_SIZE (bdesc_3argimm); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_triopimm_builtin (d->icode, exp, target, ++ d->return_p, d->name); ++ ++ for (i = 0, d = bdesc_load; i < ARRAY_SIZE (bdesc_load); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_builtin_load (d->icode, exp, target); ++ ++ for (i = 0, d = bdesc_store; i < ARRAY_SIZE (bdesc_store); i++, d++) ++ if (d->code == fcode) ++ return 
nds32_expand_builtin_store (d->icode, exp, target); ++ ++ for (i = 0, d = bdesc_cctl; i < ARRAY_SIZE (bdesc_cctl); i++, d++) ++ if (d->code == fcode) ++ return nds32_expand_cctl_builtin (d->icode, exp, target, ++ d->return_p, d->name); ++ + return NULL_RTX; + } + ++static GTY(()) tree nds32_builtin_decls[NDS32_BUILTIN_COUNT]; ++ ++/* Return the NDS32 builtin for CODE. */ ++tree ++nds32_builtin_decl_impl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) ++{ ++ if (code >= NDS32_BUILTIN_COUNT) ++ return error_mark_node; ++ ++ return nds32_builtin_decls[code]; ++} ++ ++void ++nds32_init_builtins_impl (void) ++{ ++#define ADD_NDS32_BUILTIN0(NAME, RET_TYPE, CODE) \ ++ nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ ++ add_builtin_function ("__builtin_nds32_" NAME, \ ++ build_function_type_list (RET_TYPE##_type_node, \ ++ NULL_TREE), \ ++ NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) ++ ++#define ADD_NDS32_BUILTIN1(NAME, RET_TYPE, ARG_TYPE, CODE) \ ++ nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ ++ add_builtin_function ("__builtin_nds32_" NAME, \ ++ build_function_type_list (RET_TYPE##_type_node, \ ++ ARG_TYPE##_type_node, \ ++ NULL_TREE), \ ++ NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) ++ ++#define ADD_NDS32_BUILTIN2(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, CODE) \ ++ nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ ++ add_builtin_function ("__builtin_nds32_" NAME, \ ++ build_function_type_list (RET_TYPE##_type_node, \ ++ ARG_TYPE1##_type_node,\ ++ ARG_TYPE2##_type_node,\ ++ NULL_TREE), \ ++ NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) ++ ++#define ADD_NDS32_BUILTIN3(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, ARG_TYPE3, CODE) \ ++ nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ ++ add_builtin_function ("__builtin_nds32_" NAME, \ ++ build_function_type_list (RET_TYPE##_type_node, \ ++ ARG_TYPE1##_type_node,\ ++ ARG_TYPE2##_type_node,\ ++ ARG_TYPE3##_type_node,\ ++ NULL_TREE), \ ++ NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) ++ ++ /* 
Looking for return type and argument can be found in tree.h file. */ ++ tree ptr_char_type_node = build_pointer_type (char_type_node); ++ tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node); ++ tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node); ++ tree ptr_short_type_node = build_pointer_type (short_integer_type_node); ++ tree ptr_uint_type_node = build_pointer_type (unsigned_type_node); ++ tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node); ++ tree v4qi_type_node = build_vector_type (intQI_type_node, 4); ++ tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4); ++ tree v2hi_type_node = build_vector_type (intHI_type_node, 2); ++ tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2); ++ tree v2si_type_node = build_vector_type (intSI_type_node, 2); ++ tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2); ++ ++ /* Cache. */ ++ ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC); ++ ADD_NDS32_BUILTIN0 ("isb", void, ISB); ++ ADD_NDS32_BUILTIN0 ("dsb", void, DSB); ++ ADD_NDS32_BUILTIN0 ("msync_all", void, MSYNC_ALL); ++ ADD_NDS32_BUILTIN0 ("msync_store", void, MSYNC_STORE); ++ ++ /* Register Transfer. */ ++ ADD_NDS32_BUILTIN1 ("mfsr", unsigned, integer, MFSR); ++ ADD_NDS32_BUILTIN1 ("mfusr", unsigned, integer, MFUSR); ++ ADD_NDS32_BUILTIN2 ("mtsr", void, unsigned, integer, MTSR); ++ ADD_NDS32_BUILTIN2 ("mtsr_isb", void, unsigned, integer, MTSR_ISB); ++ ADD_NDS32_BUILTIN2 ("mtsr_dsb", void, unsigned, integer, MTSR_DSB); ++ ADD_NDS32_BUILTIN2 ("mtusr", void, unsigned, integer, MTUSR); ++ ++ /* FPU Register Transfer. 
*/ ++ ADD_NDS32_BUILTIN0 ("fmfcsr", unsigned, FMFCSR); ++ ADD_NDS32_BUILTIN1 ("fmtcsr", void, unsigned, FMTCSR); ++ ADD_NDS32_BUILTIN0 ("fmfcfg", unsigned, FMFCFG); ++ ADD_NDS32_BUILTIN2 ("fcpyss", float, float, float, FCPYSS); ++ ADD_NDS32_BUILTIN2 ("fcpynss", float, float, float, FCPYNSS); ++ ADD_NDS32_BUILTIN2 ("fcpysd", double, double, double, FCPYSD); ++ ADD_NDS32_BUILTIN2 ("fcpynsd", double, double, double, FCPYNSD); ++ ++ /* Interrupt. */ ++ ADD_NDS32_BUILTIN0 ("setgie_en", void, SETGIE_EN); ++ ADD_NDS32_BUILTIN0 ("setgie_dis", void, SETGIE_DIS); ++ ADD_NDS32_BUILTIN0 ("gie_en", void, GIE_EN); ++ ADD_NDS32_BUILTIN0 ("gie_dis", void, GIE_DIS); ++ ADD_NDS32_BUILTIN1 ("enable_int", void, integer, ENABLE_INT); ++ ADD_NDS32_BUILTIN1 ("disable_int", void, integer, DISABLE_INT); ++ ADD_NDS32_BUILTIN0 ("set_pending_swint", void, SET_PENDING_SWINT); ++ ADD_NDS32_BUILTIN0 ("clr_pending_swint", void, CLR_PENDING_SWINT); ++ ADD_NDS32_BUILTIN0 ("get_all_pending_int", unsigned, GET_ALL_PENDING_INT); ++ ADD_NDS32_BUILTIN1 ("get_pending_int", unsigned, integer, GET_PENDING_INT); ++ ADD_NDS32_BUILTIN1 ("get_int_priority", unsigned, integer, GET_INT_PRIORITY); ++ ADD_NDS32_BUILTIN2 ("set_int_priority", void, integer, integer, ++ SET_INT_PRIORITY); ++ ADD_NDS32_BUILTIN1 ("clr_pending_hwint", void, integer, CLR_PENDING_HWINT); ++ ADD_NDS32_BUILTIN1 ("set_trig_level", void, integer, SET_TRIG_LEVEL); ++ ADD_NDS32_BUILTIN1 ("set_trig_edge", void, integer, SET_TRIG_EDGE); ++ ADD_NDS32_BUILTIN1 ("get_trig_type", unsigned, integer, GET_TRIG_TYPE); ++ ++ /* Load and Store */ ++ ADD_NDS32_BUILTIN1 ("llw", unsigned, ptr_uint, LLW); ++ ADD_NDS32_BUILTIN1 ("lwup", unsigned, ptr_uint, LWUP); ++ ADD_NDS32_BUILTIN1 ("lbup", char, ptr_uchar, LBUP); ++ ADD_NDS32_BUILTIN2 ("scw", unsigned, ptr_uint, unsigned, SCW); ++ ADD_NDS32_BUILTIN2 ("swup", void, ptr_uint, unsigned, SWUP); ++ ADD_NDS32_BUILTIN2 ("sbup", void, ptr_uchar, char, SBUP); ++ ++ /* CCTL */ ++ ADD_NDS32_BUILTIN0 
("cctl_l1d_invalall", void, CCTL_L1D_INVALALL); ++ ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_alvl", void, CCTL_L1D_WBALL_ALVL); ++ ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_one_lvl", void, CCTL_L1D_WBALL_ONE_LVL); ++ ADD_NDS32_BUILTIN2 ("cctl_va_lck", void, integer, ptr_uint, CCTL_VA_LCK); ++ ADD_NDS32_BUILTIN2 ("cctl_idx_wbinval", void, integer, unsigned, ++ CCTL_IDX_WBINVAL); ++ ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_l1", void, integer, ptr_uint, ++ CCTL_VA_WBINVAL_L1); ++ ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_la", void, integer, ptr_uint, ++ CCTL_VA_WBINVAL_LA); ++ ADD_NDS32_BUILTIN2 ("cctl_idx_read", unsigned, integer, unsigned, ++ CCTL_IDX_READ); ++ ADD_NDS32_BUILTIN3 ("cctl_idx_write", void, integer, unsigned, unsigned, ++ CCTL_IDX_WRITE); ++ ++ /* PREFETCH */ ++ ADD_NDS32_BUILTIN3 ("dpref_qw", void, ptr_uchar, unsigned, integer, DPREF_QW); ++ ADD_NDS32_BUILTIN3 ("dpref_hw", void, ptr_ushort, unsigned, integer, ++ DPREF_HW); ++ ADD_NDS32_BUILTIN3 ("dpref_w", void, ptr_uint, unsigned, integer, DPREF_W); ++ ADD_NDS32_BUILTIN3 ("dpref_dw", void, ptr_ulong, unsigned, integer, DPREF_DW); ++ ++ /* Performance Extension */ ++ ADD_NDS32_BUILTIN1 ("pe_abs", integer, integer, ABS); ++ ADD_NDS32_BUILTIN2 ("pe_ave", integer, integer, integer, AVE); ++ ADD_NDS32_BUILTIN2 ("pe_bclr", unsigned, unsigned, unsigned, BCLR); ++ ADD_NDS32_BUILTIN2 ("pe_bset", unsigned, unsigned, unsigned, BSET); ++ ADD_NDS32_BUILTIN2 ("pe_btgl", unsigned, unsigned, unsigned, BTGL); ++ ADD_NDS32_BUILTIN2 ("pe_btst", unsigned, unsigned, unsigned, BTST); ++ ADD_NDS32_BUILTIN2 ("pe_clip", unsigned, integer, unsigned, CLIP); ++ ADD_NDS32_BUILTIN2 ("pe_clips", integer, integer, unsigned, CLIPS); ++ ADD_NDS32_BUILTIN1 ("pe_clz", unsigned, unsigned, CLZ); ++ ADD_NDS32_BUILTIN1 ("pe_clo", unsigned, unsigned, CLO); ++ ++ /* Performance Extension 2 */ ++ ADD_NDS32_BUILTIN3 ("pe2_bse", void, ptr_uint, unsigned, ptr_uint, BSE); ++ ADD_NDS32_BUILTIN3 ("pe2_bsp", void, ptr_uint, unsigned, ptr_uint, BSP); ++ 
ADD_NDS32_BUILTIN2 ("pe2_pbsad", unsigned, unsigned, unsigned, PBSAD); ++ ADD_NDS32_BUILTIN3 ("pe2_pbsada", unsigned, unsigned, unsigned, unsigned, ++ PBSADA); ++ ++ /* String Extension */ ++ ADD_NDS32_BUILTIN2 ("se_ffb", integer, unsigned, unsigned, FFB); ++ ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM); ++ ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM); ++ ++ /* SATURATION */ ++ ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW); ++ ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW); ++ ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH); ++ ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH); ++ ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB); ++ ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB); ++ ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT); ++ ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT); ++ ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB); ++ ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB); ++ ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT); ++ ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT); ++ ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB); ++ ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB); ++ ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT); ++ ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT); ++ ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB); ++ ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB); ++ ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT); ++ ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT); ++ ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW); ++ ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U); ++ ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV); ++ ADD_NDS32_BUILTIN0 ("clrov", void, 
CLROV); ++ ++ /* ROTR */ ++ ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR); ++ ++ /* Swap */ ++ ADD_NDS32_BUILTIN1 ("wsbh", unsigned, unsigned, WSBH); ++ ++ /* System */ ++ ADD_NDS32_BUILTIN2 ("svs", unsigned, integer, integer, SVS); ++ ADD_NDS32_BUILTIN2 ("sva", unsigned, integer, integer, SVA); ++ ADD_NDS32_BUILTIN1 ("jr_itoff", void, unsigned, JR_ITOFF); ++ ADD_NDS32_BUILTIN1 ("jr_toff", void, unsigned, JR_TOFF); ++ ADD_NDS32_BUILTIN1 ("jral_iton", void, unsigned, JRAL_ITON); ++ ADD_NDS32_BUILTIN1 ("jral_ton", void, unsigned, JRAL_TON); ++ ADD_NDS32_BUILTIN1 ("ret_itoff", void, unsigned, RET_ITOFF); ++ ADD_NDS32_BUILTIN1 ("ret_toff", void, unsigned, RET_TOFF); ++ ADD_NDS32_BUILTIN0 ("standby_no_wake_grant", void, STANDBY_NO_WAKE_GRANT); ++ ADD_NDS32_BUILTIN0 ("standby_wake_grant", void, STANDBY_WAKE_GRANT); ++ ADD_NDS32_BUILTIN0 ("standby_wait_done", void, STANDBY_WAKE_DONE); ++ ADD_NDS32_BUILTIN1 ("break", void, unsigned, BREAK); ++ ADD_NDS32_BUILTIN1 ("syscall", void, unsigned, SYSCALL); ++ ADD_NDS32_BUILTIN0 ("nop", void, NOP); ++ ADD_NDS32_BUILTIN0 ("get_current_sp", unsigned, GET_CURRENT_SP); ++ ADD_NDS32_BUILTIN1 ("set_current_sp", void, unsigned, SET_CURRENT_SP); ++ ADD_NDS32_BUILTIN2 ("teqz", void, unsigned, unsigned, TEQZ); ++ ADD_NDS32_BUILTIN2 ("tnez", void, unsigned, unsigned, TNEZ); ++ ADD_NDS32_BUILTIN1 ("trap", void, unsigned, TRAP); ++ ADD_NDS32_BUILTIN0 ("return_address", unsigned, RETURN_ADDRESS); ++ ADD_NDS32_BUILTIN0 ("setend_big", void, SETEND_BIG); ++ ADD_NDS32_BUILTIN0 ("setend_little", void, SETEND_LITTLE); ++ ++ /* Schedule Barrier */ ++ ADD_NDS32_BUILTIN0 ("schedule_barrier", void, SCHE_BARRIER); ++ ++ /* TLBOP */ ++ ADD_NDS32_BUILTIN1 ("tlbop_trd", void, unsigned, TLBOP_TRD); ++ ADD_NDS32_BUILTIN1 ("tlbop_twr", void, unsigned, TLBOP_TWR); ++ ADD_NDS32_BUILTIN1 ("tlbop_rwr", void, unsigned, TLBOP_RWR); ++ ADD_NDS32_BUILTIN1 ("tlbop_rwlk", void, unsigned, TLBOP_RWLK); ++ ADD_NDS32_BUILTIN1 ("tlbop_unlk", void, unsigned, 
TLBOP_UNLK); ++ ADD_NDS32_BUILTIN1 ("tlbop_pb", unsigned, unsigned, TLBOP_PB); ++ ADD_NDS32_BUILTIN1 ("tlbop_inv", void, unsigned, TLBOP_INV); ++ ADD_NDS32_BUILTIN0 ("tlbop_flua", void, TLBOP_FLUA); ++ ++ /* Unaligned Load/Store */ ++ ADD_NDS32_BUILTIN1 ("unaligned_load_hw", short_unsigned, ptr_ushort, ++ UALOAD_HW); ++ ADD_NDS32_BUILTIN1 ("unaligned_load_w", unsigned, ptr_uint, UALOAD_W); ++ ADD_NDS32_BUILTIN1 ("unaligned_load_dw", long_long_unsigned, ptr_ulong, ++ UALOAD_DW); ++ ADD_NDS32_BUILTIN2 ("unaligned_store_hw", void, ptr_ushort, short_unsigned, ++ UASTORE_HW); ++ ADD_NDS32_BUILTIN2 ("unaligned_store_w", void, ptr_uint, unsigned, UASTORE_W); ++ ADD_NDS32_BUILTIN2 ("unaligned_store_dw", void, ptr_ulong, long_long_unsigned, ++ UASTORE_DW); ++ ADD_NDS32_BUILTIN0 ("unaligned_feature", unsigned, UNALIGNED_FEATURE); ++ ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED); ++ ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED); ++ ++ /* Instruction sequence protection */ ++ ADD_NDS32_BUILTIN0 ("signature_begin", void, SIGNATURE_BEGIN); ++ ADD_NDS32_BUILTIN0 ("signature_end", void, SIGNATURE_END); ++ ++ /* DSP Extension: SIMD 16bit Add and Subtract. 
*/ ++ ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16); ++ ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16); ++ ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16); ++ ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16); ++ ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16); ++ ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16); ++ ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16); ++ ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16); ++ ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16); ++ ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16); ++ ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16); ++ ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16); ++ ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16); ++ ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16); ++ ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16); ++ ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16); ++ ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16); ++ ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16); ++ ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16); ++ ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16); ++ ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16); ++ ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16); ++ ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16); ++ ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16); ++ ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16); ++ ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16); ++ ADD_NDS32_BUILTIN2 
("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16); ++ ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16); ++ ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16); ++ ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16); ++ ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16); ++ ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16); ++ ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16); ++ ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16); ++ ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16); ++ ++ /* DSP Extension: SIMD 8bit Add and Subtract. 
*/ ++ ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8); ++ ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8); ++ ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8); ++ ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8); ++ ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8); ++ ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8); ++ ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8); ++ ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8); ++ ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8); ++ ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8); ++ ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8); ++ ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8); ++ ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8); ++ ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8); ++ ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8); ++ ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8); ++ ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8); ++ ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8); ++ ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8); ++ ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8); ++ ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8); ++ ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8); ++ ++ /* DSP Extension: SIMD 16bit Shift. 
*/ ++ ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16); ++ ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16); ++ ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U); ++ ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U); ++ ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16); ++ ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16); ++ ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U); ++ ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U); ++ ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16); ++ ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16); ++ ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16); ++ ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16); ++ ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16); ++ ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16); ++ ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U); ++ ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U); ++ ++ /* DSP Extension: 16bit Compare. 
*/ ++ ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16); ++ ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16); ++ ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16); ++ ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16); ++ ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16); ++ ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16); ++ ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16); ++ ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16); ++ ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16); ++ ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16); ++ ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16); ++ ++ /* DSP Extension: 8bit Compare. */ ++ ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8); ++ ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8); ++ ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8); ++ ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8); ++ ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8); ++ ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8); ++ ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8); ++ ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8); ++ ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8); ++ ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8); ++ ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8); ++ ++ /* DSP Extension: SIMD 16bit MISC. 
*/ ++ ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16); ++ ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16); ++ ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16); ++ ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16); ++ ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16); ++ ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16); ++ ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16); ++ ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16); ++ ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16); ++ ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16); ++ ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16); ++ ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16); ++ ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16); ++ ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16); ++ ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16); ++ ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16); ++ ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16); ++ ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16); ++ ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16); ++ ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16); ++ ADD_NDS32_BUILTIN2 ("smulx16", ++ long_long_unsigned, unsigned, unsigned, SMULX16); ++ ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16); ++ ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16); ++ ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16); ++ ADD_NDS32_BUILTIN2 ("umulx16", ++ long_long_unsigned, unsigned, unsigned, UMULX16); ++ ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16); ++ ++ /* DSP Extension: SIMD 8bit MISC. 
*/ ++ ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8); ++ ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8); ++ ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8); ++ ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8); ++ ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8); ++ ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8); ++ ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8); ++ ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8); ++ ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8); ++ ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8); ++ ++ /* DSP Extension: 8bit Unpacking. */ ++ ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810); ++ ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820); ++ ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830); ++ ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831); ++ ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810); ++ ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820); ++ ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830); ++ ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831); ++ ++ /* DSP Extension: 32bit Add and Subtract. 
*/ ++ ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW); ++ ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW); ++ ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW); ++ ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW); ++ ++ /* DSP Extension: 32bit Shift. */ ++ ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U); ++ ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL); ++ ++ /* DSP Extension: 16bit Packing. */ ++ ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16); ++ ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16); ++ ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16); ++ ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16); ++ ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16); ++ ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16); ++ ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16); ++ ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16); ++ ++ /* DSP Extension: Signed MSW 32x32 Multiply and ADD. */ ++ ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL); ++ ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U); ++ ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC); ++ ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U); ++ ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB); ++ ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U); ++ ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL); ++ ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U); ++ ++ /* DSP Extension: Most Significant Word 32x16 Multiply and ADD. 
*/ ++ ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB); ++ ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB); ++ ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U); ++ ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U); ++ ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT); ++ ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT); ++ ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U); ++ ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U); ++ ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB); ++ ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB); ++ ADD_NDS32_BUILTIN3 ("kmmawb_u", ++ integer, integer, integer, unsigned, KMMAWB_U); ++ ADD_NDS32_BUILTIN3 ("v_kmmawb_u", ++ integer, integer, integer, v2hi, V_KMMAWB_U); ++ ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT); ++ ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT); ++ ADD_NDS32_BUILTIN3 ("kmmawt_u", ++ integer, integer, integer, unsigned, KMMAWT_U); ++ ADD_NDS32_BUILTIN3 ("v_kmmawt_u", ++ integer, integer, integer, v2hi, V_KMMAWT_U); ++ ++ /* DSP Extension: Signed 16bit Multiply with ADD/Subtract. 
*/ ++ ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB); ++ ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB); ++ ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT); ++ ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT); ++ ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT); ++ ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT); ++ ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA); ++ ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA); ++ ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA); ++ ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA); ++ ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS); ++ ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS); ++ ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS); ++ ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS); ++ ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS); ++ ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS); ++ ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB); ++ ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB); ++ ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT); ++ ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT); ++ ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT); ++ ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT); ++ ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA); ++ ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA); ++ ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA); ++ ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA); ++ ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS); ++ ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS); ++ ADD_NDS32_BUILTIN3 
("kmadrs", integer, integer, unsigned, unsigned, KMADRS); ++ ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS); ++ ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS); ++ ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS); ++ ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA); ++ ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA); ++ ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA); ++ ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA); ++ ++ /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract. */ ++ ADD_NDS32_BUILTIN2 ("smal", long_long_integer, ++ long_long_integer, unsigned, SMAL); ++ ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer, ++ long_long_integer, v2hi, V_SMAL); ++ ++ /* DSP Extension: 32bit MISC. */ ++ ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV); ++ ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT); ++ ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK); ++ ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB); ++ ++ /* DSP Extension: 64bit Add and Subtract. 
*/ ++ ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer, ++ long_long_integer, long_long_integer, SADD64); ++ ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, UADD64); ++ ADD_NDS32_BUILTIN2 ("radd64", long_long_integer, ++ long_long_integer, long_long_integer, RADD64); ++ ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, URADD64); ++ ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer, ++ long_long_integer, long_long_integer, KADD64); ++ ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, UKADD64); ++ ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer, ++ long_long_integer, long_long_integer, SSUB64); ++ ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, USUB64); ++ ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer, ++ long_long_integer, long_long_integer, RSUB64); ++ ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, URSUB64); ++ ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer, ++ long_long_integer, long_long_integer, KSUB64); ++ ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, UKSUB64); ++ ++ /* DSP Extension: 32bit Multiply with 64bit Add/Subtract. 
*/ ++ ADD_NDS32_BUILTIN3 ("smar64", long_long_integer, ++ long_long_integer, integer, integer, SMAR64); ++ ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer, ++ long_long_integer, integer, integer, SMSR64); ++ ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UMAR64); ++ ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UMSR64); ++ ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer, ++ long_long_integer, integer, integer, KMAR64); ++ ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer, ++ long_long_integer, integer, integer, KMSR64); ++ ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UKMAR64); ++ ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UKMSR64); ++ ++ /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract. */ ++ ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALBB); ++ ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALBB); ++ ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALBT); ++ ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALBT); ++ ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALTT); ++ ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALTT); ++ ADD_NDS32_BUILTIN3 ("smalda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALDA); ++ ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALDA); ++ ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALXDA); ++ ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALXDA); ++ ADD_NDS32_BUILTIN3 ("smalds", long_long_integer, ++ long_long_integer, 
unsigned, unsigned, SMALDS); ++ ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALDS); ++ ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALDRS); ++ ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALDRS); ++ ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALXDS); ++ ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALXDS); ++ ADD_NDS32_BUILTIN3 ("smslda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMSLDA); ++ ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMSLDA); ++ ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMSLXDA); ++ ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMSLXDA); ++ ++ /* DSP Extension: augmented baseline. */ ++ ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32); ++ ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32); ++ ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS); ++ ++ /* The builtin turn off hwloop optimization. 
*/ ++ ADD_NDS32_BUILTIN0 ("no_ext_zol", void, NO_HWLOOP); ++ ++ /* DSP Extension: vector type unaligned Load/Store */ ++ ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16); ++ ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16); ++ ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8); ++ ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort, ++ u_v2hi, UASTORE_U16); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short, ++ v2hi, UASTORE_S16); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar, ++ u_v4qi, UASTORE_U8); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char, ++ v4qi, UASTORE_S8); ++} + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md +index 53876c5..6f8b3eb 100644 +--- a/gcc/config/nds32/nds32-intrinsic.md ++++ b/gcc/config/nds32/nds32-intrinsic.md +@@ -40,6 +40,26 @@ + (set_attr "length" "4")] + ) + ++(define_expand "mtsr_isb" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "immediate_operand" ""))] ++ "" ++{ /* Expand to the unspec_volatile_mtsr pattern immediately followed by the isb pattern; DONE ends the expansion with only these two insns emitted. */ ++ emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1])); ++ emit_insn (gen_unspec_volatile_isb()); ++ DONE; ++}) ++ ++(define_expand "mtsr_dsb" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "immediate_operand" ""))] ++ "" ++{ /* Same shape as mtsr_isb, but the mtsr pattern is followed by the dsb pattern instead. */ ++ emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1])); ++ emit_insn (gen_unspec_dsb()); ++ DONE; ++}) ++ + (define_insn "unspec_volatile_mtsr" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MTSR)] +@@ -58,6 +78,74 @@ + (set_attr "length" "4")] + ) + ++;; FPU Register Transfer.
++/* fcpy* patterns: two inputs and one output, all with constraint f and the same mode (DF for the names ending in d, SF for the names ending in s); each prints the single assembler template given and is marked as 4 bytes long. */ ++(define_insn "unspec_fcpynsd" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (unspec:DF [(match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYNSD))] ++ "" ++ "fcpynsd\t%0, %1, %2" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_fcpynss" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (unspec:SF [(match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYNSS))] ++ "" ++ "fcpynss\t%0, %1, %2" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_fcpysd" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (unspec:DF [(match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYSD))] ++ "" ++ "fcpysd\t%0, %1, %2" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_fcpyss" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (unspec:SF [(match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYSS))] ++ "" ++ "fcpyss\t%0, %1, %2" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++/* fmfcsr and fmfcfg set an SImode output with constraint r from an unspec_volatile that has no register inputs; fmtcsr takes one SImode input with constraint r and has no output operand. */ ++(define_insn "unspec_fmfcsr" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCSR))] ++ "" ++ "fmfcsr\t%0" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_fmtcsr" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_FMTCSR)] ++ "" ++ "fmtcsr\t%0" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_fmfcfg" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCFG))] ++ "" ++ "fmfcfg\t%0" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ + ;; ------------------------------------------------------------------------ + + ;; Interrupt
Instructions. +@@ -76,6 +164,445 @@ + [(set_attr "type" "misc")] + ) + ++(define_expand "unspec_enable_int" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_ENABLE_INT)] ++ "" ++{ ++ rtx system_reg; ++ rtx temp_reg = gen_reg_rtx (SImode); ++ ++ /* Set system register from nds32_intrinsic_register_names[] and replace operands[0] with the single-bit mask for the requested interrupt. */ ++ if ((INTVAL (operands[0]) >= NDS32_INT_H16) ++ && (INTVAL (operands[0]) <= NDS32_INT_H31)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_MASK2__); ++ operands[0] = GEN_INT (1 << (INTVAL (operands[0]))); ++ } ++ else if ((INTVAL (operands[0]) >= NDS32_INT_H32) ++ && (INTVAL (operands[0]) <= NDS32_INT_H63)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_MASK3__); ++ operands[0] = GEN_INT (1 << (INTVAL (operands[0]) - 32)); ++ } ++ else ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_MASK__); ++ ++ if (INTVAL (operands[0]) == NDS32_INT_SWI) ++ operands[0] = GEN_INT (1 << 16); ++ else if ((INTVAL (operands[0]) >= NDS32_INT_ALZ) ++ && (INTVAL (operands[0]) <= NDS32_INT_DSSIM)) ++ operands[0] = GEN_INT (1 << (INTVAL (operands[0]) - 4)); ++ else ++ operands[0] = GEN_INT (1 << (INTVAL (operands[0]))); ++ } ++ /* Read the selected mask register, OR in the bit, write it back, then emit the dsb pattern. */ ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn (gen_iorsi3 (temp_reg, temp_reg, operands[0])); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++(define_expand "unspec_disable_int" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_DISABLE_INT)] ++ "" ++{ ++ rtx system_reg; ++ rtx temp_reg = gen_reg_rtx (SImode); ++ ++ /* Set system register from nds32_intrinsic_register_names[] and replace operands[0] with the inverted single-bit mask for the requested interrupt.
*/ ++ if ((INTVAL (operands[0]) >= NDS32_INT_H16) ++ && (INTVAL (operands[0]) <= NDS32_INT_H31)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_MASK2__); ++ operands[0] = GEN_INT (~(1 << INTVAL (operands[0]))); ++ } ++ else if ((INTVAL (operands[0]) >= NDS32_INT_H32) ++ && (INTVAL (operands[0]) <= NDS32_INT_H63)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_MASK3__); ++ operands[0] = GEN_INT (~(1 << (INTVAL (operands[0]) - 32))); ++ } ++ else ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_MASK__); ++ ++ if (INTVAL (operands[0]) == NDS32_INT_SWI) ++ operands[0] = GEN_INT (~(1 << 16)); ++ else if ((INTVAL (operands[0]) >= NDS32_INT_ALZ) ++ && (INTVAL (operands[0]) <= NDS32_INT_DSSIM)) ++ operands[0] = GEN_INT (~(1 << (INTVAL (operands[0]) - 4))); ++ else ++ operands[0] = GEN_INT (~(1 << INTVAL (operands[0]))); ++ } ++ /* Read the selected mask register, AND out the bit, write it back, then emit the dsb pattern. */ ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn (gen_andsi3 (temp_reg, temp_reg, operands[0])); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++(define_expand "unspec_set_pending_swint" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SET_PENDING_SWINT)] ++ "" ++{ ++ /* Get $INT_PEND system register from nds32_intrinsic_register_names[] */ ++ rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__); ++ rtx temp_reg = gen_reg_rtx (SImode); ++ /* Read-modify-write that ORs in 65536, i.e. 1 << 16, the same bit unspec_clr_pending_swint masks off. */ ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn (gen_iorsi3 (temp_reg, temp_reg, GEN_INT (65536))); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++(define_expand "unspec_clr_pending_swint" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLR_PENDING_SWINT)] ++ "" ++{ ++ /* Get $INT_PEND system register from nds32_intrinsic_register_names[] */ ++ rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__); ++ rtx temp_reg = gen_reg_rtx (SImode); ++ /* Read-modify-write that ANDs the register with ~(1 << 16). */ ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn
(gen_andsi3 (temp_reg, temp_reg, GEN_INT (~(1 << 16)))); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++(define_expand "unspec_clr_pending_hwint" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_CLR_PENDING_HWINT)] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx clr_hwint; ++ unsigned offset = 0; ++ ++ /* Set system register from nds32_intrinsic_register_names[]. */ ++ if ((INTVAL (operands[0]) >= NDS32_INT_H0) ++ && (INTVAL (operands[0]) <= NDS32_INT_H15)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND__); ++ } ++ else if ((INTVAL (operands[0]) >= NDS32_INT_H16) ++ && (INTVAL (operands[0]) <= NDS32_INT_H31)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND2__); ++ } ++ else if ((INTVAL (operands[0]) >= NDS32_INT_H32) ++ && (INTVAL (operands[0]) <= NDS32_INT_H63)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND3__); ++ offset = 32; ++ } ++ else ++ error ("__nds32__clr_pending_hwint not support NDS32_INT_SWI," ++ " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ /* $INT_PEND is write-one-to-clear, so only the selected bit is written; no read-modify-write is needed.
*/ ++ clr_hwint = GEN_INT (1 << (INTVAL (operands[0]) - offset)); ++ /* system_reg is still NULL_RTX when error () was reported above; nothing is emitted in that case. */ ++ if (system_reg != NULL_RTX) ++ { ++ emit_move_insn (temp_reg, clr_hwint); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ } ++ DONE; ++}) ++ ++(define_expand "unspec_get_all_pending_int" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_GET_ALL_PENDING_INT))] ++ "" ++{ /* Emit the mfsr pattern with operands[0] and __NDS32_REG_INT_PEND__, then the dsb pattern. */ ++ rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__); ++ emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++(define_expand "unspec_get_pending_int" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_PENDING_INT))] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ ++ /* Set system register from nds32_intrinsic_register_names[]. */ ++ if ((INTVAL (operands[1]) >= NDS32_INT_H0) ++ && (INTVAL (operands[1]) <= NDS32_INT_H15)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND__); ++ operands[2] = GEN_INT (31 - INTVAL (operands[1])); ++ } ++ else if (INTVAL (operands[1]) == NDS32_INT_SWI) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND__); ++ operands[2] = GEN_INT (15); ++ } ++ else if ((INTVAL (operands[1]) >= NDS32_INT_H16) ++ && (INTVAL (operands[1]) <= NDS32_INT_H31)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND2__); ++ operands[2] = GEN_INT (31 - INTVAL (operands[1])); ++ } ++ else if ((INTVAL (operands[1]) >= NDS32_INT_H32) ++ && (INTVAL (operands[1]) <= NDS32_INT_H63)) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PEND3__); ++ operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 32)); ++ } ++ else ++ error ("get_pending_int not support NDS32_INT_ALZ," ++ " NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ /* mfsr op0, system_reg */ ++ if (system_reg != NULL_RTX) ++ { ++ emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); ++ emit_insn (gen_ashlsi3 (operands[0],
operands[0], operands[2])); ++ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); ++ emit_insn (gen_unspec_dsb ()); ++ } ++ DONE; ++}) ++ ++(define_expand "unspec_set_int_priority" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "") ++ (match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_SET_INT_PRIORITY)] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ rtx priority = NULL_RTX; ++ rtx mask = NULL_RTX; ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx mask_reg = gen_reg_rtx (SImode); ++ rtx set_reg = gen_reg_rtx (SImode); ++ unsigned offset = 0; ++ ++ /* Get system register form nds32_intrinsic_register_names[]. */ ++ if (INTVAL (operands[0]) <= NDS32_INT_H15) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI__); ++ offset = 0; ++ } ++ else if (INTVAL (operands[0]) >= NDS32_INT_H16 ++ && INTVAL (operands[0]) <= NDS32_INT_H31) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI2__); ++ /* The $INT_PRI2 first bit correspond to H16, so need ++ subtract 16. */ ++ offset = 16; ++ } ++ else if (INTVAL (operands[0]) >= NDS32_INT_H32 ++ && INTVAL (operands[0]) <= NDS32_INT_H47) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI3__); ++ /* The $INT_PRI3 first bit correspond to H32, so need ++ subtract 32. */ ++ offset = 32; ++ } ++ else if (INTVAL (operands[0]) >= NDS32_INT_H48 ++ && INTVAL (operands[0]) <= NDS32_INT_H63) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI4__); ++ /* The $INT_PRI3 first bit correspond to H48, so need ++ subtract 48. 
*/ ++ offset = 48; ++ } ++ else ++ error ("set_int_priority not support NDS32_INT_SWI," ++ " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ mask = GEN_INT (~(3 << 2 * (INTVAL (operands[0]) - offset))); ++ priority = GEN_INT ((int) (INTVAL (operands[1]) ++ << ((INTVAL (operands[0]) - offset) * 2))); ++ ++ if (system_reg != NULL_RTX) ++ { ++ emit_move_insn (mask_reg, mask); ++ emit_move_insn (set_reg, priority); ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn (gen_andsi3 (temp_reg, temp_reg, mask_reg)); ++ emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_reg)); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ } ++ DONE; ++}) ++ ++(define_expand "unspec_get_int_priority" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_INT_PRIORITY))] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ rtx priority = NULL_RTX; ++ unsigned offset = 0; ++ ++ /* Get system register form nds32_intrinsic_register_names[] */ ++ if (INTVAL (operands[1]) <= NDS32_INT_H15) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI__); ++ offset = 0; ++ } ++ else if (INTVAL (operands[1]) >= NDS32_INT_H16 ++ && INTVAL (operands[1]) <= NDS32_INT_H31) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI2__); ++ /* The $INT_PRI2 first bit correspond to H16, so need ++ subtract 16. */ ++ offset = 16; ++ } ++ else if (INTVAL (operands[1]) >= NDS32_INT_H32 ++ && INTVAL (operands[1]) <= NDS32_INT_H47) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI3__); ++ /* The $INT_PRI3 first bit correspond to H32, so need ++ subtract 32. */ ++ offset = 32; ++ } ++ else if (INTVAL (operands[1]) >= NDS32_INT_H48 ++ && INTVAL (operands[1]) <= NDS32_INT_H63) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_PRI4__); ++ /* The $INT_PRI4 first bit correspond to H48, so need ++ subtract 48. 
*/ ++ offset = 48; ++ } ++ else ++ error ("set_int_priority not support NDS32_INT_SWI," ++ " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ priority = GEN_INT (31 - 2 * (INTVAL (operands[1]) - offset)); ++ ++ if (system_reg != NULL_RTX) ++ { ++ emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); ++ emit_insn (gen_ashlsi3 (operands[0], operands[0], priority)); ++ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (30))); ++ emit_insn (gen_unspec_dsb ()); ++ } ++ DONE; ++}) ++ ++(define_expand "unspec_set_trig_level" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_LEVEL)] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx set_level; ++ unsigned offset = 0; ++ ++ if (INTVAL (operands[0]) >= NDS32_INT_H0 ++ && INTVAL (operands[0]) <= NDS32_INT_H31) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__); ++ offset = 0; ++ } ++ else if (INTVAL (operands[0]) >= NDS32_INT_H32 ++ && INTVAL (operands[0]) <= NDS32_INT_H63) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__); ++ offset = 32; ++ } ++ else ++ error ("__nds32__set_trig_type_level not support NDS32_INT_SWI," ++ " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ if (system_reg != NULL_RTX) ++ { ++ /* TRIGGER register, 0 mean level triggered and 1 mean edge triggered. 
*/ ++ set_level = GEN_INT (~(1 << (INTVAL (operands[0]) - offset))); ++ ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn (gen_andsi3 (temp_reg, temp_reg, set_level)); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ } ++ DONE; ++}) ++ ++(define_expand "unspec_set_trig_edge" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_EDGE)] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx set_level; ++ unsigned offset = 0; ++ ++ if (INTVAL (operands[0]) >= NDS32_INT_H0 ++ && INTVAL (operands[0]) <= NDS32_INT_H31) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__); ++ offset = 0; ++ } ++ else if (INTVAL (operands[0]) >= NDS32_INT_H32 ++ && INTVAL (operands[0]) <= NDS32_INT_H63) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__); ++ offset = 32; ++ } ++ else ++ error ("__nds32__set_trig_type_edge not support NDS32_INT_SWI," ++ " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ if (system_reg != NULL_RTX) ++ { ++ /* TRIGGER register, 0 mean level triggered and 1 mean edge triggered. 
*/ ++ set_level = GEN_INT ((1 << (INTVAL (operands[0]) - offset))); ++ ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_level)); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ } ++ DONE; ++}) ++ ++(define_expand "unspec_get_trig_type" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_TRIG_TYPE))] ++ "" ++{ ++ rtx system_reg = NULL_RTX; ++ rtx trig_type; ++ unsigned offset = 0; ++ ++ if (INTVAL (operands[1]) >= NDS32_INT_H0 ++ && INTVAL (operands[1]) <= NDS32_INT_H31) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__); ++ offset = 0; ++ } ++ else if (INTVAL (operands[1]) >= NDS32_INT_H32 ++ && INTVAL (operands[1]) <= NDS32_INT_H63) ++ { ++ system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER2__); ++ offset = 32; ++ } ++ else ++ error ("__nds32__get_trig_type not support NDS32_INT_SWI," ++ " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); ++ ++ if (system_reg != NULL_RTX) ++ { ++ trig_type = GEN_INT (31 - (INTVAL (operands[1]) - offset)); ++ ++ emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); ++ emit_insn (gen_ashlsi3 (operands[0], operands[0], trig_type)); ++ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); ++ emit_insn (gen_unspec_dsb ()); ++ } ++ DONE; ++}) ++ + ;; ------------------------------------------------------------------------ + + ;; Cache Synchronization Instructions +@@ -84,7 +611,7 @@ + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_ISYNC)] + "" + "isync\t%0" +- [(set_attr "type" "misc")] ++ [(set_attr "type" "mmu")] + ) + + (define_insn "unspec_volatile_isb" +@@ -94,4 +621,1077 @@ + [(set_attr "type" "misc")] + ) + ++(define_insn "unspec_dsb" ++ [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_DSB)] ++ "" ++ "dsb" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_msync" ++ [(unspec_volatile 
[(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_MSYNC)] ++ "" ++ "msync\t%0" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_msync_all" ++ [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_ALL)] ++ "" ++ "msync\tall" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_msync_store" ++ [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_STORE)] ++ "" ++ "msync\tstore" ++ [(set_attr "type" "misc")] ++) ++ ++;; Load and Store ++ ++(define_insn "unspec_volatile_llw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] UNSPEC_VOLATILE_LLW))] ++ "" ++ "llw\t%0, [%1 + %2]" ++ [(set_attr "length" "4")] ++) ++ ++(define_insn "unspec_lwup" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LWUP))] ++ "" ++ "lwup\t%0, [%1 + %2]" ++ [(set_attr "length" "4")] ++) ++ ++(define_insn "unspec_lbup" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LBUP))] ++ "" ++ "lbup\t%0, [%1 + %2]" ++ [(set_attr "length" "4")] ++) ++ ++(define_insn "unspec_volatile_scw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r"))) ++ (match_operand:SI 3 "register_operand" "0")] UNSPEC_VOLATILE_SCW))] ++ "" ++ "scw\t%0, [%1 + %2]" ++ [(set_attr "length" "4")] ++) ++ ++(define_insn "unspec_swup" ++ [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r"))) ++ (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SWUP))] ++ "" ++ "swup\t%2, [%0 + 
%1]" ++ [(set_attr "length" "4")] ++) ++ ++(define_insn "unspec_sbup" ++ [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "register_operand" "r"))) ++ (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SBUP))] ++ "" ++ "sbup\t%2, [%0 + %1]" ++ [(set_attr "length" "4")] ++) ++ ++;; CCTL ++ ++(define_insn "cctl_l1d_invalall" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_INVALALL)] ++ "" ++ "cctl\tL1D_INVALALL" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_l1d_wball_alvl" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL)] ++ "" ++ "cctl\tL1D_WBALL, alevel" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_l1d_wball_one_lvl" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL)] ++ "" ++ "cctl\tL1D_WBALL, 1level" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_idx_read" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_READ))] ++ "" ++ "cctl\t%0, %2, %X1" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_idx_write" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") ++ (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WRITE)] ++ "" ++ "cctl\t%1, %2, %W0" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_va_wbinval_l1" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") ++ (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1)] ++ "" ++ "cctl\t%1, %U0, 1level" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_va_wbinval_la" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") ++ (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA)] ++ "" ++ "cctl\t%1, %U0, alevel" ++ [(set_attr "type" 
"mmu")] ++) ++ ++(define_insn "cctl_idx_wbinval" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") ++ (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WBINVAL)] ++ "" ++ "cctl\t%1, %T0" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "cctl_va_lck" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") ++ (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_LCK)] ++ "" ++ "cctl\t%1, %R0" ++ [(set_attr "type" "mmu")] ++) ++ ++;;PREFETCH ++ ++(define_insn "prefetch_qw" ++ [(unspec_volatile:QI [(match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "nonmemory_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_QW)] ++ "" ++ "dpref\t%Z2, [%0 + %1]" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "prefetch_hw" ++ [(unspec_volatile:HI [(match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "nonmemory_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_HW)] ++ "" ++ "dpref\t%Z2, [%0 + (%1<<1)]" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "prefetch_w" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" " r, r") ++ (match_operand:SI 1 "nonmemory_operand" "Is15, r") ++ (match_operand:SI 2 "immediate_operand" " i, i")] UNSPEC_VOLATILE_DPREF_W)] ++ "" ++ "@ ++ dprefi.w\t%Z2, [%0 + %1] ++ dpref\t%Z2, [%0 + (%1<<2)]" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "prefetch_dw" ++ [(unspec_volatile:DI [(match_operand:SI 0 "register_operand" " r, r") ++ (match_operand:SI 1 "nonmemory_operand" "Is15, r") ++ (match_operand:SI 2 "immediate_operand" " i, i")] UNSPEC_VOLATILE_DPREF_DW)] ++ "" ++ "@ ++ dprefi.d\t%Z2, [%0 + %1] ++ dpref\t%Z2, [%0 + (%1<<3)]" ++ [(set_attr "type" "misc")] ++) ++ ++;; Performance Extension ++ ++(define_expand "unspec_ave" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "")] ++ "" 
++{ ++ emit_insn (gen_ave (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "unspec_bclr" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "immediate_operand" "")] ++ "" ++{ ++ unsigned HOST_WIDE_INT val = ~(1u << UINTVAL (operands[2])); ++ emit_insn (gen_andsi3 (operands[0], operands[1], gen_int_mode (val, SImode))); ++ DONE; ++}) ++ ++(define_expand "unspec_bset" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "immediate_operand" "")] ++ "" ++{ ++ unsigned HOST_WIDE_INT val = 1u << UINTVAL (operands[2]); ++ emit_insn (gen_iorsi3 (operands[0], operands[1], gen_int_mode (val, SImode))); ++ DONE; ++}) ++ ++(define_expand "unspec_btgl" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "immediate_operand" "")] ++ "" ++{ ++ unsigned HOST_WIDE_INT val = 1u << UINTVAL (operands[2]); ++ emit_insn (gen_xorsi3 (operands[0], operands[1], gen_int_mode (val, SImode))); ++ DONE; ++}) ++ ++(define_expand "unspec_btst" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "immediate_operand" "")] ++ "" ++{ ++ emit_insn (gen_btst (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "unspec_clip" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP))] ++ "" ++ "clip\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_clips" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS))] ++ "" ++ "clips\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn 
"unspec_clo" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_CLO))] ++ "" ++ "clo\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ssabssi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_abs:SI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++ "abs\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++;; Performance extension 2 ++ ++(define_insn "unspec_pbsad" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_PBSAD))] ++ "" ++ "pbsad\t%0, %1, %2" ++ [(set_attr "type" "pbsad") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_pbsada" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "0") ++ (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 3 "register_operand" "r")] UNSPEC_PBSADA))] ++ "" ++ "pbsada\t%0, %2, %3" ++ [(set_attr "type" "pbsada") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "bse" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "")] ++ "" ++ { ++ rtx temp0 = gen_reg_rtx (SImode); ++ rtx temp2 = gen_reg_rtx (SImode); ++ ++ emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0])); ++ emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2])); ++ emit_insn (gen_unspec_bse (temp0, operands[1], temp2, temp0, temp2)); ++ emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0); ++ emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2); ++ DONE; ++ } ++) ++ ++(define_insn "unspec_bse" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSE)) ++ (set 
(match_operand:SI 4 "register_operand" "=2") ++ (unspec:SI [(match_dup 1) ++ (match_dup 2) ++ (match_dup 0)] UNSPEC_BSE_2))] ++ "" ++ "bse\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "bsp" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "")] ++ "" ++ { ++ rtx temp0 = gen_reg_rtx (SImode); ++ rtx temp2 = gen_reg_rtx (SImode); ++ ++ emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0])); ++ emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2])); ++ emit_insn (gen_unspec_bsp (temp0, operands[1], temp2, temp0, temp2)); ++ emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0); ++ emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2); ++ DONE; ++ } ++) ++ ++(define_insn "unspec_bsp" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r") ++ (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSP)) ++ (set (match_operand:SI 4 "register_operand" "=2") ++ (unspec:SI [(match_dup 1) ++ (match_dup 2) ++ (match_dup 0)] UNSPEC_BSP_2))] ++ "" ++ "bsp\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++;; String Extension ++ ++(define_insn "unspec_ffb" ++ [(set (match_operand:SI 0 "register_operand" "=r, r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r, r") ++ (match_operand:SI 2 "nonmemory_operand" "Iu08, r")] UNSPEC_FFB))] ++ "" ++ "@ ++ ffbi\t%0, %1, %2 ++ ffb\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ffmism" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_FFMISM))] ++ "" ++ "ffmism\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_flmism" ++ [(set (match_operand:SI 0 
"register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_FLMISM))] ++ "" ++ "flmism\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++;; SATURATION ++ ++(define_insn "unspec_kaddw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "kaddw\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ksubw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "ksubw\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kaddh" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")) ++ (const_int 15)] UNSPEC_CLIPS))] ++ "" ++ "kaddh\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ksubh" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")) ++ (const_int 15)] UNSPEC_CLIPS))] ++ "" ++ "ksubh\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmbb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))] ++ "" ++ "kdmbb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmbt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] 
UNSPEC_KDMBT))] ++ "" ++ "kdmbt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmtb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))] ++ "" ++ "kdmtb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmtt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))] ++ "" ++ "kdmtt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmbb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))] ++ "" ++ "khmbb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmbt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))] ++ "" ++ "khmbt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmtb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))] ++ "" ++ "khmtb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmtt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))] ++ "" ++ "khmtt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kslraw" ++ [(set 
(match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))] ++ "" ++ "kslraw\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kslrawu" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))] ++ "" ++ "kslraw.u\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_volatile_rdov" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))] ++ "" ++ "rdov\t%0" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_volatile_clrov" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)] ++ "" ++ "clrov" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++;; System ++ ++(define_insn "unspec_sva" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVA))] ++ "" ++ "sva\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_svs" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVS))] ++ "" ++ "svs\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_jr_itoff" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_ITOFF)] ++ "" ++ "jr.itoff\t%0" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_jr_toff" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_TOFF)] ++ "" ++ "jr.toff\t%0" ++ [(set_attr "type" "branch")] ++) ++ ++(define_insn 
"unspec_jral_iton" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_ITON)] ++ "" ++ "jral.iton\t%0" ++ [(set_attr "type" "branch")] ++) ++ ++(define_insn "unspec_jral_ton" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_TON)] ++ "" ++ "jral.ton\t%0" ++ [(set_attr "type" "branch")] ++) ++ ++(define_insn "unspec_ret_itoff" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_ITOFF)] ++ "" ++ "ret.itoff\t%0" ++ [(set_attr "type" "branch")] ++) ++ ++(define_insn "unspec_ret_toff" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_TOFF)] ++ "" ++ "ret.toff\t%0" ++ [(set_attr "type" "branch")] ++) ++ ++(define_insn "unspec_standby_no_wake_grant" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT)] ++ "" ++ "standby\tno_wake_grant" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_standby_wake_grant" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_GRANT)] ++ "" ++ "standby\twake_grant" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_standby_wait_done" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_DONE)] ++ "" ++ "standby\twait_done" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_teqz" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TEQZ)] ++ "" ++ "teqz\t%0, %1" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_tnez" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") ++ (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TNEZ)] ++ "" ++ "tnez\t%0, %1" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_trap" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_TRAP)] ++ "" ++ "trap\t%0" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn 
"unspec_setend_big" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_BIG)] ++ "" ++ "setend.b" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_setend_little" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_LITTLE)] ++ "" ++ "setend.l" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_break" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_BREAK)] ++ "" ++ "break\t%0" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_syscall" ++ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_SYSCALL)] ++ "" ++ "syscall\t%0" ++ [(set_attr "type" "misc")] ++) ++ ++(define_insn "unspec_nop" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NOP)] ++ "" ++ "nop" ++ [(set_attr "type" "misc")] ++) ++ ++(define_expand "unspec_get_current_sp" ++ [(match_operand:SI 0 "register_operand" "")] ++ "" ++{ ++ emit_move_insn (operands[0], gen_rtx_REG (SImode, SP_REGNUM)); ++ DONE; ++}) ++ ++(define_expand "unspec_set_current_sp" ++ [(match_operand:SI 0 "register_operand" "")] ++ "" ++{ ++ emit_move_insn (gen_rtx_REG (SImode, SP_REGNUM), operands[0]); ++ DONE; ++}) ++ ++(define_expand "unspec_return_address" ++ [(match_operand:SI 0 "register_operand" "")] ++ "" ++{ ++ emit_move_insn (operands[0], gen_rtx_REG (SImode, LP_REGNUM)); ++ DONE; ++}) ++ ++(define_insn "unspec_signature_begin" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_BEGIN)] ++ "" ++ "isps" ++ [(set_attr "length" "4")] ++) ++ ++(define_insn "unspec_signature_end" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_END)] ++ "" ++ "! -----\;.signature_end\;j8 2\;! 
-----" ++ [(set_attr "length" "2")] ++) ++ ++;; Swap ++ ++(define_insn "unspec_wsbh" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_WSBH))] ++ "" ++ "wsbh\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++;; TLBOP Intrinsic ++ ++(define_insn "unspec_tlbop_trd" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TRD)] ++ "" ++ "tlbop\t%0, TRD" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_twr" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TWR)] ++ "" ++ "tlbop\t%0, TWR" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_rwr" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWR)] ++ "" ++ "tlbop\t%0, RWR" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_rwlk" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWLK)] ++ "" ++ "tlbop\t%0, RWLK" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_unlk" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_UNLK)] ++ "" ++ "tlbop\t%0, UNLK" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_pb" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_PB))] ++ "" ++ "tlbop\t%0, %1, PB" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_inv" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_INV)] ++ "" ++ "tlbop\t%0, INV" ++ [(set_attr "type" "mmu")] ++) ++ ++(define_insn "unspec_tlbop_flua" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_TLBOP_FLUA)] ++ "" ++ "tlbop\tFLUA" ++ [(set_attr "type" "mmu")] ++) ++ ++;;Unaligned Load/Store ++ ++(define_expand "unaligned_load_hw" ++ [(set (match_operand:HI 
0 "register_operand" "") ++ (unspec:HI [(mem:HI (match_operand:SI 1 "register_operand" ""))] UNSPEC_UALOAD_HW))] ++ "" ++{ ++ operands[0] = simplify_gen_subreg (SImode, operands[0], ++ GET_MODE (operands[0]), 0); ++ if (TARGET_ISA_V3M) ++ { ++ nds32_expand_unaligned_load (operands, HImode); ++ } ++ else ++ { ++ emit_insn (gen_unaligned_load_w (operands[0], ++ gen_rtx_MEM (SImode, operands[1]))); ++ ++ if (WORDS_BIG_ENDIAN) ++ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT(16))); ++ else ++ emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0xffff))); ++ } ++ ++ DONE; ++}) ++ ++(define_expand "unaligned_loadsi" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))] ++ "" ++{ ++ if (flag_unaligned_access) ++ { ++ rtx mem = gen_rtx_MEM (SImode, operands[1]); ++ emit_move_insn (operands[0], mem); ++ } ++ else ++ { ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_load (operands, SImode); ++ else ++ emit_insn (gen_unaligned_load_w (operands[0], ++ gen_rtx_MEM (SImode, (operands[1])))); ++ } ++ DONE; ++}) ++ ++(define_insn "unaligned_load_w" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (unspec:SI [(match_operand:SI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))] ++ "" ++{ ++ return nds32_output_lmw_single_word (operands); ++} ++ [(set_attr "type" "load") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_loaddi" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))] ++ "" ++{ ++ if (TARGET_ISA_V3M) ++ { ++ nds32_expand_unaligned_load (operands, DImode); ++ } ++ else ++ emit_insn (gen_unaligned_load_dw (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "unaligned_load_dw" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))] ++ "" ++{ ++ rtx 
otherops[3]; ++ otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0])); ++ otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); ++ otherops[2] = operands[1]; ++ ++ output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops); ++ return ""; ++} ++ [(set_attr "type" "load") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_store_hw" ++ [(set (mem:SI (match_operand:SI 0 "register_operand" "")) ++ (unspec:HI [(match_operand:HI 1 "register_operand" "")] UNSPEC_UASTORE_HW))] ++ "" ++{ ++ operands[1] = simplify_gen_subreg (SImode, operands[1], ++ GET_MODE (operands[1]), 0); ++ nds32_expand_unaligned_store (operands, HImode); ++ DONE; ++}) ++ ++(define_expand "unaligned_storesi" ++ [(set (mem:SI (match_operand:SI 0 "register_operand" "r")) ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_UASTORE_W))] ++ "" ++{ ++ if (flag_unaligned_access) ++ { ++ rtx mem = gen_rtx_MEM (SImode, operands[0]); ++ emit_move_insn (mem, operands[1]); ++ } ++ else ++ { ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_store (operands, SImode); ++ else ++ emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[0]), ++ operands[1])); ++ } ++ DONE; ++}) ++ ++(define_insn "unaligned_store_w" ++ [(set (match_operand:SI 0 "nds32_lmw_smw_base_operand" "=Umw") ++ (unspec:SI [(match_operand:SI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] ++ "" ++{ ++ return nds32_output_smw_single_word (operands); ++} ++ [(set_attr "type" "store") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_storedi" ++ [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] ++ "" ++{ ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_store (operands, DImode); ++ else ++ emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]), ++ operands[1])); ++ DONE; ++}) ++ ++(define_insn "unaligned_store_dw" ++ [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw") ++ (unspec:DI 
[(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))] ++ "" ++{ ++ return nds32_output_smw_double_word (operands); ++} ++ [(set_attr "type" "store") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unspec_unaligned_feature" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE))] ++ "" ++{ ++ /* Get $MMU_CTL system register form nds32_intrinsic_register_names[] */ ++ rtx system_reg = GEN_INT (__NDS32_REG_MMU_CTL__); ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx temp2_reg = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); ++ emit_move_insn (temp_reg, operands[0]); ++ emit_move_insn (temp2_reg, GEN_INT (0x800 << 12)); ++ emit_insn (gen_iorsi3 (operands[0], operands[0], temp2_reg)); ++ emit_insn (gen_unspec_volatile_mtsr (operands[0], system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ ++ emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ ++ emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (8))); ++ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); ++ DONE; ++}) ++ ++(define_expand "unspec_enable_unaligned" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)] ++ "" ++{ ++ /* Get $MMU_CTL system register form nds32_intrinsic_register_names[] */ ++ rtx system_reg = GEN_INT (__NDS32_REG_MMU_CTL__); ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx temp2_reg = gen_reg_rtx (SImode); ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_move_insn (temp2_reg, GEN_INT (0x800 << 12)); ++ emit_insn (gen_iorsi3 (temp_reg, temp_reg, temp2_reg)); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++(define_expand "unspec_disable_unaligned" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)] 
++ "" ++{ ++ /* Get $MMU_CTL system register form nds32_intrinsic_register_names[] */ ++ rtx system_reg = GEN_INT (__NDS32_REG_MMU_CTL__); ++ rtx temp_reg = gen_reg_rtx (SImode); ++ rtx temp2_reg = gen_reg_rtx (SImode); ++ emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); ++ emit_move_insn (temp2_reg, GEN_INT (0x800 << 12)); ++ emit_insn (gen_one_cmplsi2 (temp2_reg, temp2_reg)); ++ emit_insn (gen_andsi3 (temp_reg, temp_reg, temp2_reg)); ++ emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); ++ emit_insn (gen_unspec_dsb ()); ++ DONE; ++}) ++ ++;; abs alias kabs ++ ++(define_insn "unspec_kabs" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))] ++ "" ++ "kabs\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "no_hwloop" ++ [(const_int 0)] ++ "" ++{ ++ if (NDS32_HW_LOOP_P ()) ++ emit_insn (gen_unspec_no_hwloop ()); ++ else ++ emit_insn (gen_nop ()); ++ ++ DONE; ++}) ++ ++(define_insn "unspec_no_hwloop" ++ [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_NO_HWLOOP)] ++ "" ++ "" ++ [(set_attr "type" "misc")] ++) + ;; ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c +index 79be27e..be82609 100644 +--- a/gcc/config/nds32/nds32-isr.c ++++ b/gcc/config/nds32/nds32-isr.c +@@ -24,11 +24,41 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" +-#include "rtl.h" + #include "tree.h" +-#include "diagnostic-core.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" + #include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). 
*/ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" + + /* ------------------------------------------------------------------------ */ + +@@ -39,7 +69,260 @@ + We use an array to record essential information for each vector. */ + static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS]; + +-/* ------------------------------------------------------------------------ */ ++/* ------------------------------------------------------------- */ ++/* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to support following patterns: ++ ++ __attribute__((interrupt("XXX;YYY;id=ZZZ"))) ++ __attribute__((exception("XXX;YYY;id=ZZZ"))) ++ __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) ++ ++ We provide several functions to parse the strings. */ ++ ++static void ++nds32_interrupt_attribute_parse_string (const char *original_str, ++ const char *func_name, ++ unsigned int s_level) ++{ ++ char target_str[100]; ++ enum nds32_isr_save_reg save_reg; ++ enum nds32_isr_nested_type nested_type; ++ ++ char *save_all_regs_str, *save_caller_regs_str; ++ char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; ++ char *id_str, *value_str; ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ ++ /* 1. 
Detect 'save_all_regs' : NDS32_SAVE_ALL ++ 'save_caller_regs' : NDS32_PARTIAL_SAVE */ ++ save_all_regs_str = strstr (target_str, "save_all_regs"); ++ save_caller_regs_str = strstr (target_str, "save_caller_regs"); ++ ++ /* Note that if no argument is found, ++ use NDS32_PARTIAL_SAVE by default. */ ++ if (save_all_regs_str) ++ save_reg = NDS32_SAVE_ALL; ++ else if (save_caller_regs_str) ++ save_reg = NDS32_PARTIAL_SAVE; ++ else ++ save_reg = NDS32_PARTIAL_SAVE; ++ ++ /* 2. Detect 'nested' : NDS32_NESTED ++ 'not_nested' : NDS32_NOT_NESTED ++ 'ready_nested' : NDS32_NESTED_READY ++ 'critical' : NDS32_CRITICAL */ ++ nested_str = strstr (target_str, "nested"); ++ not_nested_str = strstr (target_str, "not_nested"); ++ ready_nested_str = strstr (target_str, "ready_nested"); ++ critical_str = strstr (target_str, "critical"); ++ ++ /* Note that if no argument is found, ++ use NDS32_NOT_NESTED by default. ++ Also, since 'not_nested' and 'ready_nested' both contains ++ 'nested' string, we check 'nested' with lowest priority. */ ++ if (not_nested_str) ++ nested_type = NDS32_NOT_NESTED; ++ else if (ready_nested_str) ++ nested_type = NDS32_NESTED_READY; ++ else if (nested_str) ++ nested_type = NDS32_NESTED; ++ else if (critical_str) ++ nested_type = NDS32_CRITICAL; ++ else ++ nested_type = NDS32_NOT_NESTED; ++ ++ /* 3. Traverse each id value and set corresponding information. */ ++ id_str = strstr (target_str, "id="); ++ ++ /* If user forgets to assign 'id', issue an error message. */ ++ if (id_str == NULL) ++ error ("require id argument in the string"); ++ /* Extract the value_str first. */ ++ id_str = strtok (id_str, "="); ++ value_str = strtok (NULL, ";"); ++ ++ /* Pick up the first id value token. */ ++ value_str = strtok (value_str, ","); ++ while (value_str != NULL) ++ { ++ int i; ++ i = atoi (value_str); ++ ++ /* For interrupt(0..63), the actual vector number is (9..72). 
*/ ++ i = i + 9; ++ if (i < 9 || i > 72) ++ error ("invalid id value for interrupt attribute"); ++ ++ /* Setup nds32_isr_vectors[] array. */ ++ nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT; ++ strcpy (nds32_isr_vectors[i].func_name, func_name); ++ nds32_isr_vectors[i].save_reg = save_reg; ++ nds32_isr_vectors[i].nested_type = nested_type; ++ nds32_isr_vectors[i].security_level = s_level; ++ ++ /* Fetch next token. */ ++ value_str = strtok (NULL, ","); ++ } ++ ++ return; ++} ++ ++static void ++nds32_exception_attribute_parse_string (const char *original_str, ++ const char *func_name, ++ unsigned int s_level) ++{ ++ char target_str[100]; ++ enum nds32_isr_save_reg save_reg; ++ enum nds32_isr_nested_type nested_type; ++ ++ char *save_all_regs_str, *save_caller_regs_str; ++ char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; ++ char *id_str, *value_str; ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ ++ /* 1. Detect 'save_all_regs' : NDS32_SAVE_ALL ++ 'save_caller_regs' : NDS32_PARTIAL_SAVE */ ++ save_all_regs_str = strstr (target_str, "save_all_regs"); ++ save_caller_regs_str = strstr (target_str, "save_caller_regs"); ++ ++ /* Note that if no argument is found, ++ use NDS32_PARTIAL_SAVE by default. */ ++ if (save_all_regs_str) ++ save_reg = NDS32_SAVE_ALL; ++ else if (save_caller_regs_str) ++ save_reg = NDS32_PARTIAL_SAVE; ++ else ++ save_reg = NDS32_PARTIAL_SAVE; ++ ++ /* 2. Detect 'nested' : NDS32_NESTED ++ 'not_nested' : NDS32_NOT_NESTED ++ 'ready_nested' : NDS32_NESTED_READY ++ 'critical' : NDS32_CRITICAL */ ++ nested_str = strstr (target_str, "nested"); ++ not_nested_str = strstr (target_str, "not_nested"); ++ ready_nested_str = strstr (target_str, "ready_nested"); ++ critical_str = strstr (target_str, "critical"); ++ ++ /* Note that if no argument is found, ++ use NDS32_NOT_NESTED by default. 
++ Also, since 'not_nested' and 'ready_nested' both contains ++ 'nested' string, we check 'nested' with lowest priority. */ ++ if (not_nested_str) ++ nested_type = NDS32_NOT_NESTED; ++ else if (ready_nested_str) ++ nested_type = NDS32_NESTED_READY; ++ else if (nested_str) ++ nested_type = NDS32_NESTED; ++ else if (critical_str) ++ nested_type = NDS32_CRITICAL; ++ else ++ nested_type = NDS32_NOT_NESTED; ++ ++ /* 3. Traverse each id value and set corresponding information. */ ++ id_str = strstr (target_str, "id="); ++ ++ /* If user forgets to assign 'id', issue an error message. */ ++ if (id_str == NULL) ++ error ("require id argument in the string"); ++ /* Extract the value_str first. */ ++ id_str = strtok (id_str, "="); ++ value_str = strtok (NULL, ";"); ++ ++ /* Pick up the first id value token. */ ++ value_str = strtok (value_str, ","); ++ while (value_str != NULL) ++ { ++ int i; ++ i = atoi (value_str); ++ ++ /* For exception(1..8), the actual vector number is (1..8). */ ++ if (i < 1 || i > 8) ++ error ("invalid id value for exception attribute"); ++ ++ /* Setup nds32_isr_vectors[] array. */ ++ nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION; ++ strcpy (nds32_isr_vectors[i].func_name, func_name); ++ nds32_isr_vectors[i].save_reg = save_reg; ++ nds32_isr_vectors[i].nested_type = nested_type; ++ nds32_isr_vectors[i].security_level = s_level; ++ ++ /* Fetch next token. */ ++ value_str = strtok (NULL, ","); ++ } ++ ++ return; ++} ++ ++static void ++nds32_reset_attribute_parse_string (const char *original_str, ++ const char *func_name) ++{ ++ char target_str[100]; ++ char *vectors_str, *nmi_str, *warm_str, *value_str; ++ ++ /* Deal with reset attribute. Its vector number is always 0. */ ++ nds32_isr_vectors[0].category = NDS32_ISR_RESET; ++ ++ ++ /* 1. Parse 'vectors=XXXX'. */ ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. 
*/ ++ strcpy (target_str, original_str); ++ vectors_str = strstr (target_str, "vectors="); ++ /* The total vectors = interrupt + exception numbers + reset. ++ There are 8 exception and 1 reset in nds32 architecture. ++ If user forgets to assign 'vectors', user default 16 interrupts. */ ++ if (vectors_str != NULL) ++ { ++ /* Extract the value_str. */ ++ vectors_str = strtok (vectors_str, "="); ++ value_str = strtok (NULL, ";"); ++ nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1; ++ } ++ else ++ nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1; ++ strcpy (nds32_isr_vectors[0].func_name, func_name); ++ ++ ++ /* 2. Parse 'nmi_func=YYYY'. */ ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ nmi_str = strstr (target_str, "nmi_func="); ++ if (nmi_str != NULL) ++ { ++ /* Extract the value_str. */ ++ nmi_str = strtok (nmi_str, "="); ++ value_str = strtok (NULL, ";"); ++ strcpy (nds32_isr_vectors[0].nmi_name, value_str); ++ } ++ ++ /* 3. Parse 'warm_func=ZZZZ'. */ ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ warm_str = strstr (target_str, "warm_func="); ++ if (warm_str != NULL) ++ { ++ /* Extract the value_str. */ ++ warm_str = strtok (warm_str, "="); ++ value_str = strtok (NULL, ";"); ++ strcpy (nds32_isr_vectors[0].warm_name, value_str); ++ } ++ ++ return; ++} ++/* ------------------------------------------------------------- */ + + /* A helper function to emit section head template. */ + static void +@@ -75,6 +358,15 @@ nds32_emit_isr_jmptbl_section (int vector_id) + char section_name[100]; + char symbol_name[100]; + ++ /* A critical isr does not need jump table section because ++ its behavior is not performed by two-level handler. */ ++ if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) ++ { ++ fprintf (asm_out_file, "\t! 
The vector %02d is a critical isr !\n", ++ vector_id); ++ return; ++ } ++ + /* Prepare jmptbl section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_jmptbl.%02d", vector_id); +@@ -95,7 +387,6 @@ nds32_emit_isr_vector_section (int vector_id) + const char *c_str = "CATEGORY"; + const char *sr_str = "SR"; + const char *nt_str = "NT"; +- const char *vs_str = "VS"; + char first_level_handler_name[100]; + char section_name[100]; + char symbol_name[100]; +@@ -143,46 +434,63 @@ nds32_emit_isr_vector_section (int vector_id) + case NDS32_NESTED_READY: + nt_str = "nr"; + break; ++ case NDS32_CRITICAL: ++ /* The critical isr is not performed by two-level handler. */ ++ nt_str = ""; ++ break; + } + +- /* Currently we have 4-byte or 16-byte size for each vector. +- If it is 4-byte, the first level handler name has suffix string "_4b". */ +- vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; +- + /* Now we can create first level handler name. */ +- snprintf (first_level_handler_name, sizeof (first_level_handler_name), +- "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str); ++ if (nds32_isr_vectors[vector_id].security_level == 0) ++ { ++ /* For security level 0, use normal first level handler name. */ ++ snprintf (first_level_handler_name, sizeof (first_level_handler_name), ++ "_nds32_%s_%s_%s", c_str, sr_str, nt_str); ++ } ++ else ++ { ++ /* For security level 1-3, use corresponding spl_1, spl_2, or spl_3. */ ++ snprintf (first_level_handler_name, sizeof (first_level_handler_name), ++ "_nds32_spl_%d", nds32_isr_vectors[vector_id].security_level); ++ } + + /* Prepare vector section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_vector.%02d", vector_id); + snprintf (symbol_name, sizeof (symbol_name), +- "_nds32_vector_%02d%s", vector_id, vs_str); ++ "_nds32_vector_%02d", vector_id); + + + /* Everything is ready. We can start emit vector section content. 
*/ + nds32_emit_section_head_template (section_name, symbol_name, + floor_log2 (nds32_isr_vector_size), false); + +- /* According to the vector size, the instructions in the +- vector section may be different. */ +- if (nds32_isr_vector_size == 4) ++ /* First we check if it is a critical isr. ++ If so, jump to user handler directly; otherwise, the instructions ++ in the vector section may be different according to the vector size. */ ++ if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) ++ { ++ /* This block is for critical isr. Jump to user handler directly. */ ++ fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n", ++ nds32_isr_vectors[vector_id].func_name); ++ } ++ else if (nds32_isr_vector_size == 4) + { + /* This block is for 4-byte vector size. +- Hardware $VID support is necessary and only one instruction +- is needed in vector section. */ ++ Hardware $VID support is necessary and only one instruction ++ is needed in vector section. */ + fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", + first_level_handler_name); + } + else + { + /* This block is for 16-byte vector size. +- There is NO hardware $VID so that we need several instructions +- such as pushing GPRs and preparing software vid at vector section. +- For pushing GPRs, there are four variations for +- 16-byte vector content and we have to handle each combination. +- For preparing software vid, note that the vid need to +- be substracted vector_number_offset. */ ++ There is NO hardware $VID so that we need several instructions ++ such as pushing GPRs and preparing software vid at vector section. ++ For pushing GPRs, there are four variations for ++ 16-byte vector content and we have to handle each combination. ++ For preparing software vid, note that the vid need to ++ be substracted vector_number_offset. 
*/ + if (TARGET_REDUCED_REGS) + { + if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) +@@ -235,13 +543,11 @@ nds32_emit_isr_reset_content (void) + { + unsigned int i; + unsigned int total_n_vectors; +- const char *vs_str; + char reset_handler_name[100]; + char section_name[100]; + char symbol_name[100]; + + total_n_vectors = nds32_isr_vectors[0].total_n_vectors; +- vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; + + fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n"); + +@@ -257,7 +563,7 @@ nds32_emit_isr_reset_content (void) + /* Emit vector references. */ + fprintf (asm_out_file, "\t ! references to vector section entries\n"); + for (i = 0; i < total_n_vectors; i++) +- fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str); ++ fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i); + + /* Emit jmptbl_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00"); +@@ -271,9 +577,9 @@ nds32_emit_isr_reset_content (void) + + /* Emit vector_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_vector.00"); +- snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str); ++ snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00"); + snprintf (reset_handler_name, sizeof (reset_handler_name), +- "_nds32_reset%s", vs_str); ++ "_nds32_reset"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, +@@ -319,12 +625,12 @@ void + nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) + { + int save_all_p, partial_save_p; +- int nested_p, not_nested_p, nested_ready_p; ++ int nested_p, not_nested_p, nested_ready_p, critical_p; + int intr_p, excp_p, reset_p; + + /* Initialize variables. 
*/ + save_all_p = partial_save_p = 0; +- nested_p = not_nested_p = nested_ready_p = 0; ++ nested_p = not_nested_p = nested_ready_p = critical_p = 0; + intr_p = excp_p = reset_p = 0; + + /* We must check at MOST one attribute to set save-reg. */ +@@ -343,8 +649,10 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) + not_nested_p = 1; + if (lookup_attribute ("nested_ready", func_attrs)) + nested_ready_p = 1; ++ if (lookup_attribute ("critical", func_attrs)) ++ critical_p = 1; + +- if ((nested_p + not_nested_p + nested_ready_p) > 1) ++ if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1) + error ("multiple nested types attributes to function %qD", func_decl); + + /* We must check at MOST one attribute to +@@ -358,6 +666,17 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) + + if ((intr_p + excp_p + reset_p) > 1) + error ("multiple interrupt attributes to function %qD", func_decl); ++ ++ /* Do not allow isr attributes under linux toolchain. */ ++ if (TARGET_LINUX_ABI && intr_p) ++ error ("cannot use interrupt attributes to function %qD " ++ "under linux toolchain", func_decl); ++ if (TARGET_LINUX_ABI && excp_p) ++ error ("cannot use exception attributes to function %qD " ++ "under linux toolchain", func_decl); ++ if (TARGET_LINUX_ABI && reset_p) ++ error ("cannot use reset attributes to function %qD " ++ "under linux toolchain", func_decl); + } + + /* Function to construct isr vectors information array. 
+@@ -369,15 +688,21 @@ nds32_construct_isr_vectors_information (tree func_attrs, + const char *func_name) + { + tree save_all, partial_save; +- tree nested, not_nested, nested_ready; ++ tree nested, not_nested, nested_ready, critical; + tree intr, excp, reset; + ++ tree secure; ++ tree security_level_list; ++ tree security_level; ++ unsigned int s_level; ++ + save_all = lookup_attribute ("save_all", func_attrs); + partial_save = lookup_attribute ("partial_save", func_attrs); + + nested = lookup_attribute ("nested", func_attrs); + not_nested = lookup_attribute ("not_nested", func_attrs); + nested_ready = lookup_attribute ("nested_ready", func_attrs); ++ critical = lookup_attribute ("critical", func_attrs); + + intr = lookup_attribute ("interrupt", func_attrs); + excp = lookup_attribute ("exception", func_attrs); +@@ -387,6 +712,63 @@ nds32_construct_isr_vectors_information (tree func_attrs, + if (!intr && !excp && !reset) + return; + ++ /* At first, we need to retrieve security level. */ ++ secure = lookup_attribute ("secure", func_attrs); ++ if (secure != NULL) ++ { ++ security_level_list = TREE_VALUE (secure); ++ security_level = TREE_VALUE (security_level_list); ++ s_level = TREE_INT_CST_LOW (security_level); ++ } ++ else ++ { ++ /* If there is no secure attribute, the security level is set by ++ nds32_isr_secure_level, which is controlled by -misr-secure=X option. ++ By default nds32_isr_secure_level should be 0. 
*/ ++ s_level = nds32_isr_secure_level; ++ } ++ ++ /* ------------------------------------------------------------- */ ++ /* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to support following patterns: ++ ++ __attribute__((interrupt("XXX;YYY;id=ZZZ"))) ++ __attribute__((exception("XXX;YYY;id=ZZZ"))) ++ __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) ++ ++ If interrupt/exception/reset appears and its argument is a ++ STRING_CST, we will parse string with some auxiliary functions ++ which set necessary isr information in the nds32_isr_vectors[] array. ++ After that, we can return immediately to avoid new-syntax isr ++ information construction. */ ++ if (intr != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) ++ { ++ tree string_arg = TREE_VALUE (TREE_VALUE (intr)); ++ nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg), ++ func_name, ++ s_level); ++ return; ++ } ++ if (excp != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) ++ { ++ tree string_arg = TREE_VALUE (TREE_VALUE (excp)); ++ nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg), ++ func_name, ++ s_level); ++ return; ++ } ++ if (reset != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) ++ { ++ tree string_arg = TREE_VALUE (TREE_VALUE (reset)); ++ nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg), ++ func_name); ++ return; ++ } ++ /* ------------------------------------------------------------- */ ++ + /* If we are here, either we have interrupt/exception, + or reset attribute. */ + if (intr || excp) +@@ -413,6 +795,9 @@ nds32_construct_isr_vectors_information (tree func_attrs, + /* Add vector_number_offset to get actual vector number. */ + vector_id = TREE_INT_CST_LOW (id) + vector_number_offset; + ++ /* Set security level. */ ++ nds32_isr_vectors[vector_id].security_level = s_level; ++ + /* Enable corresponding vector and set function name. 
*/ + nds32_isr_vectors[vector_id].category = (intr) + ? (NDS32_ISR_INTERRUPT) +@@ -432,6 +817,8 @@ nds32_construct_isr_vectors_information (tree func_attrs, + nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED; + else if (nested_ready) + nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY; ++ else if (critical) ++ nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL; + + /* Advance to next id. */ + id_list = TREE_CHAIN (id_list); +@@ -447,12 +834,12 @@ nds32_construct_isr_vectors_information (tree func_attrs, + nds32_isr_vectors[0].category = NDS32_ISR_RESET; + + /* Prepare id_list and identify id value so that +- we can set total number of vectors. */ ++ we can set total number of vectors. */ + id_list = TREE_VALUE (reset); + id = TREE_VALUE (id_list); + + /* The total vectors = interrupt + exception numbers + reset. +- There are 8 exception and 1 reset in nds32 architecture. */ ++ There are 8 exception and 1 reset in nds32 architecture. */ + nds32_isr_vectors[0].total_n_vectors = TREE_INT_CST_LOW (id) + 8 + 1; + strcpy (nds32_isr_vectors[0].func_name, func_name); + +@@ -488,7 +875,6 @@ nds32_construct_isr_vectors_information (tree func_attrs, + } + } + +-/* A helper function to handle isr stuff at the beginning of asm file. */ + void + nds32_asm_file_start_for_isr (void) + { +@@ -501,15 +887,14 @@ nds32_asm_file_start_for_isr (void) + strcpy (nds32_isr_vectors[i].func_name, ""); + nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE; + nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED; ++ nds32_isr_vectors[i].security_level = 0; + nds32_isr_vectors[i].total_n_vectors = 0; + strcpy (nds32_isr_vectors[i].nmi_name, ""); + strcpy (nds32_isr_vectors[i].warm_name, ""); + } + } + +-/* A helper function to handle isr stuff at the end of asm file. 
*/ +-void +-nds32_asm_file_end_for_isr (void) ++void nds32_asm_file_end_for_isr (void) + { + int i; + +@@ -543,6 +928,8 @@ nds32_asm_file_end_for_isr (void) + /* Found one vector which is interupt or exception. + Output its jmptbl and vector section content. */ + fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i); ++ fprintf (asm_out_file, "\t! security level: %d\n", ++ nds32_isr_vectors[i].security_level); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + nds32_emit_isr_jmptbl_section (i); + fprintf (asm_out_file, "\t! ....................................\n"); +@@ -576,4 +963,65 @@ nds32_isr_function_p (tree func) + || (t_reset != NULL_TREE)); + } + +-/* ------------------------------------------------------------------------ */ ++/* Return true if FUNC is a isr function with critical attribute. */ ++bool ++nds32_isr_function_critical_p (tree func) ++{ ++ tree t_intr; ++ tree t_excp; ++ tree t_critical; ++ ++ tree attrs; ++ ++ if (TREE_CODE (func) != FUNCTION_DECL) ++ abort (); ++ ++ attrs = DECL_ATTRIBUTES (func); ++ ++ t_intr = lookup_attribute ("interrupt", attrs); ++ t_excp = lookup_attribute ("exception", attrs); ++ ++ t_critical = lookup_attribute ("critical", attrs); ++ ++ /* If both interrupt and exception attribute does not appear, ++ we can return false immediately. */ ++ if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE)) ++ return false; ++ ++ /* Here we can guarantee either interrupt or ecxception attribute ++ does exist, so further check critical attribute. ++ If it also appears, we can return true. */ ++ if (t_critical != NULL_TREE) ++ return true; ++ ++ /* ------------------------------------------------------------- */ ++ /* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to handle string type. ++ If the string 'critical' appears in the interrupt/exception ++ string argument, we can return true. 
*/ ++ if (t_intr != NULL_TREE || t_excp != NULL_TREE) ++ { ++ char target_str[100]; ++ char *critical_str; ++ tree t_check; ++ tree string_arg; ++ ++ t_check = t_intr ? t_intr : t_excp; ++ if (TREE_CODE (TREE_VALUE (TREE_VALUE (t_check))) == STRING_CST) ++ { ++ string_arg = TREE_VALUE (TREE_VALUE (t_check)); ++ strcpy (target_str, TREE_STRING_POINTER (string_arg)); ++ critical_str = strstr (target_str, "critical"); ++ ++ /* Found 'critical' string, so return true. */ ++ if (critical_str) ++ return true; ++ } ++ } ++ /* ------------------------------------------------------------- */ ++ ++ /* Other cases, this isr function is not critical type. */ ++ return false; ++} ++ ++/* ------------------------------------------------------------- */ +diff --git a/gcc/config/nds32/nds32-linux.opt b/gcc/config/nds32/nds32-linux.opt +new file mode 100644 +index 0000000..75ccd76 +--- /dev/null ++++ b/gcc/config/nds32/nds32-linux.opt +@@ -0,0 +1,16 @@ ++mcmodel= ++Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) ++Specify the address generation strategy for code model. ++ ++Enum ++Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) ++Known cmodel types (for use with the -mcmodel= option): ++ ++EnumValue ++Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) +diff --git a/gcc/config/nds32/nds32-lmwsmw.c b/gcc/config/nds32/nds32-lmwsmw.c +new file mode 100644 +index 0000000..e3b66bf +--- /dev/null ++++ b/gcc/config/nds32/nds32-lmwsmw.c +@@ -0,0 +1,1998 @@ ++ ++/* lmwsmw pass of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. 
++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* ------------------------------------------------------------------------ */ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "input.h" ++#include "function.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "bitmap.h" ++#include "df.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). 
*/ ++#include "ggc.h" ++#include "tree-pass.h" ++#include "target-globals.h" ++#include "ira.h" ++#include "ira-int.h" ++#include "regrename.h" ++#include "nds32-load-store-opt.h" ++#include "nds32-reg-utils.h" ++#include <set> ++#include <vector> ++#include <algorithm> ++ ++#define NDS32_GPR_NUM 32 ++ ++static int ++compare_order (const void *a, const void *b) ++{ ++ const load_store_info_t *fp1 = (const load_store_info_t *) a; ++ const load_store_info_t *fp2 = (const load_store_info_t *) b; ++ const load_store_info_t f1 = *fp1; ++ const load_store_info_t f2 = *fp2; ++ ++ return f1.order < f2.order ? -1 : 1; ++} ++ ++static int ++compare_offset (const void *a, const void *b) ++{ ++ const load_store_info_t *fp1 = (const load_store_info_t *) a; ++ const load_store_info_t *fp2 = (const load_store_info_t *) b; ++ const load_store_info_t f1 = *fp1; ++ const load_store_info_t f2 = *fp2; ++ ++ return f1.offset < f2.offset ? -1 : 1; ++} ++ ++static bool ++compare_amount(available_reg_info_t a, available_reg_info_t b) ++{ ++ return a.amount > b.amount; ++} ++ ++static bool ++nds32_load_store_reg_plus_offset (rtx_insn *insn, load_store_info_t *load_store_info) ++{ ++ rtx pattern, mem, reg, base_reg, addr; ++ HOST_WIDE_INT offset; ++ bool load_p; ++ enum nds32_memory_post_type post_type = NDS32_NONE; ++ ++ pattern = PATTERN (insn); ++ mem = NULL_RTX; ++ reg = NULL_RTX; ++ base_reg = NULL_RTX; ++ offset = 0; ++ load_p = false; ++ ++ if (GET_CODE (pattern) != SET) ++ return false; ++ ++ if (MEM_P (SET_SRC (pattern))) ++ { ++ mem = SET_SRC (pattern); ++ reg = SET_DEST (pattern); ++ load_p = true; ++ } ++ ++ if (MEM_P (SET_DEST (pattern))) ++ { ++ mem = SET_DEST (pattern); ++ reg = SET_SRC (pattern); ++ load_p = false; ++ } ++ ++ if (mem == NULL_RTX || reg == NULL_RTX || !REG_P (reg)) ++ return false; ++ ++ /* The FPU ISA has not load-store-multiple instruction. 
*/ ++ if (!NDS32_IS_GPR_REGNUM (REGNO (reg))) ++ return false; ++ ++ if (MEM_VOLATILE_P (mem)) ++ return false; ++ ++ if (GET_MODE (reg) != SImode) ++ return false; ++ ++ gcc_assert (REG_P (reg)); ++ ++ addr = XEXP (mem, 0); ++ ++ /* We only care about [reg] and [reg+const]. */ ++ if (REG_P (addr)) ++ { ++ base_reg = addr; ++ offset = 0; ++ } ++ else if (GET_CODE (addr) == PLUS ++ && CONST_INT_P (XEXP (addr, 1))) ++ { ++ base_reg = XEXP (addr, 0); ++ offset = INTVAL (XEXP (addr, 1)); ++ if (!REG_P (base_reg)) ++ return false; ++ } ++ else if (GET_CODE (addr) == POST_INC) ++ { ++ base_reg = XEXP (addr, 0); ++ offset = 0; ++ post_type = NDS32_POST_INC; ++ } ++ else if (GET_CODE (addr) == POST_DEC) ++ { ++ base_reg = XEXP (addr, 0); ++ offset = 0; ++ post_type = NDS32_POST_DEC; ++ } ++ else ++ return false; ++ ++ if ((REGNO (base_reg) > NDS32_LAST_GPR_REGNUM) ++ && (REGNO (base_reg) < FIRST_PSEUDO_REGISTER)) ++ return false; ++ ++ if (load_store_info) ++ { ++ load_store_info->load_p = load_p; ++ load_store_info->offset = offset; ++ load_store_info->reg = reg; ++ load_store_info->base_reg = base_reg; ++ load_store_info->insn = insn; ++ load_store_info->mem = mem; ++ load_store_info->post_type = post_type; ++ } ++ ++ return true; ++} ++ ++static bool ++nds32_insn_alias_p (rtx memref, rtx x) ++{ ++ rtx mem; ++ ++ if (GET_CODE (x) == PARALLEL) ++ { ++ int i, j; ++ ++ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) ++ { ++ for (j = XVECLEN (x, i) - 1; j >= 0; j--) ++ if (nds32_insn_alias_p (memref, XVECEXP (x, i, j))) ++ return true; ++ } ++ ++ return false; ++ } ++ ++ if (GET_CODE (x) != SET) ++ return true; ++ ++ if (MEM_P (SET_SRC (x))) ++ mem = SET_SRC (x); ++ else if (MEM_P (SET_DEST (x))) ++ mem = SET_DEST (x); ++ else ++ return false; ++ ++ if (may_alias_p (memref, mem)) ++ return true; ++ else ++ return false; ++} ++ ++static void ++nds32_emit_multiple_insn (load_store_infos_t *multiple_insn, ++ rtx base_reg, rtx place, bool update_p) ++{ ++ unsigned int 
i; ++ unsigned int num_use_regs = multiple_insn->length (); ++ int par_index = 0; ++ int offset = 0; ++ bool load_p = (*multiple_insn)[0].load_p; ++ ++ rtx reg; ++ rtx mem; ++ rtx push_rtx; ++ rtx update_offset; ++ rtx parallel_insn; ++ ++ /* In addition to used registers, ++ we need one more space for (set base base-x) rtx. */ ++ if (update_p) ++ num_use_regs++; ++ ++ parallel_insn = gen_rtx_PARALLEL (VOIDmode, ++ rtvec_alloc (num_use_regs)); ++ ++ /* Set update insn. */ ++ if (update_p) ++ { ++ update_offset = GEN_INT (multiple_insn->length () * 4); ++ push_rtx = gen_addsi3 (base_reg, base_reg, update_offset); ++ XVECEXP (parallel_insn, 0, par_index) = push_rtx; ++ par_index++; ++ } ++ ++ /* Create (set mem regX) from start_reg to end_reg. */ ++ for (i = 0; i < multiple_insn->length (); ++i) ++ { ++ reg = (*multiple_insn)[i].reg; ++ mem = gen_frame_mem (SImode, plus_constant (Pmode, ++ base_reg, ++ offset)); ++ MEM_COPY_ATTRIBUTES (mem, (*multiple_insn)[i].mem); ++ ++ if (load_p) ++ push_rtx = gen_rtx_SET (reg, mem); ++ else ++ push_rtx = gen_rtx_SET (mem, reg); ++ ++ XVECEXP (parallel_insn, 0, par_index) = push_rtx; ++ offset = offset + 4; ++ par_index++; ++ } ++ ++ emit_insn_before (parallel_insn, place); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "lmw/smw instruction:\n"); ++ print_rtl_single (dump_file, parallel_insn); ++ } ++} ++ ++static void ++nds32_emit_add_insn (load_store_info_t insn, rtx base_reg, ++ rtx place, bool add_p) ++{ ++ rtx add_insn; ++ HOST_WIDE_INT offset = insn.offset; ++ if (!add_p) ++ offset = -offset; ++ ++ add_insn = gen_addsi3 (base_reg, insn.base_reg, GEN_INT (offset)); ++ emit_insn_before (add_insn, place); ++} ++ ++/* Get the instruction of same ID. 
*/ ++static void ++nds32_fetch_group_insn (load_store_infos_t *src, ++ load_store_infos_t *dst, int id) ++{ ++ unsigned int i = 0; ++ ++ while (i < src->length ()) ++ { ++ if (id == (*src)[i].group) ++ { ++ dst->safe_push ((*src)[i]); ++ src->ordered_remove (i); ++ i = 0; ++ } ++ else ++ i++; ++ } ++} ++ ++/* Check registers are not used and defined. */ ++static rtx ++nds32_lmwsmw_insert_place (load_store_infos_t *insn_set) ++{ ++ unsigned int i, position; ++ bool combine_p; ++ rtx_insn *insn; ++ auto_vec<load_store_info_t, 64> temp_set; ++ ++ for (i = 0; i < insn_set->length (); i++) ++ temp_set.safe_push ((*insn_set)[i]); ++ ++ /* Check registers are not used and defined ++ between first instruction and last instruction, ++ and find insert lmw/smw instruction place. ++ example: ++ lwi $r0, [$r2 + 4] ++ lwi $r1, [$r2 + 8] ++ ++ Check $r0 and $r1 are not used and defined. */ ++ temp_set.qsort (compare_order); ++ ++ for (position = 0; position < temp_set.length (); ++position) ++ { ++ combine_p = true; ++ ++ /* Check instruction form first instruction to position. */ ++ for (i = 0; i < position; i++) ++ { ++ for (insn = NEXT_INSN (temp_set[i].insn); ++ insn != temp_set[position].insn; ++ insn = NEXT_INSN (insn)) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ if (df_reg_used (insn, temp_set[i].reg) ++ || df_reg_defined (insn, temp_set[i].reg)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Fail:register has modify\n"); ++ fprintf (dump_file, "insn uid:%d, reg: r%d,\n", ++ INSN_UID (temp_set[position].insn), ++ REGNO (temp_set[position].reg)); ++ fprintf (dump_file, "Modify instruction:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ combine_p = false; ++ break; ++ } ++ } ++ } ++ ++ /* Check instruction form position to last instruction. 
*/ ++ for (i = position + 1; i < temp_set.length (); i++) ++ { ++ for (insn = temp_set[position].insn; ++ insn != temp_set[i].insn; ++ insn = NEXT_INSN (insn)) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ if (df_reg_used (insn, temp_set[i].reg) ++ || df_reg_defined (insn, temp_set[i].reg)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Fail:register has modify\n"); ++ fprintf (dump_file, "insn uid:%d, reg: r%d,\n", ++ INSN_UID (temp_set[position].insn), ++ REGNO (temp_set[position].reg)); ++ fprintf (dump_file, "Modify instruction:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ combine_p = false; ++ break; ++ } ++ } ++ } ++ ++ if (combine_p) ++ return temp_set[position].insn; ++ } ++ ++ return NULL_RTX; ++} ++ ++/* Check registers are not used and defined. */ ++static bool ++nds32_base_reg_safe_p (load_store_infos_t *insn_set) ++{ ++ unsigned int i; ++ rtx_insn *insn; ++ auto_vec<load_store_info_t, 64> temp_set; ++ ++ /* We will change 'insn_set' element order, ++ to avoid change order using 'temp_set'. */ ++ for (i = 0; i < insn_set->length (); i++) ++ temp_set.safe_push ((*insn_set)[i]); ++ ++ /* We want to combine load and store instructions, ++ need to check base register is not used and defined ++ between first insn and last insn. ++ example: ++ lwi $r0, [$r3 + 4] ++ ... <- check here ++ lwi $r1, [$r3 + 8] ++ ... <- check here ++ lwi $r2, [$r3 + 12] ++ ++ Check $r3 is not used and defined, ++ between first insn and last insn. */ ++ ++ /* Scan instruction from top to bottom, ++ so need to sort by order. 
*/ ++ temp_set.qsort (compare_order); ++ ++ for (i = 0; i < temp_set.length () - 1; ++i) ++ { ++ for (insn = NEXT_INSN (temp_set[i].insn); ++ insn != temp_set[i + 1].insn; ++ insn = NEXT_INSN (insn)) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ ++ if (nds32_insn_alias_p (temp_set[0].mem, PATTERN (insn))) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Memory alias:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ return false; ++ } ++ ++ if (temp_set[0].load_p) ++ { ++ if (df_reg_defined (insn, temp_set[0].base_reg)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Fail: base register has modify\n"); ++ fprintf (dump_file, "insn uid:%d, base reg: r%d,\n", ++ INSN_UID (temp_set[i].insn), ++ REGNO (temp_set[i].reg)); ++ fprintf (dump_file, "Modify instruction:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ return false; ++ } ++ } ++ else ++ { ++ if (df_reg_used (insn, temp_set[0].base_reg)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Fail: base register has modify\n"); ++ fprintf (dump_file, "insn uid:%d, base reg: r%d,\n", ++ INSN_UID (temp_set[i].insn), ++ REGNO (temp_set[i].reg)); ++ fprintf (dump_file, "Modify instruction:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ return false; ++ } ++ } ++ } ++ } ++ return true; ++} ++ ++static bool ++nds32_gain_size_p (load_store_infos_t *insn, bool new_base_p) ++{ ++ unsigned int i, new_cost = 4, old_cost = 0; ++ rtx reg; ++ rtx base_reg = (*insn)[0].base_reg; ++ HOST_WIDE_INT offset; ++ ++ for (i = 0; i < insn->length (); ++i) ++ { ++ reg = (*insn)[i].reg; ++ offset = (*insn)[i].offset; ++ ++ if (in_reg_class_p (reg, LOW_REGS)) ++ { ++ /* lwi37.sp/swi37.sp/lwi37/swi37 */ ++ if ((REGNO (base_reg) == SP_REGNUM ++ || REGNO (base_reg) == FP_REGNUM) ++ && (offset >= 0 && offset < 512 && (offset % 4 == 0))) ++ old_cost += 2; ++ /* lwi333/swi333 */ ++ else if (in_reg_class_p (base_reg, LOW_REGS) ++ && (offset >= 0 && offset < 32 && (offset % 4 == 0))) ++ old_cost += 2; ++ else ++ old_cost += 
4; ++ } ++ else ++ { ++ /* lwi450/swi450 */ ++ if (in_reg_class_p (reg, MIDDLE_REGS) ++ && offset == 0) ++ old_cost += 2; ++ else ++ old_cost += 4; ++ } ++ } ++ ++ offset = (*insn)[0].offset; ++ if (offset != 0) ++ { ++ /* addi333 */ ++ if (in_reg_class_p (base_reg, LOW_REGS) ++ && satisfies_constraint_Iu05 (GEN_INT (offset))) ++ new_cost += 2; ++ /* addi45 */ ++ else if (in_reg_class_p (base_reg, MIDDLE_REGS) ++ && satisfies_constraint_Iu05 (GEN_INT (offset))) ++ new_cost += 2; ++ else ++ new_cost += 4; ++ ++ /* subri */ ++ if (!new_base_p) ++ new_cost += 4; ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, "Code size compare: old code size is %d," ++ " new code size is %d\n", old_cost, new_cost); ++ ++ return new_cost < old_cost; ++} ++ ++static bool ++nds32_gain_speed_p (load_store_infos_t *insn, bool new_base_p) ++{ ++ unsigned int new_cost = 0, old_cost = insn->length (); ++ ++ if (TARGET_PIPELINE_GRAYWOLF) ++ { ++ new_cost = insn->length () / 2 + insn->length () % 2; ++ ++ if ((*insn)[0].offset != 0) ++ { ++ /* Need addi instruction. */ ++ new_cost += 1; ++ ++ /* Need subri instruction. */ ++ if (!new_base_p) ++ new_cost += 1; ++ } ++ } ++ else ++ { ++ if ((*insn)[0].offset != 0) ++ return false; ++ } ++ ++ return new_cost < old_cost; ++} ++ ++/* Check instructions can combine into a mulitple-instruction. */ ++static bool ++nds32_combine_multiple_p (load_store_infos_t *insn_set, bool new_base_p) ++{ ++ unsigned int i; ++ auto_vec<load_store_info_t, 64> temp_set; ++ ++ /* We will change 'insn_set' element order, ++ to avoid change order using 'temp_set'. */ ++ for (i = 0; i < insn_set->length (); i++) ++ temp_set.safe_push ((*insn_set)[i]); ++ ++ /* Check start offset need to sort by offset. */ ++ temp_set.qsort (compare_offset); ++ ++ /* The lmw/smw pattern, need two or more instructions. */ ++ if (temp_set.length () < 2) ++ return false; ++ ++ /* The lmw/smw pattern, only allow combine 25 instruction. 
*/ ++ if (temp_set.length () > 25) ++ return false; ++ ++ if (TARGET_LMWSMW_OPT_SIZE ++ || (TARGET_LMWSMW_OPT_AUTO && optimize_size)) ++ { ++ /* Compare original instructions with multiple instruction, ++ when mupltiple instruction is small than original instructions ++ then combine it. */ ++ if (!nds32_gain_size_p (&temp_set, new_base_p)) ++ return false; ++ } ++ else if (TARGET_LMWSMW_OPT_SPEED ++ || (TARGET_LMWSMW_OPT_AUTO && !optimize_size)) ++ { ++ /* The start offset is not zero, we need add a instrucion ++ to handle offset, it is not worth on -O3, -O2 level. */ ++ if (!nds32_gain_speed_p (&temp_set, new_base_p)) ++ return false; ++ } ++ ++ /* Base register is not equal register, when offset is not zero. */ ++ if (temp_set[0].offset != 0) ++ for (i = 0; i < temp_set.length (); ++i) ++ { ++ if (REGNO (temp_set[i].reg) ++ == REGNO (temp_set[0].base_reg)) ++ return false; ++ } ++ ++ /* Don't combine, when start offset is greater then Is15, ++ because need extra register. */ ++ if (!satisfies_constraint_Is15 (GEN_INT (temp_set[0].offset))) ++ return false; ++ ++ return true; ++} ++ ++static bool ++nds32_use_bim_p (load_store_infos_t *insn_set, ++ load_store_infos_t *ref_set) ++{ ++ rtx_insn *insn; ++ bool combine_p = true; ++ ++ /* Generate .bim form, need offset is continuous. */ ++ if (insn_set->last ().offset != ((*ref_set)[0].offset - 4)) ++ return false; ++ ++ /* Reject 'insn_set' instructions bottom ++ of the 'ref_set' instructions. */ ++ if ((*insn_set)[0].group > (*ref_set)[0].group) ++ return false; ++ ++ /* Scan instruction from top to bottom, ++ so need to sort by order. */ ++ insn_set->qsort (compare_order); ++ ref_set->qsort (compare_order); ++ ++ /* We want to combine .bim form instruction, ++ so need to check base register is not used and defined ++ between multiple-insn and next mulitple-insn. ++ example: ++ lmw.bim $r0, [$r2], $r1 ++ ... 
<- check here ++ lmw.bi $r3, [$r2], $r4 ++ ++ Use .bim form need to check $r2 is not used and defined, ++ between lmw.bim and lmw.bi. */ ++ for (insn = NEXT_INSN (insn_set->last ().insn); ++ insn != (*ref_set)[0].insn; ++ insn = NEXT_INSN (insn)) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ ++ if (nds32_insn_alias_p ((*insn_set)[0].mem, PATTERN (insn))) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Have memory instruction:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ combine_p = false; ++ break; ++ } ++ ++ if (df_reg_used (insn, (*insn_set)[0].base_reg) ++ || df_reg_defined (insn, (*insn_set)[0].base_reg)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Use .bi form: Base reg is" ++ " used or defined between multiple-insn" ++ " and next multiple-insn\n"); ++ fprintf (dump_file, "Base register: r%d,\n", ++ REGNO ((*insn_set)[0].base_reg)); ++ fprintf (dump_file, "use or def instruction:\n"); ++ print_rtl_single (dump_file, insn); ++ } ++ combine_p = false; ++ break; ++ } ++ } ++ ++ /* Restore element order. */ ++ insn_set->qsort (compare_offset); ++ ref_set->qsort (compare_offset); ++ ++ if (combine_p) ++ return true; ++ else ++ return false; ++} ++ ++static void ++nds32_merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head) ++{ ++ bitmap_iterator bi; ++ unsigned i; ++ IOR_HARD_REG_SET (*pset, head->hard_conflicts); ++ EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi) ++ { ++ du_head_p other = regrename_chain_from_id (i); ++ unsigned j = other->nregs; ++ gcc_assert (other != head); ++ while (j-- > 0) ++ SET_HARD_REG_BIT (*pset, other->regno + j); ++ } ++} ++ ++/* Check if NEW_REG can be the candidate register to rename for ++ REG in THIS_HEAD chain. THIS_UNAVAILABLE is a set of unavailable hard ++ registers. 
*/ ++static bool ++nds32_check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, ++ struct du_head *this_head, HARD_REG_SET this_unavailable) ++{ ++ enum machine_mode mode = GET_MODE (*this_head->first->loc); ++ int nregs = hard_regno_nregs[new_reg][mode]; ++ int i; ++ struct du_chain *tmp; ++ ++ for (i = nregs - 1; i >= 0; --i) ++ if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i) ++ || fixed_regs[new_reg + i] ++ || global_regs[new_reg + i] ++ /* Can't use regs which aren't saved by the prologue. */ ++ || (! df_regs_ever_live_p (new_reg + i) ++ && ! call_used_regs[new_reg + i]) ++#ifdef LEAF_REGISTERS ++ /* We can't use a non-leaf register if we're in a ++ leaf function. */ ++ || (crtl->is_leaf ++ && !LEAF_REGISTERS[new_reg + i]) ++#endif ++#ifdef HARD_REGNO_RENAME_OK ++ || ! HARD_REGNO_RENAME_OK (reg + i, new_reg + i) ++#endif ++ ) ++ return false; ++ ++ /* See whether it accepts all modes that occur in ++ definition and uses. */ ++ for (tmp = this_head->first; tmp; tmp = tmp->next_use) ++ if ((! HARD_REGNO_MODE_OK (new_reg, GET_MODE (*tmp->loc)) ++ && ! DEBUG_INSN_P (tmp->insn)) ++ || (this_head->need_caller_save_reg ++ && ! (HARD_REGNO_CALL_PART_CLOBBERED ++ (reg, GET_MODE (*tmp->loc))) ++ && (HARD_REGNO_CALL_PART_CLOBBERED ++ (new_reg, GET_MODE (*tmp->loc))))) ++ return false; ++ ++ return true; ++} ++ ++static int ++nds32_find_best_rename_reg (du_head_p this_head, int new_reg, int old_reg) ++{ ++ HARD_REG_SET unavailable; ++ int best_new_reg = old_reg; ++ ++ COMPL_HARD_REG_SET (unavailable, reg_class_contents[GENERAL_REGS]); ++ CLEAR_HARD_REG_BIT (unavailable, this_head->regno); ++ ++ /* Further narrow the set of registers we can use for renaming. ++ If the chain needs a call-saved register, mark the call-used ++ registers as unavailable. */ ++ if (this_head->need_caller_save_reg) ++ IOR_HARD_REG_SET (unavailable, call_used_reg_set); ++ ++ /* Mark registers that overlap this chain's lifetime as unavailable. 
*/ ++ nds32_merge_overlapping_regs (&unavailable, this_head); ++ ++ if (nds32_check_new_reg_p (old_reg, new_reg, this_head, unavailable)) ++ best_new_reg = new_reg; ++ ++ return best_new_reg; ++} ++ ++static bool ++nds32_try_rename_reg (rtx_insn *insn, unsigned op_pos, unsigned best_reg) ++{ ++ insn_rr_info *info; ++ du_head_p op_chain; ++ unsigned oldreg, newreg; ++ ++ info = &insn_rr[INSN_UID (insn)]; ++ ++ if (info->op_info == NULL) ++ return false; ++ ++ if (info->op_info[op_pos].n_chains == 0) ++ return false; ++ ++ op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); ++ ++ if (op_chain->cannot_rename) ++ return false; ++ ++ oldreg = op_chain->regno; ++ newreg = nds32_find_best_rename_reg (op_chain, best_reg, oldreg); ++ ++ if (newreg == oldreg) ++ return false; ++ ++ return true; ++} ++ ++/* Grouping consecutive registers. */ ++static void ++nds32_group_available_reg (HARD_REG_SET *available_regset, enum reg_class clazz, ++ std::vector <available_reg_info_t> *available_group) ++{ ++ hard_reg_set_iterator hrsi; ++ unsigned regno, pre_regno = 0; ++ unsigned count = 0; ++ available_reg_info_t reg_info; ++ std::vector<available_reg_info_t>::iterator it; ++ ++ if (!available_group->empty ()) ++ available_group->clear (); ++ ++ /* Find available register form $r16 to $r31. */ ++ EXECUTE_IF_SET_IN_HARD_REG_SET (reg_class_contents[clazz], 2, regno, hrsi) ++ { ++ /* Caller-save register or callee-save register but it's ever live. 
*/ ++ if (TEST_HARD_REG_BIT (*available_regset, regno) ++ && (call_used_regs[regno] || df_regs_ever_live_p (regno))) ++ { ++ if (pre_regno == 0 ++ || (pre_regno + 1) == regno) ++ count++; ++ } ++ else ++ { ++ if (count >= 2) ++ { ++ reg_info.amount = count; ++ reg_info.end = pre_regno; ++ reg_info.start = pre_regno - count + 1; ++ available_group->push_back (reg_info); ++ } ++ count = 0; ++ } ++ pre_regno = regno; ++ } ++ ++ sort (available_group->begin(), available_group->end(), compare_amount); ++ ++ if (dump_file) ++ { ++ for (it = available_group->begin(); ++ it != available_group->end(); ++it) ++ fprintf (dump_file, ++ "available amount = %d start = %d " ++ "end = %d \n", it->amount, it->start, ++ it->end); ++ } ++} ++ ++/* Try to rename insn's register in order. */ ++static void ++nds32_find_reg (load_store_infos_t *insn, load_store_infos_t *rename_insn, ++ HARD_REG_SET *available_regset) ++{ ++ int can_rename_number; ++ unsigned i, regno, amount; ++ unsigned op_pos = (*insn)[0].load_p ? 0 : 1; ++ auto_vec<load_store_info_t, 64> temp_set; ++ std::vector<available_reg_info_t> available_group; ++ std::vector<available_reg_info_t>::iterator it; ++ auto_vec<load_store_info_t, 64> down_set, up_set; ++ unsigned int down_num = 0, up_num = 0; ++ long offset; ++ int m; ++ ++ /* We will change 'insn' element order, ++ to avoid change order using 'temp_set'. */ ++ for (i = 0; i < insn->length (); i++) ++ temp_set.safe_push ((*insn)[i]); ++ ++ if (temp_set[0].post_type == NDS32_NONE) ++ temp_set.qsort (compare_offset); ++ ++ nds32_group_available_reg (available_regset, GENERAL_REGS, &available_group); ++ ++ /* Check rename register form top insn to bottom insn, ++ and avoid using fp, sp, lp, gp registers. 
*/ ++ regno = REGNO (temp_set[0].reg); ++ can_rename_number = regno + temp_set.length () - 1; ++ offset = temp_set[0].offset; ++ ++ if (can_rename_number < FP_REGNUM) ++ for (i = 1; i < temp_set.length (); ++i) ++ { ++ /* Find this case: ++ lwi $r0, [$r2 + 4] ++ lwi $r3, [$r2 + 8] ++ ++ Rename $r3 to $r1. */ ++ down_num++; ++ if ((regno + i) != REGNO (temp_set[i].reg)) ++ { ++ if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno + i)) ++ { ++ /* Store in temparary set. */ ++ down_set.safe_push (temp_set[i]); ++ down_set.last ().new_reg = regno + i; ++ } ++ else ++ /* Stop when the register sequence is broken. */ ++ break; ++ } ++ } ++ ++ /* Check rename register form bottom insn to top insn, ++ and avoid using fp, sp, lp, gp registers. */ ++ regno = REGNO (temp_set.last ().reg); ++ can_rename_number = regno - temp_set.length () + 1; ++ ++ if (can_rename_number > 0 && regno < FP_REGNUM) ++ for (i = temp_set.length () - 1; i > 0; --i) ++ { ++ /* Find this case: ++ lwi $r1, [$r2 + 4] ++ lwi $r4, [$r2 + 8] ++ ++ Rename $r1 to $r3. */ ++ up_num++; ++ if ((regno - i) != REGNO (temp_set[i - 1].reg)) ++ { ++ if (nds32_try_rename_reg (temp_set[i - 1].insn, op_pos, regno - i)) ++ { ++ /* Store in rename_insn. */ ++ up_set.safe_push (temp_set[i - 1]); ++ up_set.last ().new_reg = regno - i; ++ } ++ else ++ /* Stop when the register sequence is broken. */ ++ break; ++ } ++ } ++ ++ /* Rename for the longest sequence. */ ++ /* The overhead of zero offset instruction is lowest, so try it first. 
*/ ++ if ((offset == 0 || down_num >= up_num) && !down_set.is_empty ()) ++ { ++ for (m = down_set.length () - 1; m >= 0; --m) ++ { ++ regno = REGNO (down_set[m].reg); ++ CLEAR_HARD_REG_BIT (*available_regset, regno); ++ rename_insn->safe_push (down_set[m]); ++ } ++ nds32_group_available_reg (available_regset, GENERAL_REGS, ++ &available_group); ++ return; ++ } ++ else if (up_num >= down_num && !up_set.is_empty ()) ++ { ++ for (m = up_set.length () - 1; m >= 0; --m) ++ { ++ regno = REGNO (up_set[m].reg); ++ CLEAR_HARD_REG_BIT (*available_regset, regno); ++ rename_insn->safe_push (up_set[m]); ++ } ++ nds32_group_available_reg (available_regset, GENERAL_REGS, ++ &available_group); ++ return; ++ } ++ /* Check whether it is empty, We will use available table. */ ++ else if (available_group.empty ()) ++ return; ++ ++ amount = available_group.begin ()->amount; ++ /* Using the minimum number, as the rename amount. */ ++ if (amount > temp_set.length ()) ++ amount = temp_set.length (); ++ ++ /* Using most available register number to rename. */ ++ regno = available_group.begin ()->start; ++ for (i = 0; i < amount; ++i) ++ { ++ if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno)) ++ { ++ rename_insn->safe_push (temp_set[i]); ++ rename_insn->last ().new_reg = regno; ++ CLEAR_HARD_REG_BIT (*available_regset, regno); ++ regno++; ++ } ++ else ++ /* Stop when the register sequence is broken. */ ++ break; ++ } ++ ++ /* Check length here because the whole sequence entries ++ have to be renamed. */ ++ if (rename_insn->length () > 1) ++ { ++ /* Update available table. */ ++ nds32_group_available_reg (available_regset, GENERAL_REGS, ++ &available_group); ++ return; ++ } ++ ++ /* Using all available register to rename each insn. */ ++ for (i = 0; i < (temp_set.length () - 1); i += 2) ++ { ++ for (it = available_group.begin(); ++ it != available_group.end(); ++it) ++ { ++ bool change_p = false; ++ unsigned int j; ++ regno = it->start; ++ ++ /* Once replaced two instructions. 
*/ ++ for (j = regno; j < (it->end + 1); j += 2) ++ { ++ if (nds32_try_rename_reg (temp_set[i].insn, op_pos, regno) ++ && nds32_try_rename_reg (temp_set[i + 1].insn, ++ op_pos, regno + 1)) ++ { ++ rename_insn->safe_push (temp_set[i]); ++ rename_insn->last ().new_reg = regno; ++ CLEAR_HARD_REG_BIT (*available_regset, regno); ++ ++ rename_insn->safe_push (temp_set[i + 1]); ++ rename_insn->last ().new_reg = regno + 1; ++ CLEAR_HARD_REG_BIT (*available_regset, regno + 1); ++ change_p = true; ++ break; ++ } ++ } ++ ++ if (change_p) ++ { ++ nds32_group_available_reg (available_regset, GENERAL_REGS, ++ &available_group); ++ break; ++ } ++ } ++ } ++} ++ ++static void ++nds32_rename_reg (rtx_insn *insn, unsigned op_pos, unsigned newreg) ++{ ++ insn_rr_info *info; ++ du_head_p op_chain; ++ ++ info = &insn_rr[INSN_UID (insn)]; ++ op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Try to rename operand %d to %d:\n", ++ op_pos, newreg); ++ print_rtl_single (dump_file, insn); ++ } ++ ++ regrename_do_replace (op_chain, newreg); ++ ++ if (dump_file) ++ { ++ print_rtl_single (dump_file, insn); ++ } ++} ++ ++/* Combine mutilple load/store insn into a lmw/smw insn. */ ++static void ++nds32_combine_bi_insn (load_store_infos_t *load_store_info) ++{ ++ auto_vec<load_store_info_t, 64> candidate_set, bi_set; ++ unsigned int i, j, regno; ++ ++ bool load_insn_p; ++ enum nds32_memory_post_type post_type; ++ ++ for (i = 0; i < load_store_info->length (); ++i) ++ { ++ /* Recording instruction order of priority and initinal place. 
*/ ++ (*load_store_info)[i].order = i; ++ (*load_store_info)[i].place = false; ++ candidate_set.safe_push ((*load_store_info)[i]); ++ } ++ ++ for (i = 0; i < candidate_set.length (); ++i) ++ { ++ load_insn_p = candidate_set[i].load_p; ++ post_type = candidate_set[i].post_type; ++ regno = REGNO (candidate_set[i].reg); ++ ++ for (j = i + 1; j < candidate_set.length (); ++j) ++ { ++ if ((post_type == candidate_set[j].post_type) ++ && (load_insn_p == candidate_set[j].load_p) ++ && ((regno + 1) == REGNO (candidate_set[j].reg))) ++ { ++ bi_set.safe_push (candidate_set[i]); ++ bi_set.safe_push (candidate_set[j]); ++ ++ if (nds32_combine_multiple_p (&bi_set, false) ++ && nds32_base_reg_safe_p (&bi_set) ++ && nds32_lmwsmw_insert_place (&bi_set) != NULL_RTX) ++ { ++ rtx place = nds32_lmwsmw_insert_place (&bi_set); ++ rtx base_reg = bi_set[0].base_reg; ++ ++ nds32_emit_multiple_insn (&bi_set, base_reg, place, true); ++ delete_insn (bi_set[i].insn); ++ delete_insn (bi_set[j].insn); ++ candidate_set.ordered_remove (j); ++ bi_set.block_remove (0, bi_set.length ()); ++ break; ++ } ++ ++ bi_set.block_remove (0, bi_set.length ()); ++ } ++ } ++ } ++} ++ ++/* Combine mutilple load/store insn into a lmw/smw insn. */ ++static void ++nds32_combine_load_store_insn (load_store_infos_t *load_store_info, ++ HARD_REG_SET *available_regset) ++{ ++ auto_vec<load_store_info_t, 64> candidate_set, main_set, temp_set; ++ auto_vec<load_store_info_t, 64> first_set, second_set; ++ HOST_WIDE_INT current_offset, last_offset = 0, add_offset = 0; ++ unsigned int i, j, regno; ++ int group_num = 0, group_id; ++ bool load_insn_p; ++ bool new_base_p = false; ++ bool prev_bim_p = false; ++ bool inc_p = true, dec_p = true; ++ rtx new_base_reg = NULL_RTX; ++ rtx base_reg = (*load_store_info)[0].base_reg; ++ rtx place; ++ unsigned new_base_regnum; ++ ++ /* Get available register to add offset for first instruction. 
*/ ++ new_base_regnum = find_available_reg (available_regset, GENERAL_REGS); ++ if (new_base_regnum != INVALID_REGNUM) ++ { ++ CLEAR_HARD_REG_BIT (*available_regset, new_base_regnum); ++ new_base_reg = gen_rtx_REG (Pmode, new_base_regnum); ++ /* Copy attribute form base register to new base register. */ ++ ORIGINAL_REGNO (new_base_reg) = ++ ORIGINAL_REGNO ((*load_store_info)[0].base_reg); ++ REG_ATTRS (new_base_reg) = REG_ATTRS ((*load_store_info)[0].base_reg); ++ new_base_p = true; ++ ++ if (dump_file) ++ fprintf (dump_file, "Have new base register: %d\n", new_base_regnum); ++ } ++ ++ /* Recording instruction order of priority and initinal place. */ ++ for (i = 0; i < load_store_info->length (); ++i) ++ { ++ (*load_store_info)[i].order = i; ++ (*load_store_info)[i].place = false; ++ } ++ ++ /* Fetch first instruction information from 'load_store_info', ++ we will use first instruction as base, to search next instruction. */ ++ candidate_set.safe_push ((*load_store_info)[0]); ++ /* Set offset, regno, load_p state from candidate_set. */ ++ current_offset = candidate_set[0].offset; ++ regno = REGNO (candidate_set[0].reg); ++ load_insn_p = candidate_set[0].load_p; ++ /* Set first instruction group ID, ++ the group ID mark instruction for the same group. */ ++ candidate_set[0].group = group_num; ++ ++ /* Search instructions can be combined to a lmw/smw instruction. */ ++ for (i = 1; i < load_store_info->length (); ++i) ++ { ++ /* Collecting register number and offset is increase, ++ for example: ++ ++ lwi $r0, [$r22 + 4] <- base instruction ++ lwi $r1, [$r22 + 8] <- collect object ++ ++ The collect object (regno + 1), (offset + 4) ++ from base instruction. */ ++ if ((current_offset == (*load_store_info)[i].offset - 4) ++ && ((regno + 1) == REGNO ((*load_store_info)[i].reg)) ++ && (load_insn_p == (*load_store_info)[i].load_p) ++ && inc_p) ++ { ++ /* Give instruction group ID. */ ++ (*load_store_info)[i].group = group_num; ++ /* Save instruction. 
*/ ++ candidate_set.safe_push ((*load_store_info)[i]); ++ /* Update state, next register number and offset. */ ++ regno = REGNO ((*load_store_info)[i].reg); ++ current_offset += 4; ++ /* Close decrease type, search increase type. */ ++ dec_p = false; ++ } ++ /* Collecting register number and offset is decrease, ++ for example: ++ ++ lwi $r2, [$r22 + 8] <- base instruction ++ lwi $r1, [$r22 + 4] <- collect object ++ ++ The collect object (regno - 1), (offset - 4) ++ from base instruction. */ ++ else if ((current_offset == (*load_store_info)[i].offset + 4) ++ && ((regno - 1) == REGNO ((*load_store_info)[i].reg)) ++ && (load_insn_p == (*load_store_info)[i].load_p) ++ && dec_p) ++ { ++ /* Give instruction group ID. */ ++ (*load_store_info)[i].group = group_num; ++ /* Save instruction. */ ++ candidate_set.safe_push ((*load_store_info)[i]); ++ ++ /* Update state, next register number and offset. */ ++ regno = REGNO ((*load_store_info)[i].reg); ++ current_offset -= 4; ++ /* Close increase type, search decrease type. */ ++ inc_p = false; ++ } ++ else ++ { ++ inc_p = true; ++ dec_p = true; ++ } ++ ++ /* Instructions collect is complete. */ ++ if ((inc_p && dec_p) ++ || (i + 1) == load_store_info->length ()) ++ { ++ /* Filter candidate instructions. */ ++ if (nds32_combine_multiple_p (&candidate_set, new_base_p) ++ && nds32_base_reg_safe_p (&candidate_set) ++ && nds32_lmwsmw_insert_place (&candidate_set) != NULL_RTX) ++ { ++ /* Store candidate instructions to 'main_set'. */ ++ for (j = 0; j < candidate_set.length (); j++) ++ main_set.safe_push (candidate_set[j]); ++ } ++ ++ /* Scan to the last instruction, it is complete. */ ++ if ((i + 1) == load_store_info->length ()) ++ break; ++ ++ /* Clean candidate_set sequence. */ ++ candidate_set.block_remove (0, candidate_set.length ()); ++ /* Reinitialize first instruction infomation ++ to search next instruction. */ ++ candidate_set.safe_push ((*load_store_info)[i]); ++ /* Update group number for next sequence. 
*/ ++ group_num ++; ++ /* Set offset, regno, load_p state from candidate_set. */ ++ current_offset = candidate_set.last ().offset; ++ regno = REGNO (candidate_set.last ().reg); ++ load_insn_p = candidate_set.last ().load_p; ++ candidate_set.last ().group = group_num; ++ } ++ else if (!nds32_base_reg_safe_p (&candidate_set) ++ || nds32_lmwsmw_insert_place (&candidate_set) == NULL_RTX) ++ { ++ /* Check collect instruction for each instruction, ++ we store (n - 1) instructions in group, and ++ last instruction make next group First instruction. */ ++ for (j = 0; j < (candidate_set.length () - 1); j++) ++ temp_set.safe_push (candidate_set[j]); ++ ++ /* Store candidate instructions to 'main_set'. */ ++ if (nds32_combine_multiple_p (&temp_set, new_base_p)) ++ { ++ for (j = 0; j < (temp_set.length ()); j++) ++ main_set.safe_push (temp_set[j]); ++ } ++ ++ /* Clean temp_set sequence. */ ++ temp_set.block_remove (0, temp_set.length ()); ++ /* Clean candidate_set sequence. */ ++ candidate_set.block_remove (0, (candidate_set.length () - 1)); ++ /* Update group number for next sequence. */ ++ group_num ++; ++ /* Set offset, regno, load_p state from candidate_set. */ ++ current_offset = candidate_set.last ().offset; ++ regno = REGNO (candidate_set.last ().reg); ++ load_insn_p = candidate_set.last ().load_p; ++ candidate_set.last ().group = group_num; ++ /* Reset it for search increase and decrease type. 
*/ ++ inc_p = true; ++ dec_p = true; ++ } ++ } ++ ++ if (dump_file) ++ { ++ if (!main_set.is_empty ()) ++ fprintf (dump_file,"Do lmwsmw instructions:\n"); ++ for (i = 0; i < main_set.length (); ++i) ++ { ++ fprintf (dump_file, ++ "regno = %d base_regno = %d " ++ "offset = " HOST_WIDE_INT_PRINT_DEC " " ++ "load_p = %d UID = %u group = %d," ++ " order = %d, place = %d\n", ++ REGNO (main_set[i].reg), ++ REGNO (main_set[i].base_reg), ++ main_set[i].offset, ++ main_set[i].load_p, ++ INSN_UID (main_set[i].insn), ++ main_set[i].group, ++ main_set[i].order, ++ main_set[i].place); ++ } ++ } ++ ++ /* Fetch first group instruction from main_set. */ ++ if (!main_set.is_empty ()) ++ { ++ /* Sort main_set by offset. */ ++ main_set.qsort (compare_offset); ++ ++ group_id = main_set[0].group; ++ nds32_fetch_group_insn (&main_set, &first_set, group_id); ++ last_offset = first_set.last ().offset; ++ } ++ ++ /* Main loop for emit lmw/smw instrucion. */ ++ while (!main_set.is_empty ()) ++ { ++ /* Get second group ID. */ ++ group_id = main_set[0].group; ++ for (i = 0; i < main_set.length (); ++i) ++ { ++ /* Prefer get consecutive offset form ++ first group to second group */ ++ if ((last_offset + 4) == main_set[i].offset) ++ { ++ group_id = main_set[i].group; ++ break; ++ } ++ } ++ ++ /* Fetch second instrucion group. */ ++ nds32_fetch_group_insn (&main_set, &second_set, group_id); ++ /* Get lmw/smw insert place. */ ++ place = nds32_lmwsmw_insert_place (&first_set); ++ ++ /* Adjust address offset, because lmw/smw instruction ++ only allow offset is zero. ++ example: ++ lwi $r0, [$r3 + 4] ++ lwi $r1, [$r3 + 8] ++ lwi $r2, [$r3 + 12] ++ ++ combine into ++ ++ addi $r3, $r3, 4 ++ lwm.bi(m) $r0, [$r3], $r2 ++ ++ Need addi instrucion to handle offset. 
*/ ++ if (first_set[0].offset != 0 && !prev_bim_p) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Use addi insn handle offset: " ++ "" HOST_WIDE_INT_PRINT_DEC "\n", ++ first_set[0].offset); ++ /* Use available register to process offset, ++ and don't recovey base register value. */ ++ if (new_base_p) ++ { ++ base_reg = new_base_reg; ++ add_offset = 0; ++ CLEAR_HARD_REG_BIT (*available_regset, new_base_regnum); ++ } ++ else ++ add_offset = first_set[0].offset; ++ ++ nds32_emit_add_insn (first_set[0], base_reg, place, true); ++ } ++ ++ if (nds32_use_bim_p (&first_set, &second_set)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Generate BIM form.\n"); ++ ++ nds32_emit_multiple_insn (&first_set, base_reg, place, true); ++ ++ /* Update status, for next instruction sequence. ++ The add_offset need add 4, because the instruction ++ is post increase. */ ++ add_offset = first_set.last ().offset + 4; ++ prev_bim_p = true; ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, "Generate BI form.\n"); ++ ++ nds32_emit_multiple_insn (&first_set, base_reg, place, false); ++ ++ if (add_offset != 0) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Use addi insn handle -offset: " ++ "" HOST_WIDE_INT_PRINT_DEC "\n", ++ add_offset); ++ ++ nds32_emit_add_insn (first_set[0], base_reg, place, false); ++ add_offset = 0; ++ } ++ prev_bim_p = false; ++ ++ /* Recovey base register for next instruction sequence. */ ++ if (REGNO (base_reg) != REGNO (first_set[0].base_reg)) ++ base_reg = first_set[0].base_reg; ++ } ++ ++ /* Delete insn, replace by lmw/smw instruction. */ ++ for (i = 0; i < first_set.length (); ++i) ++ delete_insn (first_set[i].insn); ++ ++ /* Clean first_set for store next instruction group. */ ++ first_set.block_remove (0, first_set.length ()); ++ /* Store next instruction group. */ ++ for (i = 0; i < second_set.length (); ++i) ++ first_set.safe_insert (i, second_set[i]); ++ ++ /* Clean second_set. 
*/ ++ second_set.block_remove (0, second_set.length ()); ++ ++ /* Update last_offset for search next group. */ ++ last_offset = first_set.last ().offset; ++ } ++ ++ /* Processing the last instruction group. */ ++ if (!first_set.is_empty ()) ++ { ++ /* Get lmw/smw insert place. */ ++ place = nds32_lmwsmw_insert_place (&first_set); ++ ++ if (first_set[0].offset != 0 && !prev_bim_p) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Use addi insn handle offset: " ++ "" HOST_WIDE_INT_PRINT_DEC "\n", ++ first_set[0].offset); ++ ++ if (new_base_p) ++ { ++ base_reg = new_base_reg; ++ add_offset = 0; ++ } ++ else ++ add_offset = first_set[0].offset; ++ ++ nds32_emit_add_insn (first_set[0], base_reg, place, true); ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, "Generate BI form.\n"); ++ ++ nds32_emit_multiple_insn (&first_set, base_reg, place, false); ++ ++ if (add_offset != 0) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Use addi insn handle -offset: " ++ "" HOST_WIDE_INT_PRINT_DEC "\n", ++ -add_offset); ++ ++ nds32_emit_add_insn (first_set[0], base_reg, place, false); ++ } ++ ++ /* Delete insn, replace by lmw/smw instruction. */ ++ for (i = 0; i < first_set.length (); ++i) ++ delete_insn (first_set[i].insn); ++ } ++} ++ ++/* Combine mutilple load/store insn into a lmw/smw insn. */ ++static void ++nds32_rename_bi_insn (load_store_infos_t *load_store_info, ++ HARD_REG_SET *available_regset) ++{ ++ auto_vec<load_store_info_t, 64> candidate_set, bi_set, replace_set; ++ unsigned int i, j; ++ ++ bool load_insn_p; ++ enum nds32_memory_post_type post_type; ++ ++ for (i = 0; i < load_store_info->length (); ++i) ++ { ++ /* Recording instruction order of priority and initinal place. 
*/ ++ (*load_store_info)[i].order = i; ++ (*load_store_info)[i].place = false; ++ candidate_set.safe_push ((*load_store_info)[i]); ++ } ++ ++ for (i = 0; i < candidate_set.length (); ++i) ++ { ++ load_insn_p = candidate_set[i].load_p; ++ post_type = candidate_set[i].post_type; ++ ++ for (j = i + 1; j < candidate_set.length (); ++j) ++ { ++ if ((post_type == candidate_set[j].post_type) ++ && (load_insn_p == candidate_set[j].load_p)) ++ { ++ bi_set.safe_push (candidate_set[i]); ++ bi_set.safe_push (candidate_set[j]); ++ ++ if (nds32_combine_multiple_p (&bi_set, false) ++ && nds32_base_reg_safe_p (&bi_set) ++ && nds32_lmwsmw_insert_place (&bi_set) != NULL_RTX) ++ { ++ nds32_find_reg (&bi_set, &replace_set, available_regset); ++ ++ if (!replace_set.is_empty ()) ++ { ++ unsigned k; ++ unsigned op_pos = replace_set[0].load_p ? 0 : 1; ++ ++ /* Do rename register. */ ++ for (k = 0; k < replace_set.length (); ++k) ++ nds32_rename_reg (replace_set[k].insn, op_pos, ++ replace_set[k].new_reg); ++ ++ replace_set.block_remove (0, replace_set.length ()); ++ } ++ ++ candidate_set.ordered_remove (j); ++ bi_set.block_remove (0, bi_set.length ()); ++ break; ++ } ++ ++ bi_set.block_remove (0, bi_set.length ()); ++ } ++ } ++ } ++} ++ ++/* Rename register, can be combined mutilple load/store insn. */ ++static void ++nds32_rename_load_store_reg (load_store_infos_t *load_store_info, ++ HARD_REG_SET *available_regset) ++{ ++ auto_vec<load_store_info_t, 64> rename_set, temp_set, replace_set; ++ HOST_WIDE_INT current_offset; ++ unsigned int i, j; ++ bool load_insn_p; ++ bool inc_p = true, dec_p = true; ++ ++ /* Recording instruction order of priority and initinal place. */ ++ for (i = 0; i < load_store_info->length (); ++i) ++ { ++ (*load_store_info)[i].order = i; ++ (*load_store_info)[i].place = false; ++ } ++ ++ /* Fetch first instruction information from 'load_store_info', ++ we will use first instruction as base, to search next instruction. 
*/ ++ rename_set.safe_push ((*load_store_info)[0]); ++ /* Set offset, load_p state from rename_set. */ ++ current_offset = rename_set[0].offset; ++ load_insn_p = rename_set[0].load_p; ++ ++ /* Search instructions can be combined to a lmw/smw instruction. */ ++ for (i = 1; i < load_store_info->length (); ++i) ++ { ++ /* Collecting offset is increase, for example: ++ ++ lwi pseudo_reg, [$r22 + 4] <- base instruction ++ lwi pseudo_reg, [$r22 + 8] <- collect object ++ ++ The collect object (offset + 4) from base instruction. */ ++ if ((current_offset == (*load_store_info)[i].offset - 4) ++ && (load_insn_p == (*load_store_info)[i].load_p) ++ && inc_p) ++ { ++ /* Save instruction. */ ++ rename_set.safe_push ((*load_store_info)[i]); ++ /* Update offset. */ ++ current_offset += 4; ++ /* Close decrease type, search increase type. */ ++ dec_p = false; ++ } ++ /* Collecting offset is decrease, for example: ++ ++ lwi pseudo_reg, [$r22 + 8] <- base instruction ++ lwi pseudo_reg, [$r22 + 4] <- collect object ++ ++ The collect object (offset - 4) from base instruction. */ ++ else if ((current_offset == (*load_store_info)[i].offset + 4) ++ && (load_insn_p == (*load_store_info)[i].load_p) ++ && dec_p) ++ { ++ /* Save instruction. */ ++ rename_set.safe_push ((*load_store_info)[i]); ++ ++ /* Update offset. */ ++ current_offset -= 4; ++ /* Close increase type, search decrease type. */ ++ inc_p = false; ++ } ++ else ++ { ++ inc_p = true; ++ dec_p = true; ++ } ++ ++ /* Instructions collect is completed. */ ++ if ((inc_p && dec_p) ++ || (i + 1) == load_store_info->length ()) ++ { ++ /* Check whether the rename register. */ ++ if (nds32_combine_multiple_p (&rename_set, false) ++ && nds32_base_reg_safe_p (&rename_set) ++ && nds32_lmwsmw_insert_place (&rename_set) != NULL_RTX) ++ { ++ /* Find can rename instruction, and store in 'replace_set'. 
*/ ++ nds32_find_reg (&rename_set, &replace_set, available_regset); ++ ++ if (!replace_set.is_empty ()) ++ { ++ unsigned op_pos = replace_set[0].load_p ? 0 : 1; ++ ++ /* Do rename register. */ ++ for (j = 0; j < replace_set.length (); ++j) ++ nds32_rename_reg (replace_set[j].insn, op_pos, ++ replace_set[j].new_reg); ++ ++ replace_set.block_remove (0, replace_set.length ()); ++ } ++ } ++ ++ /* Scan to the last instruction, it is complete. */ ++ if ((i + 1) == load_store_info->length ()) ++ break; ++ ++ /* Clean rename_set sequence. */ ++ rename_set.block_remove (0, rename_set.length ()); ++ /* Reinitialize first instruction infomation ++ to search next instruction. */ ++ rename_set.safe_push ((*load_store_info)[i]); ++ /* Set offset, load_p state from rename_set. */ ++ current_offset = rename_set.last ().offset; ++ load_insn_p = rename_set.last ().load_p; ++ } ++ else if (!nds32_base_reg_safe_p (&rename_set) ++ || nds32_lmwsmw_insert_place (&rename_set) == NULL_RTX) ++ { ++ /* Check collect instruction for each instruction, ++ we store (n - 1) instructions in group, and ++ last instruction as the first instruction of the next group. */ ++ for (j = 0; j < (rename_set.length () - 1); j++) ++ temp_set.safe_push (rename_set[j]); ++ ++ if (nds32_combine_multiple_p (&temp_set, false)) ++ { ++ /* Find can rename instruction, and store in 'replace_set'. */ ++ nds32_find_reg (&temp_set, &replace_set, available_regset); ++ ++ if (!replace_set.is_empty ()) ++ { ++ unsigned op_pos = replace_set[0].load_p ? 0 : 1; ++ ++ /* Do rename register. */ ++ for (j = 0; j < replace_set.length (); ++j) ++ nds32_rename_reg (replace_set[j].insn, op_pos, ++ replace_set[j].new_reg); ++ ++ replace_set.block_remove (0, replace_set.length ()); ++ } ++ } ++ ++ /* Clean temp_set sequence. */ ++ temp_set.block_remove (0, temp_set.length ()); ++ /* Clean rename_set sequence. */ ++ rename_set.block_remove (0, (rename_set.length () - 1)); ++ /* Set offset, regno, load_p state from rename_set. 
*/ ++ current_offset = rename_set.last ().offset; ++ load_insn_p = rename_set.last ().load_p; ++ /* Reset it for search increase and decrease type. */ ++ inc_p = true; ++ dec_p = true; ++ } ++ } ++} ++ ++static void ++nds32_do_lmwsmw_opt (basic_block bb, bool rename_p) ++{ ++ rtx_insn *insn; ++ HARD_REG_SET available_regset; ++ load_store_info_t load_store_info; ++ auto_vec<load_store_info_t, 64> load_store_infos[NDS32_GPR_NUM]; ++ auto_vec<load_store_info_t, 64> plus_infos[NDS32_GPR_NUM]; ++ auto_vec<load_store_info_t, 64> post_infos[NDS32_GPR_NUM]; ++ int i; ++ unsigned j; ++ unsigned regno; ++ unsigned polluting; ++ df_ref def; ++ /* Dirty mean a register is define again after ++ first load/store instruction. ++ For example: ++ ++ lwi $r2, [$r3 + #0x100] ++ mov $r3, $r4 ! $r3 is dirty after this instruction. ++ lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load. ++ */ ++ bool dirty[NDS32_GPR_NUM]; ++ ++ if (dump_file) ++ fprintf (dump_file, "scan bb %d\n", bb->index); ++ ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ dirty[i] = false; ++ ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!INSN_P (insn)) ++ continue; ++ ++ polluting = INVALID_REGNUM; ++ ++ /* Set def reg is dirty if chain is not empty. */ ++ FOR_EACH_INSN_USE (def, insn) ++ { ++ regno = DF_REF_REGNO (def); ++ ++ if (!NDS32_IS_GPR_REGNUM (regno)) ++ continue; ++ ++ if (!load_store_infos[regno].is_empty ()) ++ { ++ /* Set pulluting here because the source register ++ may be the same one. */ ++ if (dirty[regno] == false) ++ polluting = regno; ++ ++ dirty[regno] = true; ++ } ++ } ++ ++ /* Set all caller-save register is dirty if chain is not empty. 
*/ ++ if (CALL_P (insn)) ++ { ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ { ++ if (call_used_regs[i] && !load_store_infos[i].is_empty ()) ++ dirty[i] = true; ++ } ++ } ++ ++ if (nds32_load_store_reg_plus_offset (insn, &load_store_info)) ++ { ++ regno = REGNO (load_store_info.base_reg); ++ gcc_assert (NDS32_IS_GPR_REGNUM (regno)); ++ ++ /* Don't add to chain if this reg is dirty. */ ++ if (dirty[regno] && polluting != regno) ++ break; ++ ++ /* If the register is first time to be used and be polluted ++ right away, we don't push it. */ ++ if (regno == REGNO (load_store_info.reg) && load_store_info.load_p ++ && dirty[regno] == false) ++ continue; ++ ++ load_store_infos[regno].safe_push (load_store_info); ++ } ++ } ++ ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ { ++ for (j = 0; j < load_store_infos[i].length (); ++j) ++ { ++ if (load_store_infos[i][j].post_type == NDS32_NONE) ++ plus_infos[i].safe_push (load_store_infos[i][j]); ++ else ++ post_infos[i].safe_push (load_store_infos[i][j]); ++ } ++ } ++ ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ { ++ if (load_store_infos[i].length () <= 1) ++ { ++ if (dump_file && load_store_infos[i].length () == 1) ++ fprintf (dump_file, ++ "Skip Chain for $r%d since chain size only 1\n", ++ i); ++ continue; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "Chain for $r%d: (size = %u)\n", ++ i, load_store_infos[i].length ()); ++ ++ for (j = 0; j < load_store_infos[i].length (); ++j) ++ { ++ fprintf (dump_file, ++ "regno = %d base_regno = %d " ++ "offset = " HOST_WIDE_INT_PRINT_DEC " " ++ "load_p = %d UID = %u place = %d\n", ++ REGNO (load_store_infos[i][j].reg), ++ REGNO (load_store_infos[i][j].base_reg), ++ load_store_infos[i][j].offset, ++ load_store_infos[i][j].load_p, ++ INSN_UID (load_store_infos[i][j].insn), ++ load_store_infos[i][j].place); ++ } ++ } ++ ++ nds32_get_available_reg_set (bb, ++ load_store_infos[i][0].insn, ++ load_store_infos[i].last ().insn, ++ &available_regset); ++ if (dump_file) ++ print_hard_reg_set 
(dump_file, "", available_regset); ++ ++ /* If rename_p is true, then do rename register of load/store ++ instruction. Otherwise combination of a multiple load/sotre ++ a multiple load/store instruction. */ ++ if (rename_p) ++ { ++ if (plus_infos[i].length () > 1) ++ nds32_rename_load_store_reg (&plus_infos[i], &available_regset); ++ if (post_infos[i].length () > 1) ++ nds32_rename_bi_insn (&post_infos[i], &available_regset); ++ } ++ else ++ { ++ if (plus_infos[i].length () > 1) ++ nds32_combine_load_store_insn (&plus_infos[i], &available_regset); ++ if (post_infos[i].length () > 1) ++ nds32_combine_bi_insn (&post_infos[i]); ++ } ++ } ++} ++ ++static void ++nds32_lmwsmw_opt (bool rename_p) ++{ ++ basic_block bb; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ nds32_do_lmwsmw_opt (bb, rename_p); ++} ++ ++/* Implement rename register for load and store instruction. */ ++static unsigned int ++rest_of_handle_rename_lmwsmw_opt (void) ++{ ++ init_alias_analysis (); ++ ++ df_set_flags (DF_LR_RUN_DCE); ++ df_note_add_problem (); ++ df_analyze (); ++ df_set_flags (DF_DEFER_INSN_RESCAN); ++ ++ regrename_init (true); ++ regrename_analyze (NULL); ++ ++ nds32_lmwsmw_opt (true); ++ ++ regrename_finish (); ++ ++ /* We are finished with alias. */ ++ end_alias_analysis (); ++ return 1; ++} ++ ++/* Implement generate lmw and smw instruction. */ ++static unsigned int ++rest_of_handle_gen_lmwsmw_opt (void) ++{ ++ init_alias_analysis (); ++ ++ df_note_add_problem (); ++ df_analyze (); ++ nds32_lmwsmw_opt (false); ++ ++ /* We are finished with alias. 
*/ ++ end_alias_analysis (); ++ return 1; ++} ++ ++ ++const pass_data pass_data_nds32_rename_lmwsmw_opt = ++{ ++ RTL_PASS, /* type */ ++ "rename_lmwsmw_opt", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_rename_lmwsmw_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_rename_lmwsmw_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_rename_lmwsmw_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return flag_nds32_lmwsmw_opt; } ++ unsigned int execute (function *) { return rest_of_handle_rename_lmwsmw_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_rename_lmwsmw_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_rename_lmwsmw_opt (ctxt); ++} ++ ++const pass_data pass_data_nds32_gen_lmwsmw_opt = ++{ ++ RTL_PASS, /* type */ ++ "gen_lmwsmw_opt", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_gen_lmwsmw_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_gen_lmwsmw_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_gen_lmwsmw_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return flag_nds32_lmwsmw_opt; } ++ unsigned int execute (function *) { return rest_of_handle_gen_lmwsmw_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_gen_lmwsmw_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_gen_lmwsmw_opt (ctxt); ++} +diff --git a/gcc/config/nds32/nds32-load-store-opt.c b/gcc/config/nds32/nds32-load-store-opt.c +new file mode 100644 +index 0000000..9e5161e +--- /dev/null ++++ b/gcc/config/nds32/nds32-load-store-opt.c +@@ -0,0 +1,721 @@ ++/* 
load-store-opt pass of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). 
*/ ++#include "builtins.h" ++#include "cpplib.h" ++#include "params.h" ++#include "tree-pass.h" ++#include "target-globals.h" ++#include "nds32-load-store-opt.h" ++#include "nds32-reg-utils.h" ++#include <set> ++ ++#define NDS32_GPR_NUM 32 ++ ++static new_base_reg_info_t gen_new_base (rtx, ++ offset_info_t, ++ unsigned, ++ HOST_WIDE_INT, ++ HOST_WIDE_INT); ++ ++static const load_store_optimize_pass *load_store_optimizes[] = ++{ ++ /* allow_regclass, new_base_regclass, ++ offset_lower_bound, offset_upper_bound, ++ load_only_p, name */ ++ new load_store_optimize_pass ( ++ LOW_REGS, LOW_REGS, ++ 0, (32-4), ++ false, "lswi333"), ++ new load_store_optimize_pass ( ++ LOW_REGS, FRAME_POINTER_REG, ++ 0, (512-4), ++ false, "lswi37"), ++ new load_store_optimize_pass ( ++ MIDDLE_REGS, GENERAL_REGS, ++ 0, 0, ++ false, "lswi450"), ++ new load_store_optimize_pass ( ++ MIDDLE_REGS, R8_REG, ++ -128, -4, ++ true, "lwi45fe") ++}; ++ ++static const int N_LOAD_STORE_OPT_TYPE = sizeof (load_store_optimizes) ++ / sizeof (load_store_optimize_pass*); ++ ++load_store_optimize_pass ++::load_store_optimize_pass (enum reg_class allow_regclass, ++ enum reg_class new_base_regclass, ++ HOST_WIDE_INT offset_lower_bound, ++ HOST_WIDE_INT offset_upper_bound, ++ bool load_only_p, ++ const char *name) ++ : m_allow_regclass (allow_regclass), ++ m_new_base_regclass (new_base_regclass), ++ m_offset_lower_bound (offset_lower_bound), ++ m_offset_upper_bound (offset_upper_bound), ++ m_load_only_p (load_only_p), ++ m_name (name) ++{ ++ gcc_assert (offset_lower_bound <= offset_upper_bound); ++} ++ ++int ++load_store_optimize_pass::calc_gain (HARD_REG_SET *available_regset, ++ offset_info_t offset_info, ++ load_store_infos_t *load_store_info) const ++{ ++ int extra_cost = 0; ++ int gain = 0; ++ unsigned i; ++ unsigned chain_size; ++ unsigned new_base_regnum; ++ HOST_WIDE_INT allow_range = m_offset_upper_bound - m_offset_lower_bound; ++ new_base_regnum = find_available_reg (available_regset, 
m_new_base_regclass); ++ chain_size = load_store_info->length (); ++ ++ if (new_base_regnum == INVALID_REGNUM) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "%s have no avariable register, so give up try %s\n", ++ reg_class_names[m_new_base_regclass], ++ m_name); ++ return 0; ++ } ++ else if (dump_file) ++ fprintf (dump_file, ++ "%s is avariable, get %s, try %s, chain size = %u\n", ++ reg_class_names[m_new_base_regclass], ++ reg_names[new_base_regnum], ++ m_name, ++ chain_size); ++ ++ HOST_WIDE_INT range = offset_info.max_offset - offset_info.min_offset; ++ ++ if (range > allow_range) ++ { ++ /* TODO: We can perform load-store opt for only part of load store. */ ++ if (dump_file) ++ fprintf (dump_file, ++ "range is too large for %s" ++ " (range = " HOST_WIDE_INT_PRINT_DEC ", " ++ "allow_range = " HOST_WIDE_INT_PRINT_DEC ")\n", ++ m_name, range, allow_range); ++ return 0; ++ } ++ ++ if (offset_info.min_offset >= m_offset_lower_bound ++ && offset_info.max_offset <= m_offset_upper_bound) ++ { ++ /* mov55. */ ++ extra_cost = 2; ++ } ++ else ++ { ++ if (satisfies_constraint_Is15 (GEN_INT (offset_info.min_offset ++ - m_offset_lower_bound))) ++ { ++ /* add. */ ++ extra_cost = 4; ++ } ++ else ++ { ++ /* TODO: Try m_offset_upper_bound instead of m_offset_lower_bound ++ again. */ ++ /* add45 + movi. */ ++ if (satisfies_constraint_Is20 (GEN_INT (offset_info.min_offset ++ - m_offset_lower_bound))) ++ extra_cost = 6; ++ else ++ return -1; /* Give up if this constant is too large. 
*/ ++ } ++ } ++ ++ for (i = 0; i < chain_size; ++i) ++ { ++ if (m_load_only_p && !(*load_store_info)[i].load_p) ++ continue; ++ ++ if (in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass)) ++ gain += 2; ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, ++ "%s: gain = %d extra_cost = %d\n", ++ m_name, gain, extra_cost); ++ ++ return gain - extra_cost; ++} ++ ++ ++void ++load_store_optimize_pass::do_optimize ( ++ HARD_REG_SET *available_regset, ++ offset_info_t offset_info, ++ load_store_infos_t *load_store_info) const ++{ ++ new_base_reg_info_t new_base_reg_info; ++ rtx load_store_insn; ++ unsigned new_base_regnum; ++ ++ new_base_regnum = find_available_reg (available_regset, m_new_base_regclass); ++ gcc_assert (new_base_regnum != INVALID_REGNUM); ++ ++ new_base_reg_info = ++ gen_new_base ((*load_store_info)[0].base_reg, ++ offset_info, ++ new_base_regnum, ++ m_offset_lower_bound, m_offset_upper_bound); ++ unsigned i; ++ rtx insn; ++ insn = emit_insn_before (new_base_reg_info.set_insns[0], ++ (*load_store_info)[0].insn); ++ if (new_base_reg_info.n_set_insns > 1) ++ { ++ gcc_assert (new_base_reg_info.n_set_insns == 2); ++ emit_insn_before (new_base_reg_info.set_insns[1], insn); ++ } ++ ++ for (i = 0; i < load_store_info->length (); ++i) ++ { ++ if (m_load_only_p && !(*load_store_info)[i].load_p) ++ continue; ++ ++ if (!in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass)) ++ continue; ++ ++ HOST_WIDE_INT offset = (*load_store_info)[i].offset; ++ ++ if (new_base_reg_info.need_adjust_offset_p) ++ offset = offset + new_base_reg_info.adjust_offset; ++ ++ load_store_insn = ++ gen_reg_plus_imm_load_store ((*load_store_info)[i].reg, ++ new_base_reg_info.reg, ++ offset, ++ (*load_store_info)[i].load_p, ++ (*load_store_info)[i].mem); ++ ++ emit_insn_before (load_store_insn, (*load_store_info)[i].insn); ++ ++ delete_insn ((*load_store_info)[i].insn); ++ } ++ ++ /* Recompute it CFG, to update BB_END() instruction. 
*/ ++ compute_bb_for_insn (); ++} ++ ++static new_base_reg_info_t ++gen_new_base (rtx original_base_reg, ++ offset_info_t offset_info, ++ unsigned new_base_regno, ++ HOST_WIDE_INT offset_lower, ++ HOST_WIDE_INT offset_upper) ++{ ++ new_base_reg_info_t new_base_reg_info; ++ ++ /* Use gen_raw_REG instead of gen_rtx_REG to prevent break the reg ++ info for global one. ++ For example, gen_rtx_REG will return frame_pointer_rtx immediate ++ instead of create new rtx for gen_raw_REG (Pmode, FP_REGNUM). */ ++ new_base_reg_info.reg = gen_raw_REG (Pmode, new_base_regno); ++ ++ /* Setup register info. */ ++ ORIGINAL_REGNO (new_base_reg_info.reg) = ORIGINAL_REGNO (original_base_reg); ++ REG_ATTRS (new_base_reg_info.reg) = REG_ATTRS (original_base_reg); ++ ++ if (offset_info.max_offset <= offset_upper ++ && offset_info.min_offset >= offset_lower) ++ { ++ new_base_reg_info.set_insns[0] = gen_movsi (new_base_reg_info.reg, ++ original_base_reg); ++ new_base_reg_info.n_set_insns = 1; ++ new_base_reg_info.need_adjust_offset_p = false; ++ new_base_reg_info.adjust_offset = 0; ++ } ++ else ++ { ++ /* For example: ++ lwi45.fe allow -4 ~ -128 range: ++ offset_lower = #-4 ++ offset_upper = #-128 ++ ++ lwi $r2, [$r12 + #10] ++ -> ++ addi $r8, $r12, #14 ! $r8 = $r12 + #10 - offset_lower ++ ! = $r12 + #10 - #-4 ++ ! = $r12 + #14 ++ lwi45.fe $r2, [$r8 - #4] ! [$r8 - #4] ++ ! = [$r12 + #14 - #4] ++ ! 
= [$r12 + #10] ++ */ ++ new_base_reg_info.adjust_offset = ++ -(offset_info.min_offset - offset_lower); ++ ++ rtx offset = GEN_INT (-new_base_reg_info.adjust_offset); ++ ++ ++ if (satisfies_constraint_Is15 (offset)) ++ { ++ new_base_reg_info.set_insns[0] = ++ gen_addsi3(new_base_reg_info.reg, ++ original_base_reg, ++ offset); ++ ++ new_base_reg_info.n_set_insns = 1; ++ } ++ else ++ { ++ if (!satisfies_constraint_Is20 (offset)) ++ gcc_unreachable (); ++ ++ new_base_reg_info.set_insns[1] = ++ gen_rtx_SET (new_base_reg_info.reg, ++ GEN_INT (-new_base_reg_info.adjust_offset)); ++ ++ new_base_reg_info.set_insns[0] = ++ gen_addsi3 (new_base_reg_info.reg, ++ new_base_reg_info.reg, ++ original_base_reg); ++ ++ new_base_reg_info.n_set_insns = 2; ++ } ++ ++ new_base_reg_info.need_adjust_offset_p = true; ++ } ++ ++ return new_base_reg_info; ++} ++ ++static bool ++nds32_4byte_load_store_reg_plus_offset ( ++ rtx_insn *insn, ++ load_store_info_t *load_store_info) ++{ ++ if (!INSN_P (insn)) ++ return false; ++ ++ rtx pattern = PATTERN (insn); ++ rtx mem = NULL_RTX; ++ rtx reg = NULL_RTX; ++ rtx base_reg = NULL_RTX; ++ rtx addr; ++ HOST_WIDE_INT offset = 0; ++ bool load_p = false; ++ ++ if (GET_CODE (pattern) != SET) ++ return false; ++ ++ if (MEM_P (SET_SRC (pattern))) ++ { ++ mem = SET_SRC (pattern); ++ reg = SET_DEST (pattern); ++ load_p = true; ++ } ++ ++ if (MEM_P (SET_DEST (pattern))) ++ { ++ mem = SET_DEST (pattern); ++ reg = SET_SRC (pattern); ++ load_p = false; ++ } ++ ++ if (mem == NULL_RTX || reg == NULL_RTX || !REG_P (reg)) ++ return false; ++ ++ gcc_assert (REG_P (reg)); ++ ++ addr = XEXP (mem, 0); ++ ++ /* We only care about [reg] and [reg+const]. */ ++ if (REG_P (addr)) ++ { ++ base_reg = addr; ++ offset = 0; ++ } ++ else if (GET_CODE (addr) == PLUS ++ && CONST_INT_P (XEXP (addr, 1))) ++ { ++ base_reg = XEXP (addr, 0); ++ offset = INTVAL (XEXP (addr, 1)); ++ if (!REG_P (base_reg)) ++ return false; ++ } ++ else ++ return false; ++ ++ /* At least need MIDDLE_REGS. 
*/ ++ if (!in_reg_class_p (reg, MIDDLE_REGS)) ++ return false; ++ ++ /* lwi450/swi450 */ ++ if (offset == 0) ++ return false; ++ ++ if (in_reg_class_p (reg, LOW_REGS)) ++ { ++ /* lwi37.sp/swi37.sp/lwi37/swi37 */ ++ if ((REGNO (base_reg) == SP_REGNUM ++ || REGNO (base_reg) == FP_REGNUM) ++ && (offset >= 0 && offset < 512 && (offset % 4 == 0))) ++ return false; ++ ++ /* lwi333/swi333 */ ++ if (in_reg_class_p (base_reg, LOW_REGS) ++ && (offset >= 0 && offset < 32 && (offset % 4 == 0))) ++ return false; ++ } ++ ++ if (load_store_info) ++ { ++ load_store_info->load_p = load_p; ++ load_store_info->offset = offset; ++ load_store_info->reg = reg; ++ load_store_info->base_reg = base_reg; ++ load_store_info->insn = insn; ++ load_store_info->mem = mem; ++ } ++ ++ if (GET_MODE (reg) != SImode) ++ return false; ++ ++ return true; ++} ++ ++static bool ++nds32_4byte_load_store_reg_plus_offset_p (rtx_insn *insn) ++{ ++ return nds32_4byte_load_store_reg_plus_offset (insn, NULL); ++} ++ ++static bool ++nds32_load_store_opt_profitable_p (basic_block bb) ++{ ++ int candidate = 0; ++ int threshold = 2; ++ rtx_insn *insn; ++ ++ if (dump_file) ++ fprintf (dump_file, "scan bb %d\n", bb->index); ++ ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (nds32_4byte_load_store_reg_plus_offset_p (insn)) ++ candidate++; ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, " candidate = %d\n", candidate); ++ ++ return candidate >= threshold; ++} ++ ++static offset_info_t ++nds32_get_offset_info (auto_vec<load_store_info_t, 64> *load_store_info) ++{ ++ unsigned i; ++ std::set<HOST_WIDE_INT> offsets; ++ offset_info_t offset_info; ++ offset_info.max_offset = 0; ++ offset_info.min_offset = 0; ++ offset_info.num_offset = 0; ++ ++ if (load_store_info->length () == 0) ++ return offset_info; ++ ++ offset_info.max_offset = (*load_store_info)[0].offset; ++ offset_info.min_offset = (*load_store_info)[0].offset; ++ offsets.insert ((*load_store_info)[0].offset); ++ ++ for (i = 1; i < load_store_info->length (); i++) ++ { ++ 
HOST_WIDE_INT offset = (*load_store_info)[i].offset; ++ offset_info.max_offset = MAX (offset_info.max_offset, offset); ++ offset_info.min_offset = MIN (offset_info.min_offset, offset); ++ offsets.insert (offset); ++ } ++ ++ offset_info.num_offset = offsets.size (); ++ ++ return offset_info; ++} ++ ++static void ++nds32_do_load_store_opt (basic_block bb) ++{ ++ rtx_insn *insn; ++ load_store_info_t load_store_info; ++ auto_vec<load_store_info_t, 64> load_store_infos[NDS32_GPR_NUM]; ++ HARD_REG_SET available_regset; ++ int i; ++ unsigned j; ++ unsigned regno; ++ unsigned polluting; ++ df_ref def; ++ /* Dirty mean a register is define again after ++ first load/store instruction. ++ For example: ++ ++ lwi $r2, [$r3 + #0x100] ++ mov $r3, $r4 ! $r3 is dirty after this instruction. ++ lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load. ++ */ ++ bool dirty[NDS32_GPR_NUM]; ++ ++ if (dump_file) ++ fprintf (dump_file, "try load store opt for bb %d\n", bb->index); ++ ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ dirty[i] = false; ++ ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!INSN_P (insn)) ++ continue; ++ ++ polluting = INVALID_REGNUM; ++ ++ /* Set def reg is dirty if chain is not empty. */ ++ FOR_EACH_INSN_DEF (def, insn) ++ { ++ regno = DF_REF_REGNO (def); ++ ++ if (!NDS32_IS_GPR_REGNUM (regno)) ++ continue; ++ ++ if (!load_store_infos[regno].is_empty ()) ++ { ++ /* Set pulluting here because the source register ++ may be the same one. */ ++ if (dirty[regno] == false) ++ polluting = regno; ++ ++ dirty[regno] = true; ++ } ++ } ++ ++ /* Set all caller-save register is dirty if chain is not empty. 
*/ ++ if (CALL_P (insn)) ++ { ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ { ++ if (call_used_regs[i] && !load_store_infos[i].is_empty ()) ++ dirty[i] = true; ++ } ++ } ++ ++ if (nds32_4byte_load_store_reg_plus_offset (insn, &load_store_info)) ++ { ++ regno = REGNO (load_store_info.base_reg); ++ gcc_assert (NDS32_IS_GPR_REGNUM (regno)); ++ ++ /* Don't add to chain if this reg is dirty. */ ++ if (dirty[regno] && polluting != regno) ++ break; ++ ++ /* If the register is first time to be used and be polluted ++ right away, we don't push it. */ ++ if (regno == REGNO (load_store_info.reg) && load_store_info.load_p ++ && dirty[regno] == false) ++ continue; ++ ++ load_store_infos[regno].safe_push (load_store_info); ++ } ++ } ++ for (i = 0; i < NDS32_GPR_NUM; ++i) ++ { ++ if (load_store_infos[i].length () <= 1) ++ { ++ if (dump_file && load_store_infos[i].length () == 1) ++ fprintf (dump_file, ++ "Skip Chain for $r%d since chain size only 1\n", ++ i); ++ continue; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "Chain for $r%d: (size = %u)\n", ++ i, load_store_infos[i].length ()); ++ ++ for (j = 0; j < load_store_infos[i].length (); ++j) ++ { ++ fprintf (dump_file, ++ "regno = %d base_regno = %d " ++ "offset = " HOST_WIDE_INT_PRINT_DEC " " ++ "load_p = %d UID = %u\n", ++ REGNO (load_store_infos[i][j].reg), ++ REGNO (load_store_infos[i][j].base_reg), ++ load_store_infos[i][j].offset, ++ load_store_infos[i][j].load_p, ++ INSN_UID (load_store_infos[i][j].insn)); ++ } ++ } ++ ++ nds32_get_available_reg_set (bb, ++ load_store_infos[i][0].insn, ++ load_store_infos[i].last ().insn, ++ &available_regset); ++ ++ if (dump_file) ++ { ++ print_hard_reg_set (dump_file, "", available_regset); ++ } ++ ++ offset_info_t offset_info = nds32_get_offset_info (&load_store_infos[i]); ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "max offset = " HOST_WIDE_INT_PRINT_DEC "\n" ++ "min offset = " HOST_WIDE_INT_PRINT_DEC "\n" ++ "num offset = %d\n", ++ offset_info.max_offset, ++ 
offset_info.min_offset, ++ offset_info.num_offset); ++ } ++ ++ int gain; ++ int best_gain = 0; ++ const load_store_optimize_pass *best_load_store_optimize_pass = NULL; ++ ++ for (j = 0; j < N_LOAD_STORE_OPT_TYPE; ++j) ++ { ++ gain = load_store_optimizes[j]->calc_gain (&available_regset, ++ offset_info, ++ &load_store_infos[i]); ++ ++ if (dump_file) ++ fprintf (dump_file, "%s gain = %d\n", ++ load_store_optimizes[j]->name (), gain); ++ ++ if (gain > best_gain) ++ { ++ best_gain = gain; ++ best_load_store_optimize_pass = load_store_optimizes[j]; ++ } ++ } ++ ++ if (best_load_store_optimize_pass) ++ { ++ if (dump_file) ++ fprintf (dump_file, "%s is most profit, optimize it!\n", ++ best_load_store_optimize_pass->name ()); ++ ++ best_load_store_optimize_pass->do_optimize (&available_regset, ++ offset_info, ++ &load_store_infos[i]); ++ ++ df_insn_rescan_all (); ++ } ++ ++ } ++} ++ ++static unsigned int ++nds32_load_store_opt (void) ++{ ++ basic_block bb; ++ ++ df_set_flags (DF_LR_RUN_DCE); ++ df_note_add_problem (); ++ df_analyze (); ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ if (nds32_load_store_opt_profitable_p (bb)) ++ nds32_do_load_store_opt (bb); ++ } ++ ++ return 1; ++} ++ ++const pass_data pass_data_nds32_load_store_opt = ++{ ++ RTL_PASS, /* type */ ++ "load_store_opt", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_load_store_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_load_store_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_load_store_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { return TARGET_16_BIT && TARGET_LOAD_STORE_OPT; } ++ unsigned int execute (function *) { return nds32_load_store_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_load_store_opt (gcc::context *ctxt) ++{ ++ return 
new pass_nds32_load_store_opt (ctxt); ++} +diff --git a/gcc/config/nds32/nds32-load-store-opt.h b/gcc/config/nds32/nds32-load-store-opt.h +new file mode 100644 +index 0000000..f94b56a +--- /dev/null ++++ b/gcc/config/nds32/nds32-load-store-opt.h +@@ -0,0 +1,117 @@ ++/* Prototypes for load-store-opt of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef NDS32_LOAD_STORE_OPT_H ++#define NDS32_LOAD_STORE_OPT_H ++ ++/* Define the type of a set of hard registers. */ ++ ++enum nds32_memory_post_type ++{ ++ NDS32_NONE, ++ NDS32_POST_INC, ++ NDS32_POST_DEC ++}; ++ ++typedef struct { ++ rtx reg; ++ rtx base_reg; ++ rtx offset; ++ HOST_WIDE_INT shift; ++ bool load_p; ++ rtx insn; ++} rr_load_store_info_t; ++ ++typedef struct { ++ rtx reg; ++ rtx base_reg; ++ HOST_WIDE_INT offset; ++ bool load_p; ++ rtx_insn *insn; ++ rtx mem; ++ int new_reg; ++ int order; ++ int group; ++ bool place; ++ enum nds32_memory_post_type post_type; ++} load_store_info_t; ++ ++typedef struct { ++ HOST_WIDE_INT max_offset; ++ HOST_WIDE_INT min_offset; ++ /* How many different offset. 
*/ ++ int num_offset; ++} offset_info_t; ++ ++typedef struct { ++ rtx set_insns[2]; ++ int n_set_insns; ++ rtx reg; ++ bool need_adjust_offset_p; ++ HOST_WIDE_INT adjust_offset; ++} new_base_reg_info_t; ++ ++typedef struct { ++ unsigned int amount; ++ unsigned int start; ++ unsigned int end; ++} available_reg_info_t; ++ ++typedef auto_vec<load_store_info_t, 64> load_store_infos_t; ++ ++class load_store_optimize_pass ++{ ++public: ++ load_store_optimize_pass (enum reg_class, ++ enum reg_class, ++ HOST_WIDE_INT, ++ HOST_WIDE_INT, ++ bool, ++ const char *); ++ const char *name () const { return m_name; }; ++ int calc_gain (HARD_REG_SET *, ++ offset_info_t, ++ load_store_infos_t *) const; ++ void do_optimize (HARD_REG_SET *, ++ offset_info_t, ++ load_store_infos_t *) const; ++private: ++ enum reg_class m_allow_regclass; ++ enum reg_class m_new_base_regclass; ++ HOST_WIDE_INT m_offset_lower_bound; ++ HOST_WIDE_INT m_offset_upper_bound; ++ bool m_load_only_p; ++ const char *m_name; ++}; ++ ++static inline rtx ++gen_reg_plus_imm_load_store (rtx reg, rtx base_reg, ++ HOST_WIDE_INT offset, bool load_p, rtx oldmem) ++{ ++ rtx addr = plus_constant(Pmode, base_reg, offset); ++ rtx mem = gen_rtx_MEM (SImode, addr); ++ MEM_COPY_ATTRIBUTES (mem, oldmem); ++ if (load_p) ++ return gen_movsi (reg, mem); ++ else ++ return gen_movsi (mem, reg); ++} ++ ++#endif /* ! NDS32_LOAD_STORE_OPT_H */ +diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c +index def8eda..3881df7 100644 +--- a/gcc/config/nds32/nds32-md-auxiliary.c ++++ b/gcc/config/nds32/nds32-md-auxiliary.c +@@ -25,17 +25,74 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" +-#include "rtl.h" + #include "tree.h" +-#include "tm_p.h" +-#include "optabs.h" /* For GEN_FCN. 
*/ +-#include "recog.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" + #include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" + #include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* This file is divided into three parts: ++ ++ PART 1: Auxiliary static function definitions. ++ ++ PART 2: Auxiliary function for expand RTL pattern. ++ ++ PART 3: Auxiliary function for output asm template. */ + + /* ------------------------------------------------------------------------ */ + ++/* PART 1: Auxiliary static function definitions. */ ++ ++static int ++nds32_regno_to_enable4 (unsigned regno) ++{ ++ switch (regno) ++ { ++ case 28: /* $r28/fp */ ++ return 0x8; ++ case 29: /* $r29/gp */ ++ return 0x4; ++ case 30: /* $r30/lp */ ++ return 0x2; ++ case 31: /* $r31/sp */ ++ return 0x1; ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* A helper function to return character based on byte size. */ + static char + nds32_byte_to_size (int byte) +@@ -54,796 +111,3825 @@ nds32_byte_to_size (int byte) + } + } + +-/* A helper function to return memory format. 
*/ +-enum nds32_16bit_address_type +-nds32_mem_format (rtx op) ++static int ++nds32_inverse_cond_code (int code) + { +- machine_mode mode_test; +- int val; +- int regno; +- +- if (!TARGET_16_BIT) +- return ADDRESS_NOT_16BIT_FORMAT; +- +- mode_test = GET_MODE (op); +- +- op = XEXP (op, 0); ++ switch (code) ++ { ++ case NE: ++ return EQ; ++ case EQ: ++ return NE; ++ case GT: ++ return LE; ++ case LE: ++ return GT; ++ case GE: ++ return LT; ++ case LT: ++ return GE; ++ default: ++ gcc_unreachable (); ++ } ++} + +- /* 45 format. */ +- if (GET_CODE (op) == REG && (mode_test == SImode)) +- return ADDRESS_REG; ++static const char * ++nds32_cond_code_str (int code) ++{ ++ switch (code) ++ { ++ case NE: ++ return "ne"; ++ case EQ: ++ return "eq"; ++ case GT: ++ return "gt"; ++ case LE: ++ return "le"; ++ case GE: ++ return "ge"; ++ case LT: ++ return "lt"; ++ default: ++ gcc_unreachable (); ++ } ++} + +- /* 333 format for QI/HImode. */ +- if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM)) +- return ADDRESS_LO_REG_IMM3U; ++static void ++output_cond_branch (int code, const char *suffix, bool r5_p, ++ bool long_jump_p, rtx *operands) ++{ ++ char pattern[256]; ++ const char *cond_code; ++ bool align_p = NDS32_ALIGN_P (); ++ const char *align = align_p ? "\t.align\t2\n" : ""; + +- /* post_inc 333 format. */ +- if ((GET_CODE (op) == POST_INC) && (mode_test == SImode)) ++ if (r5_p && REGNO (operands[2]) == 5 && TARGET_16_BIT) + { +- regno = REGNO(XEXP (op, 0)); +- +- if (regno < 8) +- return ADDRESS_POST_INC_LO_REG_IMM3U; ++ /* This is special case for beqs38 and bnes38, ++ second operand 2 can't be $r5 and it's almost meanless, ++ however it may occur after copy propgation. */ ++ if (code == EQ) ++ { ++ /* $r5 == $r5 always taken! */ ++ if (long_jump_p) ++ snprintf (pattern, sizeof (pattern), ++ "j\t%%3"); ++ else ++ snprintf (pattern, sizeof (pattern), ++ "j8\t%%3"); ++ } ++ else ++ /* Don't output anything since $r5 != $r5 never taken! 
*/ ++ pattern[0] = '\0'; + } +- +- /* post_inc 333 format. */ +- if ((GET_CODE (op) == POST_MODIFY) +- && (mode_test == SImode) +- && (REG_P (XEXP (XEXP (op, 1), 0))) +- && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) ++ else if (long_jump_p) + { +- regno = REGNO (XEXP (XEXP (op, 1), 0)); +- val = INTVAL (XEXP (XEXP (op, 1), 1)); +- if (regno < 8 && val < 32) +- return ADDRESS_POST_INC_LO_REG_IMM3U; ++ int inverse_code = nds32_inverse_cond_code (code); ++ cond_code = nds32_cond_code_str (inverse_code); ++ ++ /* b<cond><suffix> $r0, $r1, .L0 ++ => ++ b<inverse_cond><suffix> $r0, $r1, .LCB0 ++ j .L0 ++ .LCB0: ++ ++ or ++ ++ b<cond><suffix> $r0, $r1, .L0 ++ => ++ b<inverse_cond><suffix> $r0, $r1, .LCB0 ++ j .L0 ++ .LCB0: ++ */ ++ if (r5_p && TARGET_16_BIT) ++ { ++ snprintf (pattern, sizeof (pattern), ++ "b%ss38\t %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:", ++ cond_code, align); ++ } ++ else ++ { ++ snprintf (pattern, sizeof (pattern), ++ "b%s%s\t%%1, %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:", ++ cond_code, suffix, align); ++ } + } +- +- if ((GET_CODE (op) == PLUS) +- && (GET_CODE (XEXP (op, 0)) == REG) +- && (GET_CODE (XEXP (op, 1)) == CONST_INT)) ++ else + { +- val = INTVAL (XEXP (op, 1)); +- +- regno = REGNO(XEXP (op, 0)); +- +- if (regno > 7 +- && regno != SP_REGNUM +- && regno != FP_REGNUM) +- return ADDRESS_NOT_16BIT_FORMAT; +- +- switch (mode_test) ++ cond_code = nds32_cond_code_str (code); ++ if (r5_p && TARGET_16_BIT) + { +- case QImode: +- /* 333 format. */ +- if (val >= 0 && val < 8 && regno < 8) +- return ADDRESS_LO_REG_IMM3U; +- break; +- +- case HImode: +- /* 333 format. */ +- if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8) +- return ADDRESS_LO_REG_IMM3U; +- break; +- +- case SImode: +- case SFmode: +- case DFmode: +- /* fp imply 37 format. */ +- if ((regno == FP_REGNUM) && +- (val >= 0 && val < 512 && (val % 4 == 0))) +- return ADDRESS_FP_IMM7U; +- /* sp imply 37 format. 
*/ +- else if ((regno == SP_REGNUM) && +- (val >= 0 && val < 512 && (val % 4 == 0))) +- return ADDRESS_SP_IMM7U; +- /* 333 format. */ +- else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8) +- return ADDRESS_LO_REG_IMM3U; +- break; +- +- default: +- break; ++ /* b<cond>s38 $r1, .L0 */ ++ snprintf (pattern, sizeof (pattern), ++ "b%ss38\t %%2, %%3", cond_code); ++ } ++ else ++ { ++ /* b<cond><suffix> $r0, $r1, .L0 */ ++ snprintf (pattern, sizeof (pattern), ++ "b%s%s\t%%1, %%2, %%3", cond_code, suffix); + } + } + +- return ADDRESS_NOT_16BIT_FORMAT; ++ output_asm_insn (pattern, operands); + } + +-/* Output 16-bit store. */ +-const char * +-nds32_output_16bit_store (rtx *operands, int byte) ++static void ++output_cond_branch_compare_zero (int code, const char *suffix, ++ bool long_jump_p, rtx *operands, ++ bool ta_implied_p) + { +- char pattern[100]; +- char size; +- rtx code = XEXP (operands[0], 0); +- +- size = nds32_byte_to_size (byte); ++ char pattern[256]; ++ const char *cond_code; ++ bool align_p = NDS32_ALIGN_P (); ++ const char *align = align_p ? 
"\t.align\t2\n" : ""; ++ if (long_jump_p) ++ { ++ int inverse_code = nds32_inverse_cond_code (code); ++ cond_code = nds32_cond_code_str (inverse_code); + +- switch (nds32_mem_format (operands[0])) ++ if (ta_implied_p && TARGET_16_BIT) ++ { ++ /* b<cond>z<suffix> .L0 ++ => ++ b<inverse_cond>z<suffix> .LCB0 ++ j .L0 ++ .LCB0: ++ */ ++ snprintf (pattern, sizeof (pattern), ++ "b%sz%s\t.LCB%%=\n\tj\t%%2\n%s.LCB%%=:", ++ cond_code, suffix, align); ++ } ++ else ++ { ++ /* b<cond>z<suffix> $r0, .L0 ++ => ++ b<inverse_cond>z<suffix> $r0, .LCB0 ++ j .L0 ++ .LCB0: ++ */ ++ snprintf (pattern, sizeof (pattern), ++ "b%sz%s\t%%1, .LCB%%=\n\tj\t%%2\n%s.LCB%%=:", ++ cond_code, suffix, align); ++ } ++ } ++ else + { +- case ADDRESS_REG: +- operands[0] = code; +- output_asm_insn ("swi450\t%1, [%0]", operands); +- break; +- case ADDRESS_LO_REG_IMM3U: +- snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size); +- output_asm_insn (pattern, operands); +- break; +- case ADDRESS_POST_INC_LO_REG_IMM3U: +- snprintf (pattern, sizeof (pattern), "s%ci333.bi\t%%1, %%0", size); +- output_asm_insn (pattern, operands); +- break; +- case ADDRESS_FP_IMM7U: +- output_asm_insn ("swi37\t%1, %0", operands); +- break; +- case ADDRESS_SP_IMM7U: +- /* Get immediate value and set back to operands[1]. */ +- operands[0] = XEXP (code, 1); +- output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands); +- break; +- default: +- break; ++ cond_code = nds32_cond_code_str (code); ++ if (ta_implied_p && TARGET_16_BIT) ++ { ++ /* b<cond>z<suffix> .L0 */ ++ snprintf (pattern, sizeof (pattern), ++ "b%sz%s\t%%2", cond_code, suffix); ++ } ++ else ++ { ++ /* b<cond>z<suffix> $r0, .L0 */ ++ snprintf (pattern, sizeof (pattern), ++ "b%sz%s\t%%1, %%2", cond_code, suffix); ++ } + } + +- return ""; ++ output_asm_insn (pattern, operands); + } + +-/* Output 16-bit load. 
*/ +-const char * +-nds32_output_16bit_load (rtx *operands, int byte) ++static void ++nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p) + { +- char pattern[100]; +- unsigned char size; +- rtx code = XEXP (operands[1], 0); ++ rtx src_high_part; ++ rtx dst_high_part, dst_low_part; + +- size = nds32_byte_to_size (byte); ++ dst_high_part = nds32_di_high_part_subreg (dst); ++ src_high_part = nds32_di_high_part_subreg (src); ++ dst_low_part = nds32_di_low_part_subreg (dst); + +- switch (nds32_mem_format (operands[1])) ++ if (CONST_INT_P (shiftamount)) + { +- case ADDRESS_REG: +- operands[1] = code; +- output_asm_insn ("lwi450\t%0, [%1]", operands); +- break; +- case ADDRESS_LO_REG_IMM3U: +- snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size); +- output_asm_insn (pattern, operands); +- break; +- case ADDRESS_POST_INC_LO_REG_IMM3U: +- snprintf (pattern, sizeof (pattern), "l%ci333.bi\t%%0, %%1", size); +- output_asm_insn (pattern, operands); +- break; +- case ADDRESS_FP_IMM7U: +- output_asm_insn ("lwi37\t%0, %1", operands); +- break; +- case ADDRESS_SP_IMM7U: +- /* Get immediate value and set back to operands[0]. 
*/ +- operands[1] = XEXP (code, 1); +- output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands); +- break; +- default: +- break; ++ if (INTVAL (shiftamount) < 32) ++ { ++ if (logic_shift_p) ++ { ++ emit_insn (gen_uwext (dst_low_part, src, ++ shiftamount)); ++ emit_insn (gen_lshrsi3 (dst_high_part, src_high_part, ++ shiftamount)); ++ } ++ else ++ { ++ emit_insn (gen_wext (dst_low_part, src, ++ shiftamount)); ++ emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, ++ shiftamount)); ++ } ++ } ++ else ++ { ++ rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); ++ ++ if (logic_shift_p) ++ { ++ emit_insn (gen_lshrsi3 (dst_low_part, src_high_part, ++ new_shift_amout)); ++ emit_move_insn (dst_high_part, const0_rtx); ++ } ++ else ++ { ++ emit_insn (gen_ashrsi3 (dst_low_part, src_high_part, ++ new_shift_amout)); ++ emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, ++ GEN_INT (31))); ++ } ++ } + } ++ else ++ { ++ rtx dst_low_part_l32, dst_high_part_l32; ++ rtx dst_low_part_g32, dst_high_part_g32; ++ rtx new_shift_amout, select_reg; ++ dst_low_part_l32 = gen_reg_rtx (SImode); ++ dst_high_part_l32 = gen_reg_rtx (SImode); ++ dst_low_part_g32 = gen_reg_rtx (SImode); ++ dst_high_part_g32 = gen_reg_rtx (SImode); ++ new_shift_amout = gen_reg_rtx (SImode); ++ select_reg = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f))); ++ ++ if (logic_shift_p) ++ { ++ /* ++ if (shiftamount < 32) ++ dst_low_part = wext (src, shiftamount) ++ dst_high_part = src_high_part >> shiftamount ++ else ++ dst_low_part = src_high_part >> (shiftamount & 0x1f) ++ dst_high_part = 0 ++ */ ++ emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount)); ++ emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part, ++ shiftamount)); ++ ++ emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); ++ emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part, ++ new_shift_amout)); ++ emit_move_insn (dst_high_part_g32, const0_rtx); ++ } ++ else ++ 
{ ++ /* ++ if (shiftamount < 32) ++ dst_low_part = wext (src, shiftamount) ++ dst_high_part = src_high_part >> shiftamount ++ else ++ dst_low_part = src_high_part >> (shiftamount & 0x1f) ++ # shift 31 for sign extend ++ dst_high_part = src_high_part >> 31 ++ */ ++ emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); ++ emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part, ++ shiftamount)); ++ ++ emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); ++ emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part, ++ new_shift_amout)); ++ emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part, ++ GEN_INT (31))); ++ } + +- return ""; ++ emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); ++ ++ emit_insn (gen_cmovnsi (dst_low_part, select_reg, ++ dst_low_part_l32, dst_low_part_g32)); ++ emit_insn (gen_cmovnsi (dst_high_part, select_reg, ++ dst_high_part_l32, dst_high_part_g32)); ++ } + } + +-/* Output 32-bit store. */ +-const char * +-nds32_output_32bit_store (rtx *operands, int byte) +-{ +- char pattern[100]; +- unsigned char size; +- rtx code = XEXP (operands[0], 0); ++/* ------------------------------------------------------------------------ */ + +- size = nds32_byte_to_size (byte); ++/* PART 2: Auxiliary function for expand RTL pattern. */ + +- switch (GET_CODE (code)) ++enum nds32_expand_result_type ++nds32_expand_cbranch (rtx *operands) ++{ ++ rtx tmp_reg; ++ enum rtx_code code; ++ ++ code = GET_CODE (operands[0]); ++ ++ /* If operands[2] is (const_int 0), ++ we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions. ++ So we have gcc generate original template rtx. 
*/ ++ if (GET_CODE (operands[2]) == CONST_INT) ++ if (INTVAL (operands[2]) == 0) ++ if ((code != GTU) ++ && (code != GEU) ++ && (code != LTU) ++ && (code != LEU)) ++ return EXPAND_CREATE_TEMPLATE; ++ ++ /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than) ++ behavior for the comparison, we might need to generate other ++ rtx patterns to achieve same semantic. */ ++ switch (code) + { +- case REG: +- /* (mem (reg X)) +- => access location by using register, +- use "sbi / shi / swi" */ +- snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); +- break; +- +- case SYMBOL_REF: +- case CONST: +- /* (mem (symbol_ref X)) +- (mem (const (...))) +- => access global variables, +- use "sbi.gp / shi.gp / swi.gp" */ +- operands[0] = XEXP (operands[0], 0); +- snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size); +- break; ++ case GT: ++ case GTU: ++ if (GET_CODE (operands[2]) == CONST_INT) ++ { ++ /* GT reg_A, const_int => !(LT reg_A, const_int + 1) */ ++ if (optimize_size || optimize == 0) ++ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ else ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ /* We want to plus 1 into the integer value ++ of operands[2] to create 'slt' instruction. ++ This caculation is performed on the host machine, ++ which may be 64-bit integer. ++ So the meaning of caculation result may be ++ different from the 32-bit nds32 target. ++ ++ For example: ++ 0x7fffffff + 0x1 -> 0x80000000, ++ this value is POSITIVE on 64-bit machine, ++ but the expected value on 32-bit nds32 target ++ should be NEGATIVE value. ++ ++ Hence, instead of using GEN_INT(), we use gen_int_mode() to ++ explicitly create SImode constant rtx. 
*/ ++ enum rtx_code cmp_code; ++ ++ rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode); ++ if (satisfies_constraint_Is15 (plus1)) ++ { ++ operands[2] = plus1; ++ cmp_code = EQ; ++ if (code == GT) ++ { ++ /* GT, use slts instruction */ ++ emit_insn ( ++ gen_slts_compare (tmp_reg, operands[1], operands[2])); ++ } ++ else ++ { ++ /* GTU, use slt instruction */ ++ emit_insn ( ++ gen_slt_compare (tmp_reg, operands[1], operands[2])); ++ } ++ } ++ else ++ { ++ cmp_code = NE; ++ if (code == GT) ++ { ++ /* GT, use slts instruction */ ++ emit_insn ( ++ gen_slts_compare (tmp_reg, operands[2], operands[1])); ++ } ++ else ++ { ++ /* GTU, use slt instruction */ ++ emit_insn ( ++ gen_slt_compare (tmp_reg, operands[2], operands[1])); ++ } ++ } ++ ++ PUT_CODE (operands[0], cmp_code); ++ operands[1] = tmp_reg; ++ operands[2] = const0_rtx; ++ emit_insn (gen_cbranchsi4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ ++ return EXPAND_DONE; ++ } ++ else ++ { ++ /* GT reg_A, reg_B => LT reg_B, reg_A */ ++ if (optimize_size || optimize == 0) ++ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ else ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ if (code == GT) ++ { ++ /* GT, use slts instruction */ ++ emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); ++ } ++ else ++ { ++ /* GTU, use slt instruction */ ++ emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); ++ } ++ ++ PUT_CODE (operands[0], NE); ++ operands[1] = tmp_reg; ++ operands[2] = const0_rtx; ++ emit_insn (gen_cbranchsi4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ ++ return EXPAND_DONE; ++ } + +- case POST_INC: +- /* (mem (post_inc reg)) +- => access location by using register which will be post increment, +- use "sbi.bi / shi.bi / swi.bi" */ +- snprintf (pattern, sizeof (pattern), +- "s%ci.bi\t%%1, %%0, %d", size, byte); +- break; ++ case GE: ++ case GEU: ++ /* GE reg_A, reg_B => !(LT reg_A, reg_B) */ ++ /* GE reg_A, const_int => !(LT reg_A, const_int) */ ++ if (optimize_size 
|| optimize == 0) ++ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ else ++ tmp_reg = gen_reg_rtx (SImode); + +- case POST_DEC: +- /* (mem (post_dec reg)) +- => access location by using register which will be post decrement, +- use "sbi.bi / shi.bi / swi.bi" */ +- snprintf (pattern, sizeof (pattern), +- "s%ci.bi\t%%1, %%0, -%d", size, byte); +- break; ++ if (code == GE) ++ { ++ /* GE, use slts instruction */ ++ emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); ++ } ++ else ++ { ++ /* GEU, use slt instruction */ ++ emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); ++ } + +- case POST_MODIFY: +- switch (GET_CODE (XEXP (XEXP (code, 1), 1))) ++ PUT_CODE (operands[0], EQ); ++ operands[1] = tmp_reg; ++ operands[2] = const0_rtx; ++ emit_insn (gen_cbranchsi4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ ++ return EXPAND_DONE; ++ ++ case LT: ++ case LTU: ++ /* LT reg_A, reg_B => LT reg_A, reg_B */ ++ /* LT reg_A, const_int => LT reg_A, const_int */ ++ if (optimize_size || optimize == 0) ++ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ else ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ if (code == LT) + { +- case REG: +- case SUBREG: +- /* (mem (post_modify (reg) (plus (reg) (reg)))) +- => access location by using register which will be +- post modified with reg, +- use "sb.bi/ sh.bi / sw.bi" */ +- snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size); +- break; +- case CONST_INT: +- /* (mem (post_modify (reg) (plus (reg) (const_int)))) +- => access location by using register which will be +- post modified with const_int, +- use "sbi.bi/ shi.bi / swi.bi" */ +- snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size); +- break; +- default: +- abort (); ++ /* LT, use slts instruction */ ++ emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); ++ } ++ else ++ { ++ /* LTU, use slt instruction */ ++ emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); + } +- break; + +- case PLUS: +- switch (GET_CODE 
(XEXP (code, 1))) ++ PUT_CODE (operands[0], NE); ++ operands[1] = tmp_reg; ++ operands[2] = const0_rtx; ++ emit_insn (gen_cbranchsi4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ ++ return EXPAND_DONE; ++ ++ case LE: ++ case LEU: ++ if (GET_CODE (operands[2]) == CONST_INT) + { +- case REG: +- case SUBREG: +- /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) +- => access location by adding two registers, +- use "sb / sh / sw" */ +- snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size); +- break; +- case CONST_INT: +- /* (mem (plus reg const_int)) +- => access location by adding one register with const_int, +- use "sbi / shi / swi" */ +- snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); +- break; +- default: +- abort (); ++ /* LE reg_A, const_int => LT reg_A, const_int + 1 */ ++ if (optimize_size || optimize == 0) ++ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ else ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ enum rtx_code cmp_code; ++ /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN). ++ We better have an assert here in case GCC does not properly ++ optimize it away. The INT_MAX here is 0x7fffffff for target. 
*/ ++ rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode); ++ if (satisfies_constraint_Is15 (plus1)) ++ { ++ operands[2] = plus1; ++ cmp_code = NE; ++ if (code == LE) ++ { ++ /* LE, use slts instruction */ ++ emit_insn ( ++ gen_slts_compare (tmp_reg, operands[1], operands[2])); ++ } ++ else ++ { ++ /* LEU, use slt instruction */ ++ emit_insn ( ++ gen_slt_compare (tmp_reg, operands[1], operands[2])); ++ } ++ } ++ else ++ { ++ cmp_code = EQ; ++ if (code == LE) ++ { ++ /* LE, use slts instruction */ ++ emit_insn ( ++ gen_slts_compare (tmp_reg, operands[2], operands[1])); ++ } ++ else ++ { ++ /* LEU, use slt instruction */ ++ emit_insn ( ++ gen_slt_compare (tmp_reg, operands[2], operands[1])); ++ } ++ } ++ ++ PUT_CODE (operands[0], cmp_code); ++ operands[1] = tmp_reg; ++ operands[2] = const0_rtx; ++ emit_insn (gen_cbranchsi4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ ++ return EXPAND_DONE; ++ } ++ else ++ { ++ /* LE reg_A, reg_B => !(LT reg_B, reg_A) */ ++ if (optimize_size || optimize == 0) ++ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ else ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ if (code == LE) ++ { ++ /* LE, use slts instruction */ ++ emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); ++ } ++ else ++ { ++ /* LEU, use slt instruction */ ++ emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); ++ } ++ ++ PUT_CODE (operands[0], EQ); ++ operands[1] = tmp_reg; ++ operands[2] = const0_rtx; ++ emit_insn (gen_cbranchsi4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ ++ return EXPAND_DONE; + } +- break; + +- case LO_SUM: +- operands[2] = XEXP (code, 1); +- operands[0] = XEXP (code, 0); +- snprintf (pattern, sizeof (pattern), +- "s%ci\t%%1, [%%0 + lo12(%%2)]", size); +- break; ++ case EQ: ++ case NE: ++ /* NDS32 ISA has various form for eq/ne behavior no matter ++ what kind of the operand is. ++ So just generate original template rtx. 
*/ ++ ++ /* Put operands[2] into register if operands[2] is a large ++ const_int or ISAv2. */ ++ if (GET_CODE (operands[2]) == CONST_INT ++ && (!satisfies_constraint_Is11 (operands[2]) ++ || TARGET_ISA_V2)) ++ operands[2] = force_reg (SImode, operands[2]); ++ ++ return EXPAND_CREATE_TEMPLATE; + + default: +- abort (); ++ return EXPAND_FAIL; + } +- +- output_asm_insn (pattern, operands); +- return ""; + } + +-/* Output 32-bit load. */ +-const char * +-nds32_output_32bit_load (rtx *operands, int byte) ++enum nds32_expand_result_type ++nds32_expand_cstore (rtx *operands) + { +- char pattern[100]; +- unsigned char size; +- rtx code; +- +- code = XEXP (operands[1], 0); ++ rtx tmp_reg; ++ enum rtx_code code; + +- size = nds32_byte_to_size (byte); ++ code = GET_CODE (operands[1]); + +- switch (GET_CODE (code)) ++ switch (code) + { +- case REG: +- /* (mem (reg X)) +- => access location by using register, +- use "lbi / lhi / lwi" */ +- snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); +- break; +- +- case SYMBOL_REF: +- case CONST: +- /* (mem (symbol_ref X)) +- (mem (const (...))) +- => access global variables, +- use "lbi.gp / lhi.gp / lwi.gp" */ +- operands[1] = XEXP (operands[1], 0); +- snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size); +- break; ++ case EQ: ++ case NE: ++ if (GET_CODE (operands[3]) == CONST_INT) ++ { ++ /* reg_R = (reg_A == const_int_B) ++ --> xori reg_C, reg_A, const_int_B ++ slti reg_R, reg_C, const_int_1 ++ reg_R = (reg_A != const_int_B) ++ --> xori reg_C, reg_A, const_int_B ++ slti reg_R, const_int0, reg_C */ ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ /* If the integer value is not in the range of imm15s, ++ we need to force register first because our addsi3 pattern ++ only accept nds32_rimm15s_operand predicate. 
*/ ++ rtx new_imm = gen_int_mode (-INTVAL (operands[3]), SImode); ++ if (satisfies_constraint_Is15 (new_imm)) ++ emit_insn (gen_addsi3 (tmp_reg, operands[2], new_imm)); ++ else ++ { ++ if (!(satisfies_constraint_Iu15 (operands[3]) ++ || (TARGET_EXT_PERF ++ && satisfies_constraint_It15 (operands[3])))) ++ operands[3] = force_reg (SImode, operands[3]); ++ emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); ++ } ++ ++ if (code == EQ) ++ emit_insn (gen_slt_eq0 (operands[0], tmp_reg)); ++ else ++ emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); ++ ++ return EXPAND_DONE; ++ } ++ else ++ { ++ /* reg_R = (reg_A == reg_B) ++ --> xor reg_C, reg_A, reg_B ++ slti reg_R, reg_C, const_int_1 ++ reg_R = (reg_A != reg_B) ++ --> xor reg_C, reg_A, reg_B ++ slti reg_R, const_int0, reg_C */ ++ tmp_reg = gen_reg_rtx (SImode); ++ emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); ++ if (code == EQ) ++ emit_insn (gen_slt_eq0 (operands[0], tmp_reg)); ++ else ++ emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); ++ ++ return EXPAND_DONE; ++ } ++ case GT: ++ case GTU: ++ /* reg_R = (reg_A > reg_B) --> slt reg_R, reg_B, reg_A */ ++ /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ ++ if (code == GT) ++ { ++ /* GT, use slts instruction */ ++ emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); ++ } ++ else ++ { ++ /* GTU, use slt instruction */ ++ emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); ++ } + +- case POST_INC: +- /* (mem (post_inc reg)) +- => access location by using register which will be post increment, +- use "lbi.bi / lhi.bi / lwi.bi" */ +- snprintf (pattern, sizeof (pattern), +- "l%ci.bi\t%%0, %%1, %d", size, byte); +- break; ++ return EXPAND_DONE; + +- case POST_DEC: +- /* (mem (post_dec reg)) +- => access location by using register which will be post decrement, +- use "lbi.bi / lhi.bi / lwi.bi" */ +- snprintf (pattern, sizeof (pattern), +- "l%ci.bi\t%%0, %%1, -%d", size, 
byte); +- break; ++ case GE: ++ case GEU: ++ if (GET_CODE (operands[3]) == CONST_INT) ++ { ++ /* reg_R = (reg_A >= const_int_B) ++ --> movi reg_C, const_int_B - 1 ++ slt reg_R, reg_C, reg_A */ ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_movsi (tmp_reg, ++ gen_int_mode (INTVAL (operands[3]) - 1, ++ SImode))); ++ if (code == GE) ++ { ++ /* GE, use slts instruction */ ++ emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); ++ } ++ else ++ { ++ /* GEU, use slt instruction */ ++ emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); ++ } ++ ++ return EXPAND_DONE; ++ } ++ else ++ { ++ /* reg_R = (reg_A >= reg_B) ++ --> slt reg_R, reg_A, reg_B ++ xori reg_R, reg_R, const_int_1 */ ++ if (code == GE) ++ { ++ /* GE, use slts instruction */ ++ emit_insn (gen_slts_compare (operands[0], ++ operands[2], operands[3])); ++ } ++ else ++ { ++ /* GEU, use slt instruction */ ++ emit_insn (gen_slt_compare (operands[0], ++ operands[2], operands[3])); ++ } ++ ++ /* perform 'not' behavior */ ++ emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); ++ ++ return EXPAND_DONE; ++ } + +- case POST_MODIFY: +- switch (GET_CODE (XEXP (XEXP (code, 1), 1))) ++ case LT: ++ case LTU: ++ /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ ++ /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ ++ if (code == LT) + { +- case REG: +- case SUBREG: +- /* (mem (post_modify (reg) (plus (reg) (reg)))) +- => access location by using register which will be +- post modified with reg, +- use "lb.bi/ lh.bi / lw.bi" */ +- snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size); +- break; +- case CONST_INT: +- /* (mem (post_modify (reg) (plus (reg) (const_int)))) +- => access location by using register which will be +- post modified with const_int, +- use "lbi.bi/ lhi.bi / lwi.bi" */ +- snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size); +- break; +- default: +- abort (); ++ /* LT, use slts instruction */ ++ emit_insn 
(gen_slts_compare (operands[0], operands[2], operands[3])); ++ } ++ else ++ { ++ /* LTU, use slt instruction */ ++ emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); + } +- break; + +- case PLUS: +- switch (GET_CODE (XEXP (code, 1))) ++ return EXPAND_DONE; ++ ++ case LE: ++ case LEU: ++ if (GET_CODE (operands[3]) == CONST_INT) + { +- case REG: +- case SUBREG: +- /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) +- use "lb / lh / lw" */ +- snprintf (pattern, sizeof (pattern), "l%c\t%%0, %%1", size); +- break; +- case CONST_INT: +- /* (mem (plus reg const_int)) +- => access location by adding one register with const_int, +- use "lbi / lhi / lwi" */ +- snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); +- break; +- default: +- abort (); ++ /* reg_R = (reg_A <= const_int_B) ++ --> movi reg_C, const_int_B + 1 ++ slt reg_R, reg_A, reg_C */ ++ tmp_reg = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_movsi (tmp_reg, ++ gen_int_mode (INTVAL (operands[3]) + 1, ++ SImode))); ++ if (code == LE) ++ { ++ /* LE, use slts instruction */ ++ emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); ++ } ++ else ++ { ++ /* LEU, use slt instruction */ ++ emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); ++ } ++ ++ return EXPAND_DONE; ++ } ++ else ++ { ++ /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A ++ xori reg_R, reg_R, const_int_1 */ ++ if (code == LE) ++ { ++ /* LE, use slts instruction */ ++ emit_insn (gen_slts_compare (operands[0], ++ operands[3], operands[2])); ++ } ++ else ++ { ++ /* LEU, use slt instruction */ ++ emit_insn (gen_slt_compare (operands[0], ++ operands[3], operands[2])); ++ } ++ ++ /* perform 'not' behavior */ ++ emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); ++ ++ return EXPAND_DONE; + } +- break; + +- case LO_SUM: +- operands[2] = XEXP (code, 1); +- operands[1] = XEXP (code, 0); +- snprintf (pattern, sizeof (pattern), +- "l%ci\t%%0, [%%1 + lo12(%%2)]", size); +- break; + + 
default: +- abort (); ++ gcc_unreachable (); + } +- +- output_asm_insn (pattern, operands); +- return ""; + } + +-/* Output 32-bit load with signed extension. */ +-const char * +-nds32_output_32bit_load_s (rtx *operands, int byte) ++void ++nds32_expand_float_cbranch (rtx *operands) + { +- char pattern[100]; +- unsigned char size; +- rtx code; ++ enum rtx_code code = GET_CODE (operands[0]); ++ enum rtx_code new_code = code; ++ rtx cmp_op0 = operands[1]; ++ rtx cmp_op1 = operands[2]; ++ rtx tmp_reg; ++ rtx tmp; + +- code = XEXP (operands[1], 0); ++ int reverse = 0; + +- size = nds32_byte_to_size (byte); ++ /* Main Goal: Use compare instruction + branch instruction. + +- switch (GET_CODE (code)) ++ For example: ++ GT, GE: swap condition and swap operands and generate ++ compare instruction(LT, LE) + branch not equal instruction. ++ ++ UNORDERED, LT, LE, EQ: no need to change and generate ++ compare instruction(UNORDERED, LT, LE, EQ) + branch not equal instruction. ++ ++ ORDERED, NE: reverse condition and generate ++ compare instruction(EQ) + branch equal instruction. 
*/ ++ ++ switch (code) + { +- case REG: +- /* (mem (reg X)) +- => access location by using register, +- use "lbsi / lhsi" */ +- snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); ++ case GT: ++ case GE: ++ tmp = cmp_op0; ++ cmp_op0 = cmp_op1; ++ cmp_op1 = tmp; ++ new_code = swap_condition (new_code); + break; +- +- case SYMBOL_REF: +- case CONST: +- /* (mem (symbol_ref X)) +- (mem (const (...))) +- => access global variables, +- use "lbsi.gp / lhsi.gp" */ +- operands[1] = XEXP (operands[1], 0); +- snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size); ++ case UNORDERED: ++ case LT: ++ case LE: ++ case EQ: + break; +- +- case POST_INC: +- /* (mem (post_inc reg)) +- => access location by using register which will be post increment, +- use "lbsi.bi / lhsi.bi" */ +- snprintf (pattern, sizeof (pattern), +- "l%csi.bi\t%%0, %%1, %d", size, byte); ++ case ORDERED: ++ case NE: ++ new_code = reverse_condition (new_code); ++ reverse = 1; ++ break; ++ case UNGT: ++ case UNGE: ++ new_code = reverse_condition_maybe_unordered (new_code); ++ reverse = 1; + break; ++ case UNLT: ++ case UNLE: ++ new_code = reverse_condition_maybe_unordered (new_code); ++ tmp = cmp_op0; ++ cmp_op0 = cmp_op1; ++ cmp_op1 = tmp; ++ new_code = swap_condition (new_code); ++ reverse = 1; ++ break; ++ default: ++ return; ++ } + +- case POST_DEC: +- /* (mem (post_dec reg)) +- => access location by using register which will be post decrement, +- use "lbsi.bi / lhsi.bi" */ +- snprintf (pattern, sizeof (pattern), +- "l%csi.bi\t%%0, %%1, -%d", size, byte); ++ tmp_reg = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (tmp_reg, ++ gen_rtx_fmt_ee (new_code, SImode, ++ cmp_op0, cmp_op1))); ++ ++ PUT_CODE (operands[0], reverse ? 
EQ : NE); ++ emit_insn (gen_cbranchsi4 (operands[0], tmp_reg, ++ const0_rtx, operands[3])); ++} ++ ++void ++nds32_expand_float_cstore (rtx *operands) ++{ ++ enum rtx_code code = GET_CODE (operands[1]); ++ enum rtx_code new_code = code; ++ enum machine_mode mode = GET_MODE (operands[2]); ++ ++ rtx cmp_op0 = operands[2]; ++ rtx cmp_op1 = operands[3]; ++ rtx tmp; ++ ++ /* Main Goal: Use compare instruction to store value. ++ ++ For example: ++ GT, GE: swap condition and swap operands. ++ reg_R = (reg_A > reg_B) --> fcmplt reg_R, reg_B, reg_A ++ reg_R = (reg_A >= reg_B) --> fcmple reg_R, reg_B, reg_A ++ ++ LT, LE, EQ: no need to change, it is already LT, LE, EQ. ++ reg_R = (reg_A < reg_B) --> fcmplt reg_R, reg_A, reg_B ++ reg_R = (reg_A <= reg_B) --> fcmple reg_R, reg_A, reg_B ++ reg_R = (reg_A == reg_B) --> fcmpeq reg_R, reg_A, reg_B ++ ++ ORDERED: reverse condition and use xor instruction to achieve 'ORDERED'. ++ reg_R = ordered (reg_A, reg_B) --> fcmpun reg_R, reg_A, reg_B ++ xor reg_R, reg_R, const1_rtx ++ ++ NE: reverse condition and use xor instruction to achieve 'NE'.
++ reg_R = (reg_A != reg_B) --> fcmpeq reg_R, reg_A, reg_B ++ xor reg_R, reg_R, const1_rtx */ ++ switch (code) ++ { ++ case GT: ++ case GE: ++ tmp = cmp_op0; ++ cmp_op0 = cmp_op1; ++ cmp_op1 =tmp; ++ new_code = swap_condition (new_code); + break; ++ case UNORDERED: ++ case LT: ++ case LE: ++ case EQ: ++ break; ++ case ORDERED: ++ if (mode == SFmode) ++ emit_insn (gen_cmpsf_un (operands[0], cmp_op0, cmp_op1)); ++ else ++ emit_insn (gen_cmpdf_un (operands[0], cmp_op0, cmp_op1)); + +- case POST_MODIFY: +- switch (GET_CODE (XEXP (XEXP (code, 1), 1))) ++ emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); ++ return; ++ case NE: ++ if (mode == SFmode) ++ emit_insn (gen_cmpsf_eq (operands[0], cmp_op0, cmp_op1)); ++ else ++ emit_insn (gen_cmpdf_eq (operands[0], cmp_op0, cmp_op1)); ++ ++ emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); ++ return; ++ default: ++ return; ++ } ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_fmt_ee (new_code, SImode, ++ cmp_op0, cmp_op1))); ++} ++ ++enum nds32_expand_result_type ++nds32_expand_movcc (rtx *operands) ++{ ++ enum rtx_code code = GET_CODE (operands[1]); ++ enum rtx_code new_code = code; ++ enum machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); ++ rtx cmp_op0 = XEXP (operands[1], 0); ++ rtx cmp_op1 = XEXP (operands[1], 1); ++ rtx tmp; ++ ++ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) ++ && XEXP (operands[1], 1) == const0_rtx) ++ { ++ /* If the operands[1] rtx is already (eq X 0) or (ne X 0), ++ we have gcc generate original template rtx. */ ++ return EXPAND_CREATE_TEMPLATE; ++ } ++ else if ((TARGET_FPU_SINGLE && cmp0_mode == SFmode) ++ || (TARGET_FPU_DOUBLE && cmp0_mode == DFmode)) ++ { ++ nds32_expand_float_movcc (operands); ++ } ++ else ++ { ++ /* Since there is only 'slt'(Set when Less Than) instruction for ++ comparison in Andes ISA, the major strategy we use here is to ++ convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination. 
++ We design constraints properly so that the reload phase will assist ++ to make one source operand to use same register as result operand. ++ Then we can use cmovz/cmovn to catch the other source operand ++ which has different register. */ ++ int reverse = 0; ++ ++ /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part ++ Strategy : Reverse condition and swap comparison operands ++ ++ For example: ++ ++ a <= b ? P : Q (LE or LEU) ++ --> a > b ? Q : P (reverse condition) ++ --> b < a ? Q : P (swap comparison operands to achieve 'LT/LTU') ++ ++ a >= b ? P : Q (GE or GEU) ++ --> a < b ? Q : P (reverse condition to achieve 'LT/LTU') ++ ++ a < b ? P : Q (LT or LTU) ++ --> (NO NEED TO CHANGE, it is already 'LT/LTU') ++ ++ a > b ? P : Q (GT or GTU) ++ --> b < a ? P : Q (swap comparison operands to achieve 'LT/LTU') */ ++ switch (code) + { +- case REG: +- case SUBREG: +- /* (mem (post_modify (reg) (plus (reg) (reg)))) +- => access location by using register which will be +- post modified with reg, +- use "lbs.bi/ lhs.bi" */ +- snprintf (pattern, sizeof (pattern), "l%cs.bi\t%%0, %%1", size); ++ case GE: case GEU: case LE: case LEU: ++ new_code = reverse_condition (code); ++ reverse = 1; + break; +- case CONST_INT: +- /* (mem (post_modify (reg) (plus (reg) (const_int)))) +- => access location by using register which will be +- post modified with const_int, +- use "lbsi.bi/ lhsi.bi" */ +- snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size); ++ case EQ: ++ case NE: ++ /* no need to reverse condition */ + break; + default: +- abort (); ++ return EXPAND_FAIL; + } +- break; + +- case PLUS: +- switch (GET_CODE (XEXP (code, 1))) ++ /* For '>' comparison operator, we swap operands ++ so that we can have 'LT/LTU' operator. 
*/ ++ if (new_code == GT || new_code == GTU) + { +- case REG: +- case SUBREG: +- /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) +- use "lbs / lhs" */ +- snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size); ++ tmp = cmp_op0; ++ cmp_op0 = cmp_op1; ++ cmp_op1 = tmp; ++ ++ new_code = swap_condition (new_code); ++ } ++ ++ /* Use a temporary register to store slt/slts result. */ ++ tmp = gen_reg_rtx (SImode); ++ ++ if (new_code == EQ || new_code == NE) ++ { ++ emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); ++ /* tmp == 0 if cmp_op0 == cmp_op1. */ ++ operands[1] = gen_rtx_fmt_ee (new_code, VOIDmode, tmp, const0_rtx); ++ } ++ else ++ { ++ /* This emit_insn will create corresponding 'slt/slts' ++ insturction. */ ++ if (new_code == LT) ++ emit_insn (gen_slts_compare (tmp, cmp_op0, cmp_op1)); ++ else if (new_code == LTU) ++ emit_insn (gen_slt_compare (tmp, cmp_op0, cmp_op1)); ++ else ++ gcc_unreachable (); ++ ++ /* Change comparison semantic into (eq X 0) or (ne X 0) behavior ++ so that cmovz or cmovn will be matched later. ++ ++ For reverse condition cases, we want to create a semantic that: ++ (eq X 0) --> pick up "else" part ++ For normal cases, we want to create a semantic that: ++ (ne X 0) --> pick up "then" part ++ ++ Later we will have cmovz/cmovn instruction pattern to ++ match corresponding behavior and output instruction. */ ++ operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, ++ VOIDmode, tmp, const0_rtx); ++ } ++ } ++ return EXPAND_CREATE_TEMPLATE; ++} ++ ++void ++nds32_expand_float_movcc (rtx *operands) ++{ ++ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) ++ && GET_MODE (XEXP (operands[1], 0)) == SImode ++ && XEXP (operands[1], 1) == const0_rtx) ++ { ++ /* If the operands[1] rtx is already (eq X 0) or (ne X 0), ++ we have gcc generate original template rtx. 
*/ ++ return; ++ } ++ else ++ { ++ enum rtx_code code = GET_CODE (operands[1]); ++ enum rtx_code new_code = code; ++ enum machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); ++ enum machine_mode cmp1_mode = GET_MODE (XEXP (operands[1], 1)); ++ rtx cmp_op0 = XEXP (operands[1], 0); ++ rtx cmp_op1 = XEXP (operands[1], 1); ++ rtx tmp; ++ ++ /* Compare instruction Operations: (cmp_op0 condition cmp_op1) ? 1 : 0, ++ when result is 1, and 'reverse' be set 1 for fcmovzs instruction. */ ++ int reverse = 0; ++ ++ /* Main Goal: Use compare instruction + conditional move instruction. ++ Strategy : swap condition and swap comparison operands. ++ ++ For example: ++ a > b ? P : Q (GT) ++ --> b < a ? P : Q (swap condition and swap comparison ++ operands to achieve 'LT') ++ ++ a >= b ? P : Q (GE) ++ --> b <= a ? P : Q (swap condition and swap comparison ++ operands to achieve 'LE') ++ ++ a < b ? P : Q (LT) ++ --> (NO NEED TO CHANGE, it is already 'LT') ++ ++ a <= b ? P : Q (LE) ++ --> (NO NEED TO CHANGE, it is already 'LE') ++ ++ a == b ?
P : Q (EQ) ++ --> (NO NEED TO CHANGE, it is already 'EQ') */ ++ ++ switch (code) ++ { ++ case GT: ++ case GE: ++ tmp = cmp_op0; ++ cmp_op0 = cmp_op1; ++ cmp_op1 =tmp; ++ new_code = swap_condition (new_code); + break; +- case CONST_INT: +- /* (mem (plus reg const_int)) +- => access location by adding one register with const_int, +- use "lbsi / lhsi" */ +- snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); ++ case UNORDERED: ++ case LT: ++ case LE: ++ case EQ: ++ break; ++ case ORDERED: ++ case NE: ++ reverse = 1; ++ new_code = reverse_condition (new_code); ++ break; ++ case UNGT: ++ case UNGE: ++ new_code = reverse_condition_maybe_unordered (new_code); ++ reverse = 1; ++ break; ++ case UNLT: ++ case UNLE: ++ new_code = reverse_condition_maybe_unordered (new_code); ++ tmp = cmp_op0; ++ cmp_op0 = cmp_op1; ++ cmp_op1 = tmp; ++ new_code = swap_condition (new_code); ++ reverse = 1; + break; + default: +- abort (); ++ return; + } +- break; + +- case LO_SUM: +- operands[2] = XEXP (code, 1); +- operands[1] = XEXP (code, 0); +- snprintf (pattern, sizeof (pattern), +- "l%csi\t%%0, [%%1 + lo12(%%2)]", size); +- break; ++ /* Use a temporary register to store fcmpxxs result. */ ++ tmp = gen_reg_rtx (SImode); ++ ++ /* Create float compare instruction for SFmode and DFmode, ++ other MODE using cstoresi create compare instruction. */ ++ if ((cmp0_mode == DFmode || cmp0_mode == SFmode) ++ && (cmp1_mode == DFmode || cmp1_mode == SFmode)) ++ { ++ /* This emit_insn create corresponding float compare instruction */ ++ emit_insn (gen_rtx_SET (tmp, ++ gen_rtx_fmt_ee (new_code, SImode, ++ cmp_op0, cmp_op1))); ++ } ++ else ++ { ++ /* This emit_insn using cstoresi create corresponding ++ compare instruction */ ++ PUT_CODE (operands[1], new_code); ++ emit_insn (gen_cstoresi4 (tmp, operands[1], ++ cmp_op0, cmp_op1)); ++ } ++ /* operands[1] crete corresponding condition move instruction ++ for fcmovzs and fcmovns. */ ++ operands[1] = gen_rtx_fmt_ee (reverse ? 
EQ : NE, ++ VOIDmode, tmp, const0_rtx); ++ } ++} ++ ++void ++nds32_emit_push_fpr_callee_saved (int base_offset) ++{ ++ rtx fpu_insn; ++ rtx reg, mem; ++ unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; ++ unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; ++ ++ while (regno <= last_fpr) ++ { ++ /* Handling two registers, using fsdi instruction. */ ++ reg = gen_rtx_REG (DFmode, regno); ++ mem = gen_frame_mem (DFmode, plus_constant (Pmode, ++ stack_pointer_rtx, ++ base_offset)); ++ base_offset += 8; ++ regno += 2; ++ fpu_insn = emit_move_insn (mem, reg); ++ RTX_FRAME_RELATED_P (fpu_insn) = 1; ++ } ++} ++ ++void ++nds32_emit_pop_fpr_callee_saved (int gpr_padding_size) ++{ ++ rtx fpu_insn; ++ rtx reg, mem, addr; ++ rtx dwarf, adjust_sp_rtx; ++ unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; ++ unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; ++ int padding = 0; ++ ++ while (regno <= last_fpr) ++ { ++ /* Handling two registers, using fldi.bi instruction. */ ++ if ((regno + 1) >= last_fpr) ++ padding = gpr_padding_size; ++ ++ reg = gen_rtx_REG (DFmode, (regno)); ++ addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ GEN_INT (8 + padding))); ++ mem = gen_frame_mem (DFmode, addr); ++ regno += 2; ++ fpu_insn = emit_move_insn (reg, mem); ++ ++ adjust_sp_rtx = ++ gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ 8 + padding)); ++ ++ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); ++ /* Tell gcc we adjust SP in this insn. 
*/ ++ dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), ++ dwarf); ++ RTX_FRAME_RELATED_P (fpu_insn) = 1; ++ REG_NOTES (fpu_insn) = dwarf; ++ } ++} ++ ++void ++nds32_emit_v3pop_fpr_callee_saved (int base) ++{ ++ int fpu_base_addr = base; ++ int regno; ++ rtx fpu_insn; ++ rtx reg, mem; ++ rtx dwarf; ++ ++ regno = cfun->machine->callee_saved_first_fpr_regno; ++ while (regno <= cfun->machine->callee_saved_last_fpr_regno) ++ { ++ /* Handling two registers, using fldi instruction. */ ++ reg = gen_rtx_REG (DFmode, regno); ++ mem = gen_frame_mem (DFmode, plus_constant (Pmode, ++ stack_pointer_rtx, ++ fpu_base_addr)); ++ fpu_base_addr += 8; ++ regno += 2; ++ fpu_insn = emit_move_insn (reg, mem); ++ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); ++ RTX_FRAME_RELATED_P (fpu_insn) = 1; ++ REG_NOTES (fpu_insn) = dwarf; ++ } ++} ++ ++enum nds32_expand_result_type ++nds32_expand_extv (rtx *operands) ++{ ++ gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3])); ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ rtx dst = operands[0]; ++ rtx src = operands[1]; ++ ++ if (MEM_P (src) ++ && width == 32 ++ && (bitpos % BITS_PER_UNIT) == 0 ++ && GET_MODE_BITSIZE (GET_MODE (dst)) == width) ++ { ++ rtx newmem = adjust_address (src, GET_MODE (dst), ++ bitpos / BITS_PER_UNIT); ++ ++ rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); ++ ++ emit_insn (gen_unaligned_loadsi (dst, base_addr)); ++ ++ return EXPAND_DONE; ++ } ++ return EXPAND_FAIL; ++} ++ ++enum nds32_expand_result_type ++nds32_expand_insv (rtx *operands) ++{ ++ gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2])); ++ HOST_WIDE_INT width = INTVAL (operands[1]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[2]); ++ rtx dst = operands[0]; ++ rtx src = operands[3]; ++ ++ if (MEM_P (dst) ++ && width == 32 ++ && (bitpos % BITS_PER_UNIT) == 0 ++ && GET_MODE_BITSIZE (GET_MODE (src)) == width) ++ { ++ rtx newmem = adjust_address (dst, 
GET_MODE (src), ++ bitpos / BITS_PER_UNIT); ++ ++ rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); ++ ++ emit_insn (gen_unaligned_storesi (base_addr, src)); ++ ++ return EXPAND_DONE; ++ } ++ return EXPAND_FAIL; ++} ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* PART 3: Auxiliary function for output asm template. */ ++ ++/* Function to generate PC relative jump table. ++ Refer to nds32.md for more details. ++ ++ The following is the sample for the case that diff value ++ can be presented in '.short' size. ++ ++ addi $r1, $r1, -(case_lower_bound) ++ slti $ta, $r1, (case_number) ++ beqz $ta, .L_skip_label ++ ++ la $ta, .L35 ! get jump table address ++ lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry ++ addi $ta, $r1, $ta ++ jr5 $ta ++ ++ ! jump table entry ++ L35: ++ .short .L25-.L35 ++ .short .L26-.L35 ++ .short .L27-.L35 ++ .short .L28-.L35 ++ .short .L29-.L35 ++ .short .L30-.L35 ++ .short .L31-.L35 ++ .short .L32-.L35 ++ .short .L33-.L35 ++ .short .L34-.L35 */ ++const char * ++nds32_output_casesi_pc_relative (rtx *operands) ++{ ++ enum machine_mode mode; ++ rtx diff_vec; ++ ++ diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1]))); ++ ++ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); ++ ++ /* Step C: "t <-- operands[1]". */ ++ if (flag_pic) ++ { ++ output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); ++ output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); ++ output_asm_insn ("add\t$ta, $ta, $gp", operands); ++ } ++ else ++ output_asm_insn ("la\t$ta, %l1", operands); ++ ++ /* Get the mode of each element in the difference vector. */ ++ mode = GET_MODE (diff_vec); + ++ /* Step D: "z <-- (mem (plus (operands[0] << m) t))", ++ where m is 0, 1, or 2 to load address-diff value from table. 
*/ ++ switch (mode) ++ { ++ case QImode: ++ output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); ++ break; ++ case HImode: ++ output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); ++ break; ++ case SImode: ++ output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); ++ break; + default: +- abort (); ++ gcc_unreachable (); + } + +- output_asm_insn (pattern, operands); +- return ""; ++ /* Step E: "t <-- z + t". ++ Add table label_ref with address-diff value to ++ obtain target case address. */ ++ output_asm_insn ("add\t$ta, %2, $ta", operands); ++ ++ /* Step F: jump to target with register t. */ ++ if (TARGET_16_BIT) ++ return "jr5\t$ta"; ++ else ++ return "jr\t$ta"; + } + +-/* Function to output stack push operation. +- We need to deal with normal stack push multiple or stack v3push. */ ++/* Function to generate normal jump table. */ + const char * +-nds32_output_stack_push (rtx par_rtx) ++nds32_output_casesi (rtx *operands) + { +- /* A string pattern for output_asm_insn(). */ +- char pattern[100]; +- /* The operands array which will be used in output_asm_insn(). */ +- rtx operands[3]; +- /* Pick up varargs first regno and last regno for further use. */ +- int rb_va_args = cfun->machine->va_args_first_regno; +- int re_va_args = cfun->machine->va_args_last_regno; +- int last_argument_regno = NDS32_FIRST_GPR_REGNUM +- + NDS32_MAX_GPR_REGS_FOR_ARGS +- - 1; +- /* Pick up callee-saved first regno and last regno for further use. */ +- int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; +- int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; ++ /* Step C: "t <-- operands[1]". */ ++ if (flag_pic) ++ { ++ output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); ++ output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); ++ output_asm_insn ("add\t$ta, $ta, $gp", operands); ++ } ++ else ++ output_asm_insn ("la\t$ta, %l1", operands); + +- /* First we need to check if we are pushing argument registers not used +- for the named arguments. 
If so, we have to create 'smw.adm' (push.s) +- instruction. */ +- if (reg_mentioned_p (gen_rtx_REG (SImode, last_argument_regno), par_rtx)) ++ /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ ++ output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); ++ ++ /* No need to perform Step E, which is only used for ++ pc relative jump table. */ ++ ++ /* Step F: jump to target with register z. */ ++ if (TARGET_16_BIT) ++ return "jr5\t%2"; ++ else ++ return "jr\t%2"; ++} ++ ++ ++/* Function to return memory format. */ ++enum nds32_16bit_address_type ++nds32_mem_format (rtx op) ++{ ++ enum machine_mode mode_test; ++ int val; ++ int regno; ++ ++ if (!TARGET_16_BIT) ++ return ADDRESS_NOT_16BIT_FORMAT; ++ ++ mode_test = GET_MODE (op); ++ ++ op = XEXP (op, 0); ++ ++ /* 45 format. */ ++ if (GET_CODE (op) == REG ++ && ((mode_test == SImode) || (mode_test == SFmode))) ++ return ADDRESS_REG; ++ ++ /* 333 format for QI/HImode. */ ++ if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM)) ++ return ADDRESS_LO_REG_IMM3U; ++ ++ /* post_inc 333 format. */ ++ if ((GET_CODE (op) == POST_INC) ++ && ((mode_test == SImode) || (mode_test == SFmode))) + { +- /* Set operands[0] and operands[1]. */ +- operands[0] = gen_rtx_REG (SImode, rb_va_args); +- operands[1] = gen_rtx_REG (SImode, re_va_args); +- /* Create assembly code pattern: "Rb, Re, { }". */ +- snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }"); +- /* We use output_asm_insn() to output assembly code by ourself. */ +- output_asm_insn (pattern, operands); +- return ""; ++ regno = REGNO(XEXP (op, 0)); ++ ++ if (regno < 8) ++ return ADDRESS_POST_INC_LO_REG_IMM3U; ++ } ++ ++ /* post_inc 333 format. 
*/ ++ if ((GET_CODE (op) == POST_MODIFY) ++ && ((mode_test == SImode) || (mode_test == SFmode)) ++ && (REG_P (XEXP (XEXP (op, 1), 0))) ++ && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) ++ { ++ regno = REGNO (XEXP (XEXP (op, 1), 0)); ++ val = INTVAL (XEXP (XEXP (op, 1), 1)); ++ if (regno < 8 && val > 0 && val < 32) ++ return ADDRESS_POST_MODIFY_LO_REG_IMM3U; + } + +- /* If we step here, we are going to do v3push or multiple push operation. */ ++ if ((GET_CODE (op) == PLUS) ++ && (GET_CODE (XEXP (op, 0)) == REG) ++ && (GET_CODE (XEXP (op, 1)) == CONST_INT)) ++ { ++ val = INTVAL (XEXP (op, 1)); ++ ++ regno = REGNO(XEXP (op, 0)); ++ ++ if (regno > 8 ++ && regno != SP_REGNUM ++ && regno != FP_REGNUM) ++ return ADDRESS_NOT_16BIT_FORMAT; ++ ++ switch (mode_test) ++ { ++ case QImode: ++ /* 333 format. */ ++ if (val >= 0 && val < 8 && regno < 8) ++ return ADDRESS_LO_REG_IMM3U; ++ break; ++ ++ case HImode: ++ /* 333 format. */ ++ if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8) ++ return ADDRESS_LO_REG_IMM3U; ++ break; ++ ++ case SImode: ++ case SFmode: ++ case DFmode: ++ /* r8 imply fe format. */ ++ if ((regno == 8) && ++ (val >= -128 && val <= -4 && (val % 4 == 0))) ++ return ADDRESS_R8_IMM7U; ++ /* fp imply 37 format. */ ++ if ((regno == FP_REGNUM) && ++ (val >= 0 && val < 512 && (val % 4 == 0))) ++ return ADDRESS_FP_IMM7U; ++ /* sp imply 37 format. */ ++ else if ((regno == SP_REGNUM) && ++ (val >= 0 && val < 512 && (val % 4 == 0))) ++ return ADDRESS_SP_IMM7U; ++ /* 333 format. */ ++ else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8) ++ return ADDRESS_LO_REG_IMM3U; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++ return ADDRESS_NOT_16BIT_FORMAT; ++} ++ ++/* Output 16-bit store. 
*/ ++const char * ++nds32_output_16bit_store (rtx *operands, int byte) ++{ ++ char pattern[100]; ++ char size; ++ rtx code = XEXP (operands[0], 0); ++ ++ size = nds32_byte_to_size (byte); ++ ++ switch (nds32_mem_format (operands[0])) ++ { ++ case ADDRESS_REG: ++ operands[0] = code; ++ output_asm_insn ("swi450\t%1, [%0]", operands); ++ break; ++ case ADDRESS_LO_REG_IMM3U: ++ snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size); ++ output_asm_insn (pattern, operands); ++ break; ++ case ADDRESS_POST_INC_LO_REG_IMM3U: ++ snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0, 4"); ++ output_asm_insn (pattern, operands); ++ break; ++ case ADDRESS_POST_MODIFY_LO_REG_IMM3U: ++ snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0"); ++ output_asm_insn (pattern, operands); ++ break; ++ case ADDRESS_FP_IMM7U: ++ output_asm_insn ("swi37\t%1, %0", operands); ++ break; ++ case ADDRESS_SP_IMM7U: ++ /* Get immediate value and set back to operands[0]. */ ++ operands[0] = XEXP (code, 1); ++ output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands); ++ break; ++ default: ++ break; ++ } ++ ++ return ""; ++} ++ ++/* Output 16-bit load. 
*/ ++const char * ++nds32_output_16bit_load (rtx *operands, int byte) ++{ ++ char pattern[100]; ++ unsigned char size; ++ rtx code = XEXP (operands[1], 0); ++ ++ size = nds32_byte_to_size (byte); ++ ++ switch (nds32_mem_format (operands[1])) ++ { ++ case ADDRESS_REG: ++ operands[1] = code; ++ output_asm_insn ("lwi450\t%0, [%1]", operands); ++ break; ++ case ADDRESS_LO_REG_IMM3U: ++ snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size); ++ output_asm_insn (pattern, operands); ++ break; ++ case ADDRESS_POST_INC_LO_REG_IMM3U: ++ snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1, 4"); ++ output_asm_insn (pattern, operands); ++ break; ++ case ADDRESS_POST_MODIFY_LO_REG_IMM3U: ++ snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1"); ++ output_asm_insn (pattern, operands); ++ break; ++ case ADDRESS_R8_IMM7U: ++ output_asm_insn ("lwi45.fe\t%0, %e1", operands); ++ break; ++ case ADDRESS_FP_IMM7U: ++ output_asm_insn ("lwi37\t%0, %1", operands); ++ break; ++ case ADDRESS_SP_IMM7U: ++ /* Get immediate value and set back to operands[1]. */ ++ operands[1] = XEXP (code, 1); ++ output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands); ++ break; ++ default: ++ break; ++ } ++ ++ return ""; ++} ++ ++/* Output 32-bit store. 
*/ ++const char * ++nds32_output_32bit_store (rtx *operands, int byte) ++{ ++ char pattern[100]; ++ unsigned char size; ++ rtx code = XEXP (operands[0], 0); ++ ++ size = nds32_byte_to_size (byte); ++ ++ switch (GET_CODE (code)) ++ { ++ case REG: ++ /* (mem (reg X)) ++ => access location by using register, ++ use "sbi / shi / swi" */ ++ snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); ++ break; ++ ++ case SYMBOL_REF: ++ case CONST: ++ /* (mem (symbol_ref X)) ++ (mem (const (...))) ++ => access global variables, ++ use "sbi.gp / shi.gp / swi.gp" */ ++ operands[0] = XEXP (operands[0], 0); ++ snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size); ++ break; ++ ++ case POST_INC: ++ /* (mem (post_inc reg)) ++ => access location by using register which will be post increment, ++ use "sbi.bi / shi.bi / swi.bi" */ ++ snprintf (pattern, sizeof (pattern), ++ "s%ci.bi\t%%1, %%0, %d", size, byte); ++ break; ++ ++ case POST_DEC: ++ /* (mem (post_dec reg)) ++ => access location by using register which will be post decrement, ++ use "sbi.bi / shi.bi / swi.bi" */ ++ snprintf (pattern, sizeof (pattern), ++ "s%ci.bi\t%%1, %%0, -%d", size, byte); ++ break; ++ ++ case POST_MODIFY: ++ switch (GET_CODE (XEXP (XEXP (code, 1), 1))) ++ { ++ case REG: ++ case SUBREG: ++ /* (mem (post_modify (reg) (plus (reg) (reg)))) ++ => access location by using register which will be ++ post modified with reg, ++ use "sb.bi/ sh.bi / sw.bi" */ ++ snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size); ++ break; ++ case CONST_INT: ++ /* (mem (post_modify (reg) (plus (reg) (const_int)))) ++ => access location by using register which will be ++ post modified with const_int, ++ use "sbi.bi/ shi.bi / swi.bi" */ ++ snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size); ++ break; ++ default: ++ abort (); ++ } ++ break; ++ ++ case PLUS: ++ switch (GET_CODE (XEXP (code, 1))) ++ { ++ case REG: ++ case SUBREG: ++ /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) 
reg)) ++ => access location by adding two registers, ++ use "sb / sh / sw" */ ++ snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size); ++ break; ++ case CONST_INT: ++ /* (mem (plus reg const_int)) ++ => access location by adding one register with const_int, ++ use "sbi / shi / swi" */ ++ snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); ++ break; ++ default: ++ abort (); ++ } ++ break; ++ ++ case LO_SUM: ++ operands[2] = XEXP (code, 1); ++ operands[0] = XEXP (code, 0); ++ snprintf (pattern, sizeof (pattern), ++ "s%ci\t%%1, [%%0 + lo12(%%2)]", size); ++ break; ++ ++ default: ++ abort (); ++ } ++ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++/* Output 32-bit load. */ ++const char * ++nds32_output_32bit_load (rtx *operands, int byte) ++{ ++ char pattern[100]; ++ unsigned char size; ++ rtx code; ++ ++ code = XEXP (operands[1], 0); ++ ++ size = nds32_byte_to_size (byte); ++ ++ switch (GET_CODE (code)) ++ { ++ case REG: ++ /* (mem (reg X)) ++ => access location by using register, ++ use "lbi / lhi / lwi" */ ++ snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); ++ break; ++ ++ case SYMBOL_REF: ++ case CONST: ++ /* (mem (symbol_ref X)) ++ (mem (const (...))) ++ => access global variables, ++ use "lbi.gp / lhi.gp / lwi.gp" */ ++ operands[1] = XEXP (operands[1], 0); ++ snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size); ++ break; ++ ++ case POST_INC: ++ /* (mem (post_inc reg)) ++ => access location by using register which will be post increment, ++ use "lbi.bi / lhi.bi / lwi.bi" */ ++ snprintf (pattern, sizeof (pattern), ++ "l%ci.bi\t%%0, %%1, %d", size, byte); ++ break; ++ ++ case POST_DEC: ++ /* (mem (post_dec reg)) ++ => access location by using register which will be post decrement, ++ use "lbi.bi / lhi.bi / lwi.bi" */ ++ snprintf (pattern, sizeof (pattern), ++ "l%ci.bi\t%%0, %%1, -%d", size, byte); ++ break; ++ ++ case POST_MODIFY: ++ switch (GET_CODE (XEXP (XEXP (code, 1), 1))) ++ { ++ case REG: ++ case 
SUBREG: ++ /* (mem (post_modify (reg) (plus (reg) (reg)))) ++ => access location by using register which will be ++ post modified with reg, ++ use "lb.bi/ lh.bi / lw.bi" */ ++ snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size); ++ break; ++ case CONST_INT: ++ /* (mem (post_modify (reg) (plus (reg) (const_int)))) ++ => access location by using register which will be ++ post modified with const_int, ++ use "lbi.bi/ lhi.bi / lwi.bi" */ ++ snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size); ++ break; ++ default: ++ abort (); ++ } ++ break; ++ ++ case PLUS: ++ switch (GET_CODE (XEXP (code, 1))) ++ { ++ case REG: ++ case SUBREG: ++ /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) ++ use "lb / lh / lw" */ ++ snprintf (pattern, sizeof (pattern), "l%c\t%%0, %%1", size); ++ break; ++ case CONST_INT: ++ /* (mem (plus reg const_int)) ++ => access location by adding one register with const_int, ++ use "lbi / lhi / lwi" */ ++ snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); ++ break; ++ default: ++ abort (); ++ } ++ break; ++ ++ case LO_SUM: ++ operands[2] = XEXP (code, 1); ++ operands[1] = XEXP (code, 0); ++ snprintf (pattern, sizeof (pattern), ++ "l%ci\t%%0, [%%1 + lo12(%%2)]", size); ++ break; ++ ++ default: ++ abort (); ++ } ++ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++/* Output 32-bit load with signed extension. 
*/ ++const char * ++nds32_output_32bit_load_se (rtx *operands, int byte) ++{ ++ char pattern[100]; ++ unsigned char size; ++ rtx code; ++ ++ code = XEXP (operands[1], 0); ++ ++ size = nds32_byte_to_size (byte); ++ ++ switch (GET_CODE (code)) ++ { ++ case REG: ++ /* (mem (reg X)) ++ => access location by using register, ++ use "lbsi / lhsi" */ ++ snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); ++ break; ++ ++ case SYMBOL_REF: ++ case CONST: ++ /* (mem (symbol_ref X)) ++ (mem (const (...))) ++ => access global variables, ++ use "lbsi.gp / lhsi.gp" */ ++ operands[1] = XEXP (operands[1], 0); ++ snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size); ++ break; ++ ++ case POST_INC: ++ /* (mem (post_inc reg)) ++ => access location by using register which will be post increment, ++ use "lbsi.bi / lhsi.bi" */ ++ snprintf (pattern, sizeof (pattern), ++ "l%csi.bi\t%%0, %%1, %d", size, byte); ++ break; ++ ++ case POST_DEC: ++ /* (mem (post_dec reg)) ++ => access location by using register which will be post decrement, ++ use "lbsi.bi / lhsi.bi" */ ++ snprintf (pattern, sizeof (pattern), ++ "l%csi.bi\t%%0, %%1, -%d", size, byte); ++ break; ++ ++ case POST_MODIFY: ++ switch (GET_CODE (XEXP (XEXP (code, 1), 1))) ++ { ++ case REG: ++ case SUBREG: ++ /* (mem (post_modify (reg) (plus (reg) (reg)))) ++ => access location by using register which will be ++ post modified with reg, ++ use "lbs.bi/ lhs.bi" */ ++ snprintf (pattern, sizeof (pattern), "l%cs.bi\t%%0, %%1", size); ++ break; ++ case CONST_INT: ++ /* (mem (post_modify (reg) (plus (reg) (const_int)))) ++ => access location by using register which will be ++ post modified with const_int, ++ use "lbsi.bi/ lhsi.bi" */ ++ snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size); ++ break; ++ default: ++ abort (); ++ } ++ break; ++ ++ case PLUS: ++ switch (GET_CODE (XEXP (code, 1))) ++ { ++ case REG: ++ case SUBREG: ++ /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) ++ use "lbs / 
lhs" */ ++ snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size); ++ break; ++ case CONST_INT: ++ /* (mem (plus reg const_int)) ++ => access location by adding one register with const_int, ++ use "lbsi / lhsi" */ ++ snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); ++ break; ++ default: ++ abort (); ++ } ++ break; ++ ++ case LO_SUM: ++ operands[2] = XEXP (code, 1); ++ operands[1] = XEXP (code, 0); ++ snprintf (pattern, sizeof (pattern), ++ "l%csi\t%%0, [%%1 + lo12(%%2)]", size); ++ break; ++ ++ default: ++ abort (); ++ } ++ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++/* Function to output stack push operation. ++ We need to deal with normal stack push multiple or stack v3push. */ ++const char * ++nds32_output_stack_push (rtx par_rtx) ++{ ++ /* A string pattern for output_asm_insn(). */ ++ char pattern[100]; ++ /* The operands array which will be used in output_asm_insn(). */ ++ rtx operands[3]; ++ /* Pick up varargs first regno and last regno for further use. */ ++ int rb_va_args = cfun->machine->va_args_first_regno; ++ int re_va_args = cfun->machine->va_args_last_regno; ++ int last_argument_regno = NDS32_FIRST_GPR_REGNUM ++ + NDS32_MAX_GPR_REGS_FOR_ARGS ++ - 1; ++ /* Pick up first and last eh data regno for further use. */ ++ int rb_eh_data = cfun->machine->eh_return_data_first_regno; ++ int re_eh_data = cfun->machine->eh_return_data_last_regno; ++ int first_eh_data_regno = EH_RETURN_DATA_REGNO (0); ++ /* Pick up callee-saved first regno and last regno for further use. */ ++ int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; ++ int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; ++ ++ /* First we need to check if we are pushing argument registers not used ++ for the named arguments. If so, we have to create 'smw.adm' (push.s) ++ instruction. */ ++ if (reg_mentioned_p (gen_rtx_REG (SImode, last_argument_regno), par_rtx)) ++ { ++ /* Set operands[0] and operands[1]. 
*/ ++ operands[0] = gen_rtx_REG (SImode, rb_va_args); ++ operands[1] = gen_rtx_REG (SImode, re_va_args); ++ /* Create assembly code pattern: "Rb, Re, { }". */ ++ snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }"); ++ /* We use output_asm_insn() to output assembly code by ourself. */ ++ output_asm_insn (pattern, operands); ++ return ""; ++ } ++ ++ /* If last_argument_regno is not mentioned in par_rtx, we can confirm that ++ we do not need to push argument registers for variadic function. ++ But we still need to check if we need to push exception handling ++ data registers. */ ++ if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx)) ++ { ++ /* Set operands[0] and operands[1]. */ ++ operands[0] = gen_rtx_REG (SImode, rb_eh_data); ++ operands[1] = gen_rtx_REG (SImode, re_eh_data); ++ /* Create assembly code pattern: "Rb, Re, { }". */ ++ snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }"); ++ /* We use output_asm_insn() to output assembly code by ourself. */ ++ output_asm_insn (pattern, operands); ++ return ""; ++ } ++ ++ /* If we step here, we are going to do v3push or multiple push operation. */ ++ ++ /* Refer to nds32.h, where we comment when push25/pop25 are available. */ ++ if (NDS32_V3PUSH_AVAILABLE_P) ++ { ++ /* For stack v3push: ++ operands[0]: Re ++ operands[1]: imm8u */ ++ ++ /* This variable is to check if 'push25 Re,imm8u' is available. */ ++ int sp_adjust; ++ ++ /* Set operands[0]. */ ++ operands[0] = gen_rtx_REG (SImode, re_callee_saved); ++ ++ /* Check if we can generate 'push25 Re,imm8u', ++ otherwise, generate 'push25 Re,0'. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) ++ && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) ++ operands[1] = GEN_INT (sp_adjust); ++ else ++ { ++ /* Allocate callee saved fpr space. 
*/ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ operands[1] = GEN_INT (sp_adjust); ++ } ++ else ++ { ++ operands[1] = GEN_INT (0); ++ } ++ } ++ ++ /* Create assembly code pattern. */ ++ snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); ++ } ++ else ++ { ++ /* For normal stack push multiple: ++ operands[0]: Rb ++ operands[1]: Re ++ operands[2]: En4 */ ++ ++ /* This variable is used to check if we only need to generate En4 field. ++ As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ ++ int push_en4_only_p = 0; ++ ++ /* Set operands[0] and operands[1]. */ ++ operands[0] = gen_rtx_REG (SImode, rb_callee_saved); ++ operands[1] = gen_rtx_REG (SImode, re_callee_saved); ++ ++ /* 'smw.adm $sp,[$sp],$sp,0' means push nothing. */ ++ if (!cfun->machine->fp_size ++ && !cfun->machine->gp_size ++ && !cfun->machine->lp_size ++ && REGNO (operands[0]) == SP_REGNUM ++ && REGNO (operands[1]) == SP_REGNUM) ++ { ++ /* No need to generate instruction. */ ++ return ""; ++ } ++ else ++ { ++ /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ ++ if (REGNO (operands[0]) == SP_REGNUM ++ && REGNO (operands[1]) == SP_REGNUM) ++ push_en4_only_p = 1; ++ ++ /* Create assembly code pattern. ++ We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ ++ snprintf (pattern, sizeof (pattern), ++ "push.s\t%s{%s%s%s }", ++ push_en4_only_p ? "" : "%0, %1, ", ++ cfun->machine->fp_size ? " $fp" : "", ++ cfun->machine->gp_size ? " $gp" : "", ++ cfun->machine->lp_size ? " $lp" : ""); ++ } ++ } ++ ++ /* We use output_asm_insn() to output assembly code by ourself. */ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++/* Function to output stack pop operation. ++ We need to deal with normal stack pop multiple or stack v3pop. 
*/ ++const char * ++nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED) ++{ ++ /* A string pattern for output_asm_insn(). */ ++ char pattern[100]; ++ /* The operands array which will be used in output_asm_insn(). */ ++ rtx operands[3]; ++ /* Pick up first and last eh data regno for further use. */ ++ int rb_eh_data = cfun->machine->eh_return_data_first_regno; ++ int re_eh_data = cfun->machine->eh_return_data_last_regno; ++ int first_eh_data_regno = EH_RETURN_DATA_REGNO (0); ++ /* Pick up callee-saved first regno and last regno for further use. */ ++ int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; ++ int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; ++ ++ /* We need to check if we need to pop exception handling ++ data registers. */ ++ if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx)) ++ { ++ /* Set operands[0] and operands[1]. */ ++ operands[0] = gen_rtx_REG (SImode, rb_eh_data); ++ operands[1] = gen_rtx_REG (SImode, re_eh_data); ++ /* Create assembly code pattern: "Rb, Re, { }". */ ++ snprintf (pattern, sizeof (pattern), "pop.s\t%s", "%0, %1, { }"); ++ /* We use output_asm_insn() to output assembly code by ourself. */ ++ output_asm_insn (pattern, operands); ++ return ""; ++ } ++ ++ /* If we step here, we are going to do v3pop or multiple pop operation. */ ++ ++ /* Refer to nds32.h, where we comment when push25/pop25 are available. */ ++ if (NDS32_V3PUSH_AVAILABLE_P) ++ { ++ /* For stack v3pop: ++ operands[0]: Re ++ operands[1]: imm8u */ ++ ++ /* This variable is to check if 'pop25 Re,imm8u' is available. */ ++ int sp_adjust; ++ ++ /* Set operands[0]. */ ++ operands[0] = gen_rtx_REG (SImode, re_callee_saved); ++ ++ /* Check if we can generate 'pop25 Re,imm8u', ++ otherwise, generate 'pop25 Re,0'. ++ We have to consider alloca issue as well. ++ If the function does call alloca(), the stack pointer is not fixed. ++ In that case, we cannot use 'pop25 Re,imm8u' directly. 
++ We have to calculate stack pointer from frame pointer ++ and then use 'pop25 Re,0'. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) ++ && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) ++ && !cfun->calls_alloca) ++ operands[1] = GEN_INT (sp_adjust); ++ else ++ { ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* If there are fprs to restore, the $sp is on the callee saved fpr ++ position, so we need to consider gpr padding bytes and ++ callee saved fpr size. */ ++ sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ operands[1] = GEN_INT (sp_adjust); ++ } ++ else ++ { ++ operands[1] = GEN_INT (0); ++ } ++ } ++ ++ /* Create assembly code pattern. */ ++ snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); ++ } ++ else ++ { ++ /* For normal stack pop multiple: ++ operands[0]: Rb ++ operands[1]: Re ++ operands[2]: En4 */ ++ ++ /* This variable is used to check if we only need to generate En4 field. ++ As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ ++ int pop_en4_only_p = 0; ++ ++ /* Set operands[0] and operands[1]. */ ++ operands[0] = gen_rtx_REG (SImode, rb_callee_saved); ++ operands[1] = gen_rtx_REG (SImode, re_callee_saved); ++ ++ /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing. */ ++ if (!cfun->machine->fp_size ++ && !cfun->machine->gp_size ++ && !cfun->machine->lp_size ++ && REGNO (operands[0]) == SP_REGNUM ++ && REGNO (operands[1]) == SP_REGNUM) ++ { ++ /* No need to generate instruction. */ ++ return ""; ++ } ++ else ++ { ++ /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ ++ if (REGNO (operands[0]) == SP_REGNUM ++ && REGNO (operands[1]) == SP_REGNUM) ++ pop_en4_only_p = 1; ++ ++ /* Create assembly code pattern. ++ We need to handle the form: "Rb, Re, { $fp $gp $lp }". 
*/ ++ snprintf (pattern, sizeof (pattern), ++ "pop.s\t%s{%s%s%s }", ++ pop_en4_only_p ? "" : "%0, %1, ", ++ cfun->machine->fp_size ? " $fp" : "", ++ cfun->machine->gp_size ? " $gp" : "", ++ cfun->machine->lp_size ? " $lp" : ""); ++ } ++ } ++ ++ /* We use output_asm_insn() to output assembly code by ourself. */ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++/* Function to output return operation. */ ++const char * ++nds32_output_return (void) ++{ ++ /* A string pattern for output_asm_insn(). */ ++ char pattern[100]; ++ /* The operands array which will be used in output_asm_insn(). */ ++ rtx operands[2]; ++ /* For stack v3pop: ++ operands[0]: Re ++ operands[1]: imm8u */ ++ int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; ++ int sp_adjust; ++ ++ /* Set operands[0]. */ ++ operands[0] = gen_rtx_REG (SImode, re_callee_saved); ++ ++ /* Check if we can generate 'pop25 Re,imm8u', ++ otherwise, generate 'pop25 Re,0'. ++ We have to consider alloca issue as well. ++ If the function does call alloca(), the stack pointer is not fixed. ++ In that case, we cannot use 'pop25 Re,imm8u' directly. ++ We have to calculate stack pointer from frame pointer ++ and then use 'pop25 Re,0'. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) ++ && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) ++ && !cfun->calls_alloca) ++ operands[1] = GEN_INT (sp_adjust); ++ else ++ operands[1] = GEN_INT (0); ++ ++ /* Create assembly code pattern. */ ++ snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); ++ /* We use output_asm_insn() to output assembly code by ourself. 
*/ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++ ++/* output a float load instruction */ ++const char * ++nds32_output_float_load (rtx *operands) ++{ ++ char buff[100]; ++ const char *pattern; ++ rtx addr, addr_op0, addr_op1; ++ int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; ++ addr = XEXP (operands[1], 0); ++ switch (GET_CODE (addr)) ++ { ++ case REG: ++ pattern = "fl%ci\t%%0, %%1"; ++ break; ++ ++ case PLUS: ++ addr_op0 = XEXP (addr, 0); ++ addr_op1 = XEXP (addr, 1); ++ ++ if (REG_P (addr_op0) && REG_P (addr_op1)) ++ pattern = "fl%c\t%%0, %%1"; ++ else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) ++ pattern = "fl%ci\t%%0, %%1"; ++ else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) ++ && REG_P (XEXP (addr_op0, 0)) ++ && CONST_INT_P (XEXP (addr_op0, 1))) ++ pattern = "fl%c\t%%0, %%1"; ++ else ++ gcc_unreachable (); ++ break; ++ ++ case POST_MODIFY: ++ addr_op0 = XEXP (addr, 0); ++ addr_op1 = XEXP (addr, 1); ++ ++ if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS ++ && REG_P (XEXP (addr_op1, 1))) ++ pattern = "fl%c.bi\t%%0, %%1"; ++ else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS ++ && CONST_INT_P (XEXP (addr_op1, 1))) ++ pattern = "fl%ci.bi\t%%0, %%1"; ++ else ++ gcc_unreachable (); ++ break; ++ ++ case POST_INC: ++ if (REG_P (XEXP (addr, 0))) ++ { ++ if (dp) ++ pattern = "fl%ci.bi\t%%0, %%1, 8"; ++ else ++ pattern = "fl%ci.bi\t%%0, %%1, 4"; ++ } ++ else ++ gcc_unreachable (); ++ break; ++ ++ case POST_DEC: ++ if (REG_P (XEXP (addr, 0))) ++ { ++ if (dp) ++ pattern = "fl%ci.bi\t%%0, %%1, -8"; ++ else ++ pattern = "fl%ci.bi\t%%0, %%1, -4"; ++ } ++ else ++ gcc_unreachable (); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ sprintf (buff, pattern, dp ? 
'd' : 's'); ++ output_asm_insn (buff, operands); ++ return ""; ++} ++ ++/* output a float store instruction */ ++const char * ++nds32_output_float_store (rtx *operands) ++{ ++ char buff[100]; ++ const char *pattern; ++ rtx addr, addr_op0, addr_op1; ++ int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; ++ addr = XEXP (operands[0], 0); ++ switch (GET_CODE (addr)) ++ { ++ case REG: ++ pattern = "fs%ci\t%%1, %%0"; ++ break; ++ ++ case PLUS: ++ addr_op0 = XEXP (addr, 0); ++ addr_op1 = XEXP (addr, 1); ++ ++ if (REG_P (addr_op0) && REG_P (addr_op1)) ++ pattern = "fs%c\t%%1, %%0"; ++ else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) ++ pattern = "fs%ci\t%%1, %%0"; ++ else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) ++ && REG_P (XEXP (addr_op0, 0)) ++ && CONST_INT_P (XEXP (addr_op0, 1))) ++ pattern = "fs%c\t%%1, %%0"; ++ else ++ gcc_unreachable (); ++ break; ++ ++ case POST_MODIFY: ++ addr_op0 = XEXP (addr, 0); ++ addr_op1 = XEXP (addr, 1); ++ ++ if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS ++ && REG_P (XEXP (addr_op1, 1))) ++ pattern = "fs%c.bi\t%%1, %%0"; ++ else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS ++ && CONST_INT_P (XEXP (addr_op1, 1))) ++ pattern = "fs%ci.bi\t%%1, %%0"; ++ else ++ gcc_unreachable (); ++ break; ++ ++ case POST_INC: ++ if (REG_P (XEXP (addr, 0))) ++ { ++ if (dp) ++ pattern = "fs%ci.bi\t%%1, %%0, 8"; ++ else ++ pattern = "fs%ci.bi\t%%1, %%0, 4"; ++ } ++ else ++ gcc_unreachable (); ++ break; ++ ++ case POST_DEC: ++ if (REG_P (XEXP (addr, 0))) ++ { ++ if (dp) ++ pattern = "fs%ci.bi\t%%1, %%0, -8"; ++ else ++ pattern = "fs%ci.bi\t%%1, %%0, -4"; ++ } ++ else ++ gcc_unreachable (); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ sprintf (buff, pattern, dp ? 
'd' : 's'); ++ output_asm_insn (buff, operands); ++ return ""; ++} ++ ++const char * ++nds32_output_smw_single_word (rtx *operands) ++{ ++ char buff[100]; ++ unsigned regno; ++ int enable4; ++ bool update_base_p; ++ rtx base_addr = operands[0]; ++ rtx base_reg; ++ rtx otherops[2]; ++ ++ if (REG_P (XEXP (base_addr, 0))) ++ { ++ update_base_p = false; ++ base_reg = XEXP (base_addr, 0); ++ } ++ else ++ { ++ update_base_p = true; ++ base_reg = XEXP (XEXP (base_addr, 0), 0); ++ } ++ ++ const char *update_base = update_base_p ? "m" : ""; ++ ++ regno = REGNO (operands[1]); ++ ++ otherops[0] = base_reg; ++ otherops[1] = operands[1]; ++ ++ if (regno >= 28) ++ { ++ enable4 = nds32_regno_to_enable4 (regno); ++ sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); ++ } ++ else ++ { ++ sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); ++ } ++ output_asm_insn (buff, otherops); ++ return ""; ++} ++ ++const char * ++nds32_output_smw_double_word (rtx *operands) ++{ ++ char buff[100]; ++ unsigned regno; ++ int enable4; ++ bool update_base_p; ++ rtx base_addr = operands[0]; ++ rtx base_reg; ++ rtx otherops[3]; ++ ++ if (REG_P (XEXP (base_addr, 0))) ++ { ++ update_base_p = false; ++ base_reg = XEXP (base_addr, 0); ++ } ++ else ++ { ++ update_base_p = true; ++ base_reg = XEXP (XEXP (base_addr, 0), 0); ++ } ++ ++ const char *update_base = update_base_p ? 
"m" : ""; ++ ++ regno = REGNO (operands[1]); ++ ++ otherops[0] = base_reg; ++ otherops[1] = operands[1]; ++ otherops[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);; ++ ++ if (regno >= 28) ++ { ++ enable4 = nds32_regno_to_enable4 (regno) ++ | nds32_regno_to_enable4 (regno + 1); ++ sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); ++ } ++ else if (regno == 27) ++ { ++ enable4 = nds32_regno_to_enable4 (regno + 1); ++ sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1, %x", update_base, enable4); ++ } ++ else ++ { ++ sprintf (buff, "smw.bi%s\t%%1, [%%0], %%2", update_base); ++ } ++ output_asm_insn (buff, otherops); ++ return ""; ++} ++ ++ ++const char * ++nds32_output_lmw_single_word (rtx *operands) ++{ ++ char buff[100]; ++ unsigned regno; ++ bool update_base_p; ++ int enable4; ++ rtx base_addr = operands[1]; ++ rtx base_reg; ++ rtx otherops[2]; ++ ++ if (REG_P (XEXP (base_addr, 0))) ++ { ++ update_base_p = false; ++ base_reg = XEXP (base_addr, 0); ++ } ++ else ++ { ++ update_base_p = true; ++ base_reg = XEXP (XEXP (base_addr, 0), 0); ++ } ++ ++ const char *update_base = update_base_p ? "m" : ""; ++ ++ regno = REGNO (operands[0]); ++ ++ otherops[0] = operands[0]; ++ otherops[1] = base_reg; ++ ++ if (regno >= 28) ++ { ++ enable4 = nds32_regno_to_enable4 (regno); ++ sprintf (buff, "lmw.bi%s\t$sp, [%%1], $sp, %x", update_base, enable4); ++ } ++ else ++ { ++ sprintf (buff, "lmw.bi%s\t%%0, [%%1], %%0", update_base); ++ } ++ output_asm_insn (buff, otherops); ++ return ""; ++} ++ ++void ++nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) ++{ ++ /* Initial memory offset. */ ++ int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0; ++ int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1; ++ /* Initial register shift byte. */ ++ int shift = 0; ++ /* The first load byte instruction is not the same. 
*/ ++ int width = GET_MODE_SIZE (mode) - 1; ++ rtx mem[2]; ++ rtx reg[2]; ++ rtx sub_reg; ++ rtx temp_reg, temp_sub_reg; ++ int num_reg; ++ ++ /* Generating a series of load byte instructions. ++ The first load byte instructions and other ++ load byte instructions are not the same. like: ++ First: ++ lbi reg0, [mem] ++ zeh reg0, reg0 ++ Second: ++ lbi temp_reg, [mem + offset] ++ sll temp_reg, (8 * shift) ++ ior reg0, temp_reg ++ ++ lbi temp_reg, [mem + (offset + 1)] ++ sll temp_reg, (8 * (shift + 1)) ++ ior reg0, temp_reg */ ++ ++ temp_reg = gen_reg_rtx (SImode); ++ temp_sub_reg = gen_lowpart (QImode, temp_reg); ++ ++ if (mode == DImode) ++ { ++ /* Load doubleword, we need two registers to access. */ ++ reg[0] = nds32_di_low_part_subreg (operands[0]); ++ reg[1] = nds32_di_high_part_subreg (operands[0]); ++ /* A register only store 4 byte. */ ++ width = GET_MODE_SIZE (SImode) - 1; ++ } ++ else ++ { ++ if (VECTOR_MODE_P (mode)) ++ reg[0] = gen_reg_rtx (SImode); ++ else ++ reg[0] = operands[0]; ++ } ++ ++ for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) ++ { ++ sub_reg = gen_lowpart (QImode, reg[0]); ++ mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[1], offset)); ++ ++ /* Generating the first part instructions. ++ lbi reg0, [mem] ++ zeh reg0, reg0 */ ++ emit_move_insn (sub_reg, mem[0]); ++ emit_insn (gen_zero_extendqisi2 (reg[0], sub_reg)); ++ ++ while (width > 0) ++ { ++ offset = offset + offset_adj; ++ shift++; ++ width--; ++ ++ mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode, ++ operands[1], ++ offset)); ++ /* Generating the second part instructions. ++ lbi temp_reg, [mem + offset] ++ sll temp_reg, (8 * shift) ++ ior reg0, temp_reg */ ++ emit_move_insn (temp_sub_reg, mem[1]); ++ emit_insn (gen_ashlsi3 (temp_reg, temp_reg, ++ GEN_INT (shift * 8))); ++ emit_insn (gen_iorsi3 (reg[0], reg[0], temp_reg)); ++ } ++ ++ if (mode == DImode) ++ { ++ /* Using the second register to load memory information. 
*/ ++ reg[0] = reg[1]; ++ shift = 0; ++ width = GET_MODE_SIZE (SImode) - 1; ++ offset = offset + offset_adj; ++ } ++ } ++ if (VECTOR_MODE_P (mode)) ++ convert_move (operands[0], reg[0], false); ++} ++ ++void ++nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode) ++{ ++ /* Initial memory offset. */ ++ int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0; ++ int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1; ++ /* Initial register shift byte. */ ++ int shift = 0; ++ /* The first store byte instruction is not the same. */ ++ int width = GET_MODE_SIZE (mode) - 1; ++ rtx mem[2]; ++ rtx reg[2]; ++ rtx sub_reg; ++ rtx temp_reg, temp_sub_reg; ++ int num_reg; ++ ++ /* Generating a series of store byte instructions. ++ The first store byte instruction and the other ++ store byte instructions are not the same, like: ++ First: ++ sbi reg0, [mem + 0] ++ Second: ++ srli temp_reg, reg0, (8 * shift) ++ sbi temp_reg, [mem + offset] */ ++ ++ temp_reg = gen_reg_rtx (SImode); ++ temp_sub_reg = gen_lowpart (QImode, temp_reg); ++ ++ if (mode == DImode) ++ { ++ /* Store doubleword, we need two registers to access. */ ++ reg[0] = nds32_di_low_part_subreg (operands[1]); ++ reg[1] = nds32_di_high_part_subreg (operands[1]); ++ /* A register only stores 4 bytes. */ ++ width = GET_MODE_SIZE (SImode) - 1; ++ } ++ else ++ { ++ if (VECTOR_MODE_P (mode)) ++ { ++ reg[0] = gen_reg_rtx (SImode); ++ convert_move (reg[0], operands[1], false); ++ } ++ else ++ reg[0] = operands[1]; ++ } ++ ++ for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) ++ { ++ sub_reg = gen_lowpart (QImode, reg[0]); ++ mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[0], offset)); ++ ++ /* Generating the first part instructions. 
++ sbi reg0, [mem + 0] */ ++ emit_move_insn (mem[0], sub_reg); ++ ++ while (width > 0) ++ { ++ offset = offset + offset_adj; ++ shift++; ++ width--; ++ ++ mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode, ++ operands[0], ++ offset)); ++ /* Generating the second part instructions. ++ srli temp_reg, reg0, (8 * shift) ++ sbi temp_reg, [mem + offset] */ ++ emit_insn (gen_lshrsi3 (temp_reg, reg[0], ++ GEN_INT (shift * 8))); ++ emit_move_insn (mem[1], temp_sub_reg); ++ } ++ ++ if (mode == DImode) ++ { ++ /* Using the second register to load memory information. */ ++ reg[0] = reg[1]; ++ shift = 0; ++ width = GET_MODE_SIZE (SImode) - 1; ++ offset = offset + offset_adj; ++ } ++ } ++} ++ ++/* Using multiple load/store instruction to output doubleword instruction. */ ++const char * ++nds32_output_double (rtx *operands, bool load_p) ++{ ++ char pattern[100]; ++ int reg = load_p ? 0 : 1; ++ int mem = load_p ? 1 : 0; ++ rtx otherops[3]; ++ rtx addr = XEXP (operands[mem], 0); ++ ++ otherops[0] = gen_rtx_REG (SImode, REGNO (operands[reg])); ++ otherops[1] = gen_rtx_REG (SImode, REGNO (operands[reg]) + 1); ++ ++ if (GET_CODE (addr) == POST_INC) ++ { ++ /* (mem (post_inc (reg))) */ ++ otherops[2] = XEXP (addr, 0); ++ snprintf (pattern, sizeof (pattern), ++ "%cmw.bim\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's'); ++ } ++ else ++ { ++ /* (mem (reg)) */ ++ otherops[2] = addr; ++ snprintf (pattern, sizeof (pattern), ++ "%cmw.bi\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's'); ++ ++ } ++ ++ output_asm_insn (pattern, otherops); ++ return ""; ++} ++ ++const char * ++nds32_output_cbranchsi4_equality_zero (rtx_insn *insn, rtx *operands) ++{ ++ enum rtx_code code; ++ bool long_jump_p = false; ++ ++ code = GET_CODE (operands[0]); ++ ++ /* This zero-comparison conditional branch has two forms: ++ 32-bit instruction => beqz/bnez imm16s << 1 ++ 16-bit instruction => beqzs8/bnezs8/beqz38/bnez38 imm8s << 1 ++ ++ For 32-bit case, ++ we assume it is always reachable. 
(but check range -65500 ~ 65500) ++ ++ For 16-bit case, ++ it must satisfy { 255 >= (label - pc) >= -256 } condition. ++ However, since the $pc for nds32 is at the beginning of the instruction, ++ we should leave some length space for current insn. ++ So we use range -250 ~ 250. */ ++ ++ switch (get_attr_length (insn)) ++ { ++ case 8: ++ long_jump_p = true; ++ /* fall through */ ++ case 2: ++ if (which_alternative == 0) ++ { ++ /* constraint: t */ ++ /* b<cond>zs8 .L0 ++ or ++ b<inverse_cond>zs8 .LCB0 ++ j .L0 ++ .LCB0: ++ */ ++ output_cond_branch_compare_zero (code, "s8", long_jump_p, ++ operands, true); ++ return ""; ++ } ++ else if (which_alternative == 1) ++ { ++ /* constraint: l */ ++ /* b<cond>z38 $r0, .L0 ++ or ++ b<inverse_cond>z38 $r0, .LCB0 ++ j .L0 ++ .LCB0: ++ */ ++ output_cond_branch_compare_zero (code, "38", long_jump_p, ++ operands, false); ++ return ""; ++ } ++ else ++ { ++ /* constraint: r */ ++ /* For which_alternative==2, it should not be here. */ ++ gcc_unreachable (); ++ } ++ case 10: ++ /* including constraints: t, l, and r */ ++ long_jump_p = true; ++ /* fall through */ ++ case 4: ++ /* including constraints: t, l, and r */ ++ output_cond_branch_compare_zero (code, "", long_jump_p, operands, false); ++ return ""; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++const char * ++nds32_output_cbranchsi4_equality_reg (rtx_insn *insn, rtx *operands) ++{ ++ enum rtx_code code; ++ bool long_jump_p, r5_p; ++ int insn_length; ++ ++ insn_length = get_attr_length (insn); ++ ++ long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false; ++ r5_p = (insn_length == 2 || insn_length == 8) ? true : false; ++ ++ code = GET_CODE (operands[0]); ++ ++ /* This register-comparison conditional branch has one form: ++ 32-bit instruction => beq/bne imm14s << 1 ++ ++ For 32-bit case, ++ we assume it is always reachable. (but check range -16350 ~ 16350). 
*/ ++ ++ switch (code) ++ { ++ case EQ: ++ case NE: ++ output_cond_branch (code, "", r5_p, long_jump_p, operands); ++ return ""; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++const char * ++nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *insn, ++ rtx *operands) ++{ ++ enum rtx_code code; ++ bool long_jump_p, r5_p; ++ int insn_length; ++ ++ insn_length = get_attr_length (insn); ++ ++ long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false; ++ r5_p = (insn_length == 2 || insn_length == 8) ? true : false; ++ ++ code = GET_CODE (operands[0]); ++ ++ /* This register-comparison conditional branch has one form: ++ 32-bit instruction => beq/bne imm14s << 1 ++ 32-bit instruction => beqc/bnec imm8s << 1 ++ ++ For 32-bit case, we assume it is always reachable. ++ (but check range -16350 ~ 16350 and -250 ~ 250). */ ++ ++ switch (code) ++ { ++ case EQ: ++ case NE: ++ if (which_alternative == 2) ++ { ++ /* r, Is11 */ ++ /* b<cond>c */ ++ output_cond_branch (code, "c", r5_p, long_jump_p, operands); ++ } ++ else ++ { ++ /* r, r */ ++ /* v, r */ ++ output_cond_branch (code, "", r5_p, long_jump_p, operands); ++ } ++ return ""; ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++const char * ++nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands) ++{ ++ enum rtx_code code; ++ bool long_jump_p; ++ int insn_length; ++ ++ insn_length = get_attr_length (insn); ++ ++ gcc_assert (insn_length == 4 || insn_length == 10); ++ ++ long_jump_p = (insn_length == 10) ? true : false; ++ ++ code = GET_CODE (operands[0]); ++ ++ /* This zero-greater-less-comparison conditional branch has one form: ++ 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 ++ ++ For 32-bit case, we assume it is always reachable. ++ (but check range -65500 ~ 65500). 
*/ ++ ++ switch (code) ++ { ++ case GT: ++ case GE: ++ case LT: ++ case LE: ++ output_cond_branch_compare_zero (code, "", long_jump_p, operands, false); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ return ""; ++} ++ ++const char * ++nds32_output_unpkd8 (rtx output, rtx input, ++ rtx high_idx_rtx, rtx low_idx_rtx, ++ bool signed_p) ++{ ++ char pattern[100]; ++ rtx output_operands[2]; ++ HOST_WIDE_INT high_idx, low_idx; ++ high_idx = INTVAL (high_idx_rtx); ++ low_idx = INTVAL (low_idx_rtx); ++ ++ gcc_assert (high_idx >= 0 && high_idx <= 3); ++ gcc_assert (low_idx >= 0 && low_idx <= 3); ++ ++ /* We only have 10, 20, 30 and 31. */ ++ if ((low_idx != 0 || high_idx == 0) && ++ !(low_idx == 1 && high_idx == 3)) ++ return "#"; ++ ++ char sign_char = signed_p ? 's' : 'z'; ++ ++ sprintf (pattern, ++ "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1", ++ sign_char, high_idx, low_idx); ++ output_operands[0] = output; ++ output_operands[1] = input; ++ output_asm_insn (pattern, output_operands); ++ return ""; ++} ++ ++/* Return true if SYMBOL_REF X binds locally. */ ++ ++static bool ++nds32_symbol_binds_local_p (const_rtx x) ++{ ++ return (SYMBOL_REF_DECL (x) ++ ? 
targetm.binds_local_p (SYMBOL_REF_DECL (x)) ++ : SYMBOL_REF_LOCAL_P (x)); ++} ++ ++const char * ++nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call, ++ const char *call, bool align_p) ++{ ++ char pattern[100]; ++ bool noreturn_p; ++ ++ if (nds32_long_call_p (symbol)) ++ strcpy (pattern, long_call); ++ else ++ strcpy (pattern, call); ++ ++ if (flag_pic && CONSTANT_P (symbol) ++ && !nds32_symbol_binds_local_p (symbol)) ++ strcat (pattern, "@PLT"); ++ ++ if (align_p) ++ strcat (pattern, "\n\t.align 2"); ++ ++ noreturn_p = find_reg_note (insn, REG_NORETURN, NULL_RTX) != NULL_RTX; ++ ++ if (noreturn_p) ++ { ++ if (TARGET_16_BIT) ++ strcat (pattern, "\n\tnop16"); ++ else ++ strcat (pattern, "\n\tnop"); ++ } ++ ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++bool ++nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0, ++ rtx in0_idx1, rtx in1_idx1) ++{ ++ /* smds or smdrs. */ ++ if (INTVAL (in0_idx0) == INTVAL (in1_idx0) ++ && INTVAL (in0_idx1) == INTVAL (in1_idx1) ++ && INTVAL (in0_idx0) != INTVAL (in0_idx1)) ++ return false; ++ ++ /* smxds. */ ++ if (INTVAL (in0_idx0) != INTVAL (in0_idx1) ++ && INTVAL (in1_idx0) != INTVAL (in1_idx1)) ++ return false; ++ ++ return true; ++} ++ ++const char * ++nds32_output_sms (rtx in0_idx0, rtx in1_idx0, ++ rtx in0_idx1, rtx in1_idx1) ++{ ++ if (nds32_need_split_sms_p (in0_idx0, in1_idx0, ++ in0_idx1, in1_idx1)) ++ return "#"; ++ /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */ ++ ++ /* smds or smdrs. 
*/ ++ if (INTVAL (in0_idx0) == INTVAL (in1_idx0) ++ && INTVAL (in0_idx1) == INTVAL (in1_idx1) ++ && INTVAL (in0_idx0) != INTVAL (in0_idx1)) ++ { ++ if (INTVAL (in0_idx0) == 0) ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smds\t%0, %1, %2"; ++ else ++ return "smdrs\t%0, %1, %2"; ++ } ++ else ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smdrs\t%0, %1, %2"; ++ else ++ return "smds\t%0, %1, %2"; ++ } ++ } ++ ++ if (INTVAL (in0_idx0) != INTVAL (in0_idx1) ++ && INTVAL (in1_idx0) != INTVAL (in1_idx1)) ++ { ++ if (INTVAL (in0_idx0) == 1) ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smxds\t%0, %2, %1"; ++ else ++ return "smxds\t%0, %1, %2"; ++ } ++ else ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smxds\t%0, %1, %2"; ++ else ++ return "smxds\t%0, %2, %1"; ++ } ++ } ++ ++ gcc_unreachable (); ++ return ""; ++} ++ ++void ++nds32_split_sms (rtx out, rtx in0, rtx in1, ++ rtx in0_idx0, rtx in1_idx0, ++ rtx in0_idx1, rtx in1_idx1) ++{ ++ rtx result0 = gen_reg_rtx (SImode); ++ rtx result1 = gen_reg_rtx (SImode); ++ emit_insn (gen_mulhisi3v (result0, in0, in1, ++ in0_idx0, in1_idx0)); ++ emit_insn (gen_mulhisi3v (result1, in0, in1, ++ in0_idx1, in1_idx1)); ++ emit_insn (gen_subsi3 (out, result0, result1)); ++} ++ ++/* Spilt a doubleword instrucion to two single word instructions. */ ++void ++nds32_spilt_doubleword (rtx *operands, bool load_p) ++{ ++ int reg = load_p ? 0 : 1; ++ int mem = load_p ? 1 : 0; ++ rtx reg_rtx = load_p ? operands[0] : operands[1]; ++ rtx mem_rtx = load_p ? operands[1] : operands[0]; ++ rtx low_part[2], high_part[2]; ++ rtx sub_mem = XEXP (mem_rtx, 0); ++ ++ /* Generate low_part and high_part register pattern. ++ i.e. register pattern like: ++ (reg:DI) -> (subreg:SI (reg:DI)) ++ (subreg:SI (reg:DI)) */ ++ low_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 0); ++ high_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 4); ++ ++ /* Generate low_part and high_part memory pattern. 
++ Memory format is (post_dec) will generate: ++ low_part: lwi.bi reg, [mem], 4 ++ high_part: lwi.bi reg, [mem], -12 */ ++ if (GET_CODE (sub_mem) == POST_DEC) ++ { ++ /* memory format is (post_dec (reg)), ++ so that extract (reg) from the (post_dec (reg)) pattern. */ ++ sub_mem = XEXP (sub_mem, 0); ++ ++ /* generate low_part and high_part memory format: ++ low_part: (post_modify ((reg) (plus (reg) (const 4))) ++ high_part: (post_modify ((reg) (plus (reg) (const -12))) */ ++ low_part[mem] = gen_frame_mem (SImode, ++ gen_rtx_POST_MODIFY (Pmode, sub_mem, ++ gen_rtx_PLUS (Pmode, ++ sub_mem, ++ GEN_INT (4)))); ++ high_part[mem] = gen_frame_mem (SImode, ++ gen_rtx_POST_MODIFY (Pmode, sub_mem, ++ gen_rtx_PLUS (Pmode, ++ sub_mem, ++ GEN_INT (-12)))); ++ } ++ else if (GET_CODE (sub_mem) == POST_MODIFY) ++ { ++ /* Memory format is (post_modify (reg) (plus (reg) (const))), ++ so that extract (reg) from the post_modify pattern. */ ++ rtx post_mem = XEXP (sub_mem, 0); ++ ++ /* Extract (const) from the (post_modify (reg) (plus (reg) (const))) ++ pattern. */ ++ ++ rtx plus_op = XEXP (sub_mem, 1); ++ rtx post_val = XEXP (plus_op, 1); ++ ++ /* Generate low_part and high_part memory format: ++ low_part: (post_modify ((reg) (plus (reg) (const))) ++ high_part: ((plus (reg) (const 4))) */ ++ low_part[mem] = gen_frame_mem (SImode, ++ gen_rtx_POST_MODIFY (Pmode, post_mem, ++ gen_rtx_PLUS (Pmode, ++ post_mem, ++ post_val))); ++ high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode, ++ post_mem, ++ 4)); ++ } ++ else ++ { ++ /* memory format: (symbol_ref), (const), (reg + const_int). */ ++ low_part[mem] = adjust_address (mem_rtx, SImode, 0); ++ high_part[mem] = adjust_address (mem_rtx, SImode, 4); ++ } ++ ++ /* After reload completed, we have dependent issue by low part register and ++ higt part memory. i.e. 
we cannot split a sequence ++ like: ++ load $r0, [%r1] ++ spilt to ++ lw $r0, [%r0] ++ lwi $r1, [%r0 + 4] ++ swap position ++ lwi $r1, [%r0 + 4] ++ lw $r0, [%r0] ++ For store instruction we don't have a problem. ++ ++ When memory format is [post_modify], we need to emit high part instruction, ++ before low part instruction. ++ expamle: ++ load $r0, [%r2], post_val ++ spilt to ++ load $r1, [%r2 + 4] ++ load $r0, [$r2], post_val. */ ++ if ((load_p && reg_overlap_mentioned_p (low_part[0], high_part[1])) ++ || GET_CODE (sub_mem) == POST_MODIFY) ++ { ++ operands[2] = high_part[0]; ++ operands[3] = high_part[1]; ++ operands[4] = low_part[0]; ++ operands[5] = low_part[1]; ++ } ++ else ++ { ++ operands[2] = low_part[0]; ++ operands[3] = low_part[1]; ++ operands[4] = high_part[0]; ++ operands[5] = high_part[1]; ++ } ++} ++ ++void ++nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ rtx src_high_part, src_low_part; ++ rtx dst_high_part, dst_low_part; ++ ++ dst_high_part = nds32_di_high_part_subreg (dst); ++ dst_low_part = nds32_di_low_part_subreg (dst); ++ ++ src_high_part = nds32_di_high_part_subreg (src); ++ src_low_part = nds32_di_low_part_subreg (src); ++ ++ /* We need to handle shift more than 32 bit!!!! 
*/ ++ if (CONST_INT_P (shiftamount)) ++ { ++ if (INTVAL (shiftamount) < 32) ++ { ++ rtx ext_start; ++ ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode); ++ ++ emit_insn (gen_wext (dst_high_part, src, ext_start)); ++ emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount)); ++ } ++ else ++ { ++ rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); ++ ++ emit_insn (gen_ashlsi3 (dst_high_part, src_low_part, ++ new_shift_amout)); ++ ++ emit_move_insn (dst_low_part, GEN_INT (0)); ++ } ++ } ++ else ++ { ++ rtx dst_low_part_l32, dst_high_part_l32; ++ rtx dst_low_part_g32, dst_high_part_g32; ++ rtx new_shift_amout, select_reg; ++ dst_low_part_l32 = gen_reg_rtx (SImode); ++ dst_high_part_l32 = gen_reg_rtx (SImode); ++ dst_low_part_g32 = gen_reg_rtx (SImode); ++ dst_high_part_g32 = gen_reg_rtx (SImode); ++ new_shift_amout = gen_reg_rtx (SImode); ++ select_reg = gen_reg_rtx (SImode); ++ ++ rtx ext_start; ++ ext_start = gen_reg_rtx (SImode); ++ ++ /* ++ if (shiftamount < 32) ++ dst_low_part = src_low_part << shiftamout ++ dst_high_part = wext (src, 32 - shiftamount) ++ # wext can't handle wext (src, 32) since it's only take rb[0:4] ++ # for extract. ++ dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part ++ else ++ dst_low_part = 0 ++ dst_high_part = src_low_part << shiftamount & 0x1f ++ */ ++ ++ emit_insn (gen_subsi3 (ext_start, ++ gen_int_mode (32, SImode), ++ shiftamount)); ++ emit_insn (gen_wext (dst_high_part_l32, src, ext_start)); ++ ++ /* Handle for shiftamout == 0. 
*/ ++ emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, ++ src_high_part, dst_high_part_l32)); ++ ++ emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount)); ++ ++ emit_move_insn (dst_low_part_g32, const0_rtx); ++ emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); ++ emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part, ++ new_shift_amout)); ++ ++ emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); ++ ++ emit_insn (gen_cmovnsi (dst_low_part, select_reg, ++ dst_low_part_l32, dst_low_part_g32)); ++ emit_insn (gen_cmovnsi (dst_high_part, select_reg, ++ dst_high_part_l32, dst_high_part_g32)); ++ } ++} ++ ++void ++nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ nds32_split_shiftrtdi3 (dst, src, shiftamount, false); ++} ++ ++void ++nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ nds32_split_shiftrtdi3 (dst, src, shiftamount, true); ++} ++ ++void ++nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ rtx dst_low_part_l32, dst_high_part_l32; ++ rtx dst_low_part_g32, dst_high_part_g32; ++ rtx select_reg, low5bit, low5bit_inv, minus32sa; ++ rtx dst_low_part_g32_tmph; ++ rtx dst_low_part_g32_tmpl; ++ rtx dst_high_part_l32_tmph; ++ rtx dst_high_part_l32_tmpl; ++ ++ rtx src_low_part, src_high_part; ++ rtx dst_high_part, dst_low_part; ++ ++ shiftamount = force_reg (SImode, shiftamount); ++ ++ emit_insn (gen_andsi3 (shiftamount, ++ shiftamount, ++ gen_int_mode (0x3f, SImode))); ++ ++ dst_high_part = nds32_di_high_part_subreg (dst); ++ dst_low_part = nds32_di_low_part_subreg (dst); ++ ++ src_high_part = nds32_di_high_part_subreg (src); ++ src_low_part = nds32_di_low_part_subreg (src); ++ ++ dst_low_part_l32 = gen_reg_rtx (SImode); ++ dst_high_part_l32 = gen_reg_rtx (SImode); ++ dst_low_part_g32 = gen_reg_rtx (SImode); ++ dst_high_part_g32 = gen_reg_rtx (SImode); ++ low5bit = gen_reg_rtx (SImode); ++ low5bit_inv = gen_reg_rtx (SImode); ++ minus32sa = gen_reg_rtx (SImode); 
++ select_reg = gen_reg_rtx (SImode); ++ ++ dst_low_part_g32_tmph = gen_reg_rtx (SImode); ++ dst_low_part_g32_tmpl = gen_reg_rtx (SImode); ++ ++ dst_high_part_l32_tmph = gen_reg_rtx (SImode); ++ dst_high_part_l32_tmpl = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); ++ ++ /* if shiftamount < 32 ++ dst_low_part = wext(src, shiftamount) ++ else ++ dst_low_part = ((src_high_part >> (shiftamount & 0x1f)) ++ | (src_low_part << (32 - (shiftamount & 0x1f)))) ++ */ ++ emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode))); ++ emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit)); ++ ++ emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); ++ ++ emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit)); ++ emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv)); ++ ++ emit_insn (gen_iorsi3 (dst_low_part_g32, ++ dst_low_part_g32_tmpl, ++ dst_low_part_g32_tmph)); ++ ++ emit_insn (gen_cmovnsi (dst_low_part, select_reg, ++ dst_low_part_l32, dst_low_part_g32)); ++ ++ /* if shiftamount < 32 ++ dst_high_part = ((src_high_part >> shiftamount) ++ | (src_low_part << (32 - shiftamount))) ++ dst_high_part = shiftamount == 0 ? 
src_high_part : dst_high_part ++ else ++ dst_high_part = wext(src, shiftamount & 0x1f) ++ */ ++ ++ emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount)); ++ ++ emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount)); ++ emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa)); ++ ++ emit_insn (gen_iorsi3 (dst_high_part_l32, ++ dst_high_part_l32_tmpl, ++ dst_high_part_l32_tmph)); ++ ++ emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, ++ src_high_part, dst_high_part_l32)); ++ ++ emit_insn (gen_wext (dst_high_part_g32, src, low5bit)); ++ ++ emit_insn (gen_cmovnsi (dst_high_part, select_reg, ++ dst_high_part_l32, dst_high_part_g32)); ++} ++ ++/* Return true if OP contains a symbol reference. */ ++bool ++symbolic_reference_mentioned_p (rtx op) ++{ ++ const char *fmt; ++ int i; + +- /* The v3push/v3pop instruction should only be applied on +- none-isr and none-variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) ++ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) ++ return true; ++ ++ fmt = GET_RTX_FORMAT (GET_CODE (op)); ++ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { +- /* For stack v3push: +- operands[0]: Re +- operands[1]: imm8u */ ++ if (fmt[i] == 'E') ++ { ++ int j; + +- /* This variable is to check if 'push25 Re,imm8u' is available. */ +- int sp_adjust; ++ for (j = XVECLEN (op, i) - 1; j >= 0; j--) ++ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) ++ return true; ++ } + +- /* Set operands[0]. */ +- operands[0] = gen_rtx_REG (SImode, re_callee_saved); ++ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) ++ return true; ++ } + +- /* Check if we can generate 'push25 Re,imm8u', +- otherwise, generate 'push25 Re,0'. 
*/ +- sp_adjust = cfun->machine->local_size +- + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; +- if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) +- && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) +- operands[1] = GEN_INT (sp_adjust); +- else +- operands[1] = GEN_INT (0); ++ return false; ++} + +- /* Create assembly code pattern. */ +- snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); +- } +- else +- { +- /* For normal stack push multiple: +- operands[0]: Rb +- operands[1]: Re +- operands[2]: En4 */ ++/* Expand PIC code for @GOTOFF and @GOT. + +- /* This variable is used to check if we only need to generate En4 field. +- As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ +- int push_en4_only_p = 0; ++ Example for @GOTOFF: + +- /* Set operands[0] and operands[1]. */ +- operands[0] = gen_rtx_REG (SImode, rb_callee_saved); +- operands[1] = gen_rtx_REG (SImode, re_callee_saved); ++ la $r0, symbol@GOTOFF ++ -> sethi $ta, hi20(symbol@GOTOFF) ++ ori $ta, $ta, lo12(symbol@GOTOFF) ++ add $r0, $ta, $gp + +- /* 'smw.adm $sp,[$sp],$sp,0' means push nothing. 
*/ +- if (!cfun->machine->fp_size +- && !cfun->machine->gp_size +- && !cfun->machine->lp_size +- && REGNO (operands[0]) == SP_REGNUM +- && REGNO (operands[1]) == SP_REGNUM) ++ Example for @GOT: ++ ++ la $r0, symbol@GOT ++ -> sethi $ta, hi20(symbol@GOT) ++ ori $ta, $ta, lo12(symbol@GOT) ++ lw $r0, [$ta + $gp] ++*/ ++rtx ++nds32_legitimize_pic_address (rtx x) ++{ ++ rtx addr = x; ++ rtx reg = gen_reg_rtx (Pmode); ++ rtx pat; ++ ++ if (GET_CODE (x) == LABEL_REF ++ || (GET_CODE (x) == SYMBOL_REF ++ && (CONSTANT_POOL_ADDRESS_P (x) ++ || SYMBOL_REF_LOCAL_P (x)))) ++ { ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ emit_insn (gen_lo_sum (reg, reg, addr)); ++ x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); ++ } ++ else if (GET_CODE (x) == SYMBOL_REF) ++ { ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ emit_insn (gen_lo_sum (reg, reg, addr)); ++ ++ x = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, pic_offset_table_rtx, ++ reg)); ++ } ++ else if (GET_CODE (x) == CONST) ++ { ++ /* We don't split constant in expand_pic_move because GOTOFF can combine ++ the addend with the symbol. */ ++ addr = XEXP (x, 0); ++ gcc_assert (GET_CODE (addr) == PLUS); ++ ++ rtx op0 = XEXP (addr, 0); ++ rtx op1 = XEXP (addr, 1); ++ ++ if ((GET_CODE (op0) == LABEL_REF ++ || (GET_CODE (op0) == SYMBOL_REF ++ && (CONSTANT_POOL_ADDRESS_P (op0) ++ || SYMBOL_REF_LOCAL_P (op0)))) ++ && GET_CODE (op1) == CONST_INT) + { +- /* No need to generate instruction. 
*/ +- return ""; ++ pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); ++ pat = gen_rtx_PLUS (Pmode, pat, op1); ++ pat = gen_rtx_CONST (Pmode, pat); ++ emit_insn (gen_sethi (reg, pat)); ++ emit_insn (gen_lo_sum (reg, reg, pat)); ++ x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); ++ } ++ else if (GET_CODE (op0) == SYMBOL_REF ++ && GET_CODE (op1) == CONST_INT) ++ { ++ /* This is a constant offset from a @GOT symbol reference. */ ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ emit_insn (gen_lo_sum (reg, reg, addr)); ++ addr = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, ++ pic_offset_table_rtx, ++ reg)); ++ emit_move_insn (reg, addr); ++ if (satisfies_constraint_Is15 (op1)) ++ x = gen_rtx_PLUS (Pmode, reg, op1); ++ else ++ { ++ rtx tmp_reg = gen_reg_rtx (SImode); ++ emit_insn (gen_movsi (tmp_reg, op1)); ++ x = gen_rtx_PLUS (Pmode, reg, tmp_reg); ++ } + } + else + { +- /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ +- if (REGNO (operands[0]) == SP_REGNUM +- && REGNO (operands[1]) == SP_REGNUM) +- push_en4_only_p = 1; +- +- /* Create assembly code pattern. +- We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ +- snprintf (pattern, sizeof (pattern), +- "push.s\t%s{%s%s%s }", +- push_en4_only_p ? "" : "%0, %1, ", +- cfun->machine->fp_size ? " $fp" : "", +- cfun->machine->gp_size ? " $gp" : "", +- cfun->machine->lp_size ? " $lp" : ""); ++ /* Don't handle this pattern. */ ++ debug_rtx (x); ++ gcc_unreachable (); + } + } ++ return x; ++} + +- /* We use output_asm_insn() to output assembly code by ourself. */ +- output_asm_insn (pattern, operands); +- return ""; ++void ++nds32_expand_pic_move (rtx *operands) ++{ ++ rtx src; ++ ++ src = nds32_legitimize_pic_address (operands[1]); ++ emit_move_insn (operands[0], src); + } + +-/* Function to output stack pop operation. +- We need to deal with normal stack pop multiple or stack v3pop. 
*/ +-const char * +-nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED) ++/* Expand ICT symbol. ++ Example for @ICT and ICT model=large: ++ ++ la $r0, symbol@ICT ++ -> sethi $rt, hi20(symbol@ICT) ++ lwi $r0, [$rt + lo12(symbol@ICT)] ++ ++*/ ++rtx ++nds32_legitimize_ict_address (rtx x) + { +- /* A string pattern for output_asm_insn(). */ +- char pattern[100]; +- /* The operands array which will be used in output_asm_insn(). */ +- rtx operands[3]; +- /* Pick up callee-saved first regno and last regno for further use. */ +- int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; +- int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; ++ rtx symbol = x; ++ rtx addr = x; ++ rtx reg = gen_reg_rtx (Pmode); ++ gcc_assert (GET_CODE (x) == SYMBOL_REF ++ && nds32_indirect_call_referenced_p (x)); + +- /* If we step here, we are going to do v3pop or multiple pop operation. */ ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, symbol), UNSPEC_ICT); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); + +- /* The v3push/v3pop instruction should only be applied on +- none-isr and none-variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) +- { +- /* For stack v3pop: +- operands[0]: Re +- operands[1]: imm8u */ ++ x = gen_const_mem (SImode, gen_rtx_LO_SUM (Pmode, reg, addr)); + +- /* This variable is to check if 'pop25 Re,imm8u' is available. */ +- int sp_adjust; ++ return x; ++} + +- /* Set operands[0]. */ +- operands[0] = gen_rtx_REG (SImode, re_callee_saved); ++void ++nds32_expand_ict_move (rtx *operands) ++{ ++ rtx src = operands[1]; + +- /* Check if we can generate 'pop25 Re,imm8u', +- otherwise, generate 'pop25 Re,0'. +- We have to consider alloca issue as well. +- If the function does call alloca(), the stack pointer is not fixed. +- In that case, we cannot use 'pop25 Re,imm8u' directly. 
+- We have to caculate stack pointer from frame pointer +- and then use 'pop25 Re,0'. */ +- sp_adjust = cfun->machine->local_size +- + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; +- if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) +- && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) +- && !cfun->calls_alloca) +- operands[1] = GEN_INT (sp_adjust); +- else +- operands[1] = GEN_INT (0); ++ src = nds32_legitimize_ict_address (src); + +- /* Create assembly code pattern. */ +- snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); ++ emit_move_insn (operands[0], src); ++} ++ ++/* Return true X is a indirect call symbol. */ ++bool ++nds32_indirect_call_referenced_p (rtx x) ++{ ++ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_ICT) ++ x = XVECEXP (x, 0, 0); ++ ++ if (GET_CODE (x) == SYMBOL_REF) ++ { ++ tree decl = SYMBOL_REF_DECL (x); ++ ++ return decl ++ && (lookup_attribute("indirect_call", ++ DECL_ATTRIBUTES(decl)) ++ != NULL); + } ++ ++ return false; ++} ++ ++/* Return true X is need use long call. */ ++bool ++nds32_long_call_p (rtx symbol) ++{ ++ if (nds32_indirect_call_referenced_p (symbol)) ++ return TARGET_ICT_MODEL_LARGE; + else +- { +- /* For normal stack pop multiple: +- operands[0]: Rb +- operands[1]: Re +- operands[2]: En4 */ ++ return TARGET_CMODEL_LARGE; ++} + +- /* This variable is used to check if we only need to generate En4 field. +- As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ +- int pop_en4_only_p = 0; ++/* Return true if X contains a thread-local symbol. */ ++bool ++nds32_tls_referenced_p (rtx x) ++{ ++ if (!targetm.have_tls) ++ return false; + +- /* Set operands[0] and operands[1]. */ +- operands[0] = gen_rtx_REG (SImode, rb_callee_saved); +- operands[1] = gen_rtx_REG (SImode, re_callee_saved); ++ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) ++ x = XEXP (XEXP (x, 0), 0); + +- /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing. 
*/ +- if (!cfun->machine->fp_size +- && !cfun->machine->gp_size +- && !cfun->machine->lp_size +- && REGNO (operands[0]) == SP_REGNUM +- && REGNO (operands[1]) == SP_REGNUM) +- { +- /* No need to generate instruction. */ +- return ""; +- } +- else +- { +- /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ +- if (REGNO (operands[0]) == SP_REGNUM +- && REGNO (operands[1]) == SP_REGNUM) +- pop_en4_only_p = 1; ++ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) ++ return true; + +- /* Create assembly code pattern. +- We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ +- snprintf (pattern, sizeof (pattern), +- "pop.s\t%s{%s%s%s }", +- pop_en4_only_p ? "" : "%0, %1, ", +- cfun->machine->fp_size ? " $fp" : "", +- cfun->machine->gp_size ? " $gp" : "", +- cfun->machine->lp_size ? " $lp" : ""); ++ return false; ++} ++ ++/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute ++ this (thread-local) address. */ ++rtx ++nds32_legitimize_tls_address (rtx x) ++{ ++ rtx tmp_reg; ++ rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM); ++ rtx pat, insns, reg0; ++ ++ if (GET_CODE (x) == SYMBOL_REF) ++ switch (SYMBOL_REF_TLS_MODEL (x)) ++ { ++ case TLS_MODEL_GLOBAL_DYNAMIC: ++ case TLS_MODEL_LOCAL_DYNAMIC: ++ /* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill ++ may destroy the define-use chain anylysis to insert relax_hint. */ ++ if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC) ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD); ++ else ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD); ++ ++ pat = gen_rtx_CONST (SImode, pat); ++ reg0 = gen_rtx_REG (Pmode, 0); ++ /* If we can confirm all clobber reigsters, it doesn't have to use call ++ instruction. 
*/ ++ insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0))); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx); ++ RTL_CONST_CALL_P (insns) = 1; ++ tmp_reg = gen_reg_rtx (SImode); ++ emit_move_insn (tmp_reg, reg0); ++ x = tmp_reg; ++ break; ++ ++ case TLS_MODEL_INITIAL_EXEC: ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE); ++ tmp_reg = gen_reg_rtx (SImode); ++ pat = gen_rtx_CONST (SImode, pat); ++ emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0))); ++ if (flag_pic) ++ emit_use (pic_offset_table_rtx); ++ x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); ++ break; ++ ++ case TLS_MODEL_LOCAL_EXEC: ++ /* Expand symbol_ref@TPOFF': ++ sethi $ta, hi20(symbol_ref@TPOFF) ++ ori $ta, $ta, lo12(symbol_ref@TPOFF) ++ add $r0, $ta, $tp */ ++ tmp_reg = gen_reg_rtx (SImode); ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE); ++ pat = gen_rtx_CONST (SImode, pat); ++ emit_insn (gen_sethi (tmp_reg, pat)); ++ emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); ++ x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ else if (GET_CODE (x) == CONST) ++ { ++ rtx base, addend; ++ split_const (x, &base, &addend); ++ ++ if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) ++ { ++ /* Expand symbol_ref@TPOFF': ++ sethi $ta, hi20(symbol_ref@TPOFF + addend) ++ ori $ta, $ta, lo12(symbol_ref@TPOFF + addend) ++ add $r0, $ta, $tp */ ++ tmp_reg = gen_reg_rtx (SImode); ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, base), UNSPEC_TLSLE); ++ pat = gen_rtx_PLUS (SImode, pat, addend); ++ pat = gen_rtx_CONST (SImode, pat); ++ emit_insn (gen_sethi (tmp_reg, pat)); ++ emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); ++ x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + } + } + +- /* We use output_asm_insn() to output assembly code by ourself. */ +- output_asm_insn (pattern, operands); +- return ""; ++ return x; + } + +-/* Function to generate PC relative jump table. +- Refer to nds32.md for more details. 
++void ++nds32_expand_tls_move (rtx *operands) ++{ ++ rtx src = operands[1]; ++ rtx base, addend; + +- The following is the sample for the case that diff value +- can be presented in '.short' size. ++ if (CONSTANT_P (src)) ++ split_const (src, &base, &addend); + +- addi $r1, $r1, -(case_lower_bound) +- slti $ta, $r1, (case_number) +- beqz $ta, .L_skip_label ++ if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) ++ src = nds32_legitimize_tls_address (src); ++ else ++ { ++ src = nds32_legitimize_tls_address (base); ++ if (addend != const0_rtx) ++ { ++ src = gen_rtx_PLUS (SImode, src, addend); ++ src = force_operand (src, operands[0]); ++ } ++ } + +- la $ta, .L35 ! get jump table address +- lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry +- addi $ta, $r1, $ta +- jr5 $ta ++ emit_move_insn (operands[0], src); ++} + +- ! jump table entry +- L35: +- .short .L25-.L35 +- .short .L26-.L35 +- .short .L27-.L35 +- .short .L28-.L35 +- .short .L29-.L35 +- .short .L30-.L35 +- .short .L31-.L35 +- .short .L32-.L35 +- .short .L33-.L35 +- .short .L34-.L35 */ +-const char * +-nds32_output_casesi_pc_relative (rtx *operands) ++void ++nds32_expand_constant (enum machine_mode mode, HOST_WIDE_INT val, ++ rtx target, rtx source) + { +- machine_mode mode; +- rtx diff_vec; ++ rtx temp = gen_reg_rtx (mode); ++ int clear_sign_bit_copies = 0; ++ int clear_zero_bit_copies = 0; ++ unsigned HOST_WIDE_INT remainder = val & 0xffffffffUL; ++ ++ /* Count number of leading zeros. */ ++ clear_sign_bit_copies = __builtin_clz (remainder); ++ /* Count number of trailing zeros. 
*/ ++ clear_zero_bit_copies = __builtin_ctz (remainder); ++ ++ HOST_WIDE_INT sign_shift_mask = ((0xffffffffUL ++ << (32 - clear_sign_bit_copies)) ++ & 0xffffffffUL); ++ HOST_WIDE_INT zero_shift_mask = (1 << clear_zero_bit_copies) - 1; ++ ++ if (clear_sign_bit_copies > 0 && clear_sign_bit_copies < 17 ++ && (remainder | sign_shift_mask) == 0xffffffffUL) ++ { ++ /* Transfer AND to two shifts, example: ++ a = b & 0x7fffffff => (b << 1) >> 1 */ ++ rtx shift = GEN_INT (clear_sign_bit_copies); + +- diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1]))); ++ emit_insn (gen_ashlsi3 (temp, source, shift)); ++ emit_insn (gen_lshrsi3 (target, temp, shift)); ++ } ++ else if (clear_zero_bit_copies > 0 && clear_sign_bit_copies < 17 ++ && (remainder | zero_shift_mask) == 0xffffffffUL) ++ { ++ /* Transfer AND to two shifts, example: ++ a = b & 0xfff00000 => (b >> 20) << 20 */ ++ rtx shift = GEN_INT (clear_zero_bit_copies); + +- gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); ++ emit_insn (gen_lshrsi3 (temp, source, shift)); ++ emit_insn (gen_ashlsi3 (target, temp, shift)); ++ } ++ else ++ { ++ emit_move_insn (temp, GEN_INT (val)); ++ emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp)); ++ } ++} + +- /* Step C: "t <-- operands[1]". */ +- output_asm_insn ("la\t$ta, %l1", operands); ++/* Auxiliary functions for lwm/smw. */ ++bool ++nds32_valid_smw_lwm_base_p (rtx op) ++{ ++ rtx base_addr; + +- /* Get the mode of each element in the difference vector. */ +- mode = GET_MODE (diff_vec); ++ if (!MEM_P (op)) ++ return false; + +- /* Step D: "z <-- (mem (plus (operands[0] << m) t))", +- where m is 0, 1, or 2 to load address-diff value from table. 
*/ +- switch (mode) ++ base_addr = XEXP (op, 0); ++ ++ if (REG_P (base_addr)) ++ return true; ++ else + { +- case QImode: +- output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); +- break; +- case HImode: +- output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); +- break; +- case SImode: +- output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); +- break; +- default: +- gcc_unreachable (); ++ if (GET_CODE (base_addr) == POST_INC ++ && REG_P (XEXP (base_addr, 0))) ++ return true; + } + +- /* Step E: "t <-- z + t". +- Add table label_ref with address-diff value to +- obtain target case address. */ +- output_asm_insn ("add\t$ta, %2, $ta", operands); ++ return false; ++} + +- /* Step F: jump to target with register t. */ +- if (TARGET_16_BIT) +- return "jr5\t$ta"; +- else +- return "jr\t$ta"; ++/* Auxiliary functions for manipulation DI mode. */ ++rtx nds32_di_high_part_subreg(rtx reg) ++{ ++ unsigned high_part_offset = subreg_highpart_offset (SImode, DImode); ++ ++ return simplify_gen_subreg ( ++ SImode, reg, ++ DImode, high_part_offset); + } + +-/* Function to generate normal jump table. */ +-const char * +-nds32_output_casesi (rtx *operands) ++rtx nds32_di_low_part_subreg(rtx reg) + { +- /* Step C: "t <-- operands[1]". */ +- output_asm_insn ("la\t$ta, %l1", operands); ++ unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode); + +- /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ +- output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); ++ return simplify_gen_subreg ( ++ SImode, reg, ++ DImode, low_part_offset); ++} + +- /* No need to perform Step E, which is only used for +- pc relative jump table. */ ++/* ------------------------------------------------------------------------ */ + +- /* Step F: jump to target with register z. */ +- if (TARGET_16_BIT) +- return "jr5\t%2"; ++/* Auxiliary function for output TLS patterns. 
*/ ++ ++const char * ++nds32_output_tls_desc (rtx *operands) ++{ ++ char pattern[1000]; ++ ++ if (TARGET_RELAX_HINT) ++ snprintf (pattern, sizeof (pattern), ++ ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t" ++ ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t" ++ ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t" ++ ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t" ++ ".relax_hint %%1\n\tjral $r15"); + else +- return "jr\t%2"; ++ snprintf (pattern, sizeof (pattern), ++ "sethi $r0, hi20(%%0)\n\t" ++ "ori $r0, $r0, lo12(%%0)\n\t" ++ "lw $r15, [$r0 + $gp]\n\t" ++ "add $r0, $r0, $gp\n\t" ++ "jral $r15"); ++ output_asm_insn (pattern, operands); ++ return ""; + } + +-/* ------------------------------------------------------------------------ */ ++const char * ++nds32_output_tls_ie (rtx *operands) ++{ ++ char pattern[1000]; ++ ++ if (flag_pic) ++ { ++ if (TARGET_RELAX_HINT) ++ snprintf (pattern, sizeof (pattern), ++ ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" ++ ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t" ++ ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]"); ++ else ++ snprintf (pattern, sizeof (pattern), ++ "sethi %%0, hi20(%%1)\n\t" ++ "ori %%0, %%0, lo12(%%1)\n\t" ++ "lw %%0, [%%0 + $gp]"); ++ } ++ else ++ { ++ if (TARGET_RELAX_HINT) ++ snprintf (pattern, sizeof (pattern), ++ ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" ++ ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]"); ++ else ++ snprintf (pattern, sizeof (pattern), ++ "sethi %%0, hi20(%%1)\n\t" ++ "lwi %%0, [%%0 + lo12(%%1)]"); ++ } ++ output_asm_insn (pattern, operands); ++ return ""; ++} +diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c +index 4c26dcc..c46ac8f 100644 +--- a/gcc/config/nds32/nds32-memory-manipulation.c ++++ b/gcc/config/nds32/nds32-memory-manipulation.c +@@ -25,28 +25,1255 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" ++#include "tree.h" + #include "rtl.h" +-#include "emit-rtl.h" ++#include "df.h" ++#include 
"alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" + #include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* This file is divided into six parts: ++ ++ PART 1: Auxiliary static function definitions. ++ ++ PART 2: Auxiliary function for expand movmem pattern. ++ ++ PART 3: Auxiliary function for expand setmem pattern. ++ ++ PART 4: Auxiliary function for expand movstr pattern. ++ ++ PART 5: Auxiliary function for expand strlen pattern. ++ ++ PART 6: Auxiliary function for expand load_multiple/store_multiple ++ pattern. */ ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* PART 1: Auxiliary static function definitions. 
*/ ++ ++static void ++nds32_emit_load_store (rtx reg, rtx mem, ++ enum machine_mode mode, ++ int offset, bool load_p) ++{ ++ rtx new_mem; ++ new_mem = adjust_address (mem, mode, offset); ++ if (load_p) ++ emit_move_insn (reg, new_mem); ++ else ++ emit_move_insn (new_mem, reg); ++} ++ ++static void ++nds32_emit_post_inc_load_store (rtx reg, rtx base_reg, ++ enum machine_mode mode, ++ bool load_p) ++{ ++ gcc_assert (GET_MODE (reg) == mode); ++ gcc_assert (GET_MODE (base_reg) == Pmode); ++ ++ /* Do not gen (set (reg) (mem (post_inc (reg)))) directly here since it may ++ not recognize by gcc, so let gcc combine it at auto_inc_dec pass. */ ++ if (load_p) ++ emit_move_insn (reg, ++ gen_rtx_MEM (mode, ++ base_reg)); ++ else ++ emit_move_insn (gen_rtx_MEM (mode, ++ base_reg), ++ reg); ++ ++ emit_move_insn (base_reg, ++ plus_constant(Pmode, base_reg, GET_MODE_SIZE (mode))); ++} ++ ++static void ++nds32_emit_mem_move (rtx src, rtx dst, ++ enum machine_mode mode, ++ int addr_offset) ++{ ++ gcc_assert (MEM_P (src) && MEM_P (dst)); ++ rtx tmp_reg = gen_reg_rtx (mode); ++ nds32_emit_load_store (tmp_reg, src, mode, ++ addr_offset, /* load_p */ true); ++ nds32_emit_load_store (tmp_reg, dst, mode, ++ addr_offset, /* load_p */ false); ++} ++ ++static void ++nds32_emit_mem_move_block (int base_regno, int count, ++ rtx *dst_base_reg, rtx *dst_mem, ++ rtx *src_base_reg, rtx *src_mem, ++ bool update_base_reg_p) ++{ ++ rtx new_base_reg; ++ ++ emit_insn (nds32_expand_load_multiple (base_regno, count, ++ *src_base_reg, *src_mem, ++ update_base_reg_p, &new_base_reg)); ++ if (update_base_reg_p) ++ { ++ *src_base_reg = new_base_reg; ++ *src_mem = gen_rtx_MEM (SImode, *src_base_reg); ++ } ++ ++ emit_insn (nds32_expand_store_multiple (base_regno, count, ++ *dst_base_reg, *dst_mem, ++ update_base_reg_p, &new_base_reg)); ++ ++ if (update_base_reg_p) ++ { ++ *dst_base_reg = new_base_reg; ++ *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg); ++ } ++} ++ ++/* 
------------------------------------------------------------------------ */ ++ ++/* PART 2: Auxiliary function for expand movmem pattern. */ ++ ++static bool ++nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, ++ rtx size, ++ rtx alignment, bool use_zol_p) ++{ ++ /* Emit loop version of movmem. ++ ++ andi $size_least_3_bit, $size, #~7 ++ add $dst_end, $dst, $size ++ move $dst_itr, $dst ++ move $src_itr, $src ++ beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. ++ add $double_word_end, $dst, $size_least_3_bit ++ ++ .Ldouble_word_mode_loop: ++ lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr ++ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr ++ ! move will delete after register allocation ++ move $src_itr, $src_itr' ++ move $dst_itr, $dst_itr' ++ ! Not readch upper bound. Loop. ++ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop ++ ++ .Lbyte_mode_entry: ++ beq $dst_itr, $dst_end, .Lend_label ++ .Lbyte_mode_loop: ++ lbi.bi $tmp, [$src_itr], #1 ++ sbi.bi $tmp, [$dst_itr], #1 ++ ! Not readch upper bound. Loop. 
++ bne $dst_itr, $dst_end, .Lbyte_mode_loop ++ .Lend_label: ++ */ ++ rtx dst_base_reg, src_base_reg; ++ rtx dst_itr, src_itr; ++ rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; ++ rtx dst_end; ++ rtx size_least_3_bit; ++ rtx double_word_end = NULL; ++ rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label; ++ rtx tmp; ++ rtx mask_least_3_bit; ++ int start_regno; ++ bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; ++ int hwloop_id = cfun->machine->hwloop_group_id; ++ ++ if (TARGET_ISA_V3M && !align_to_4_bytes) ++ return 0; ++ ++ if (TARGET_REDUCED_REGS) ++ start_regno = 2; ++ else ++ start_regno = 16; ++ ++ dst_itr = gen_reg_rtx (Pmode); ++ src_itr = gen_reg_rtx (Pmode); ++ dst_end = gen_reg_rtx (Pmode); ++ tmp = gen_reg_rtx (QImode); ++ mask_least_3_bit = GEN_INT (~7); ++ ++ double_word_mode_loop = gen_label_rtx (); ++ byte_mode_entry = gen_label_rtx (); ++ byte_mode_loop = gen_label_rtx (); ++ end_label = gen_label_rtx (); ++ ++ dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); ++ src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); ++ /* andi $size_least_3_bit, $size, #~7 */ ++ size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ /* add $dst_end, $dst, $size */ ++ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ /* move $dst_itr, $dst ++ move $src_itr, $src */ ++ emit_move_insn (dst_itr, dst_base_reg); ++ emit_move_insn (src_itr, src_base_reg); ++ ++ /* beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */ ++ emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL, ++ SImode, 1, byte_mode_entry); ++ if (TARGET_HWLOOP && use_zol_p) ++ { ++ rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop); ++ /* We use multiple-load/store instruction once to process 8-bytes, ++ division 8-bytes for one cycle, generate ++ srli $size_least_3_bit, size_least_3_bit, 3. 
*/ ++ emit_insn (gen_lshrsi3 (size_least_3_bit, size_least_3_bit, GEN_INT (3))); ++ /* mtlbi .Ldouble_word_mode_loop */ ++ emit_insn (gen_mtlbi_hint (start_label, GEN_INT (hwloop_id))); ++ emit_insn (gen_init_lc (size_least_3_bit, GEN_INT (hwloop_id))); ++ emit_insn (gen_no_hwloop ()); ++ } ++ else ++ { ++ /* add $double_word_end, $dst, $size_least_3_bit */ ++ double_word_end = expand_binop (Pmode, add_optab, ++ dst_base_reg, size_least_3_bit, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ } ++ ++ /* .Ldouble_word_mode_loop: */ ++ emit_label (double_word_mode_loop); ++ /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr ++ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ ++ src_itr_m = src_itr; ++ dst_itr_m = dst_itr; ++ srcmem_m = srcmem; ++ dstmem_m = dstmem; ++ nds32_emit_mem_move_block (start_regno, 2, ++ &dst_itr_m, &dstmem_m, ++ &src_itr_m, &srcmem_m, ++ true); ++ /* move $src_itr, $src_itr' ++ move $dst_itr, $dst_itr' */ ++ emit_move_insn (dst_itr, dst_itr_m); ++ emit_move_insn (src_itr, src_itr_m); ++ ++ if (TARGET_HWLOOP && use_zol_p) ++ { ++ rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop); ++ /* Hwloop pseduo instrtion to handle CFG. */ ++ rtx cfg_insn = emit_jump_insn (gen_hwloop_cfg (GEN_INT (hwloop_id), ++ start_label)); ++ JUMP_LABEL (cfg_insn) = double_word_mode_loop; ++ cfun->machine->hwloop_group_id++; ++ } ++ else ++ { ++ /* ! Not readch upper bound. Loop. 
++ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ ++ emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL, ++ Pmode, 1, double_word_mode_loop); ++ } ++ ++ /* .Lbyte_mode_entry: */ ++ emit_label (byte_mode_entry); ++ ++ /* beq $dst_itr, $dst_end, .Lend_label */ ++ emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL, ++ Pmode, 1, end_label); ++ /* .Lbyte_mode_loop: */ ++ emit_label (byte_mode_loop); ++ ++ emit_insn (gen_no_hwloop ()); ++ /* lbi.bi $tmp, [$src_itr], #1 */ ++ nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); ++ ++ /* sbi.bi $tmp, [$dst_itr], #1 */ ++ nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); ++ /* ! Not readch upper bound. Loop. ++ bne $dst_itr, $dst_end, .Lbyte_mode_loop */ ++ emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, ++ SImode, 1, byte_mode_loop); ++ ++ /* .Lend_label: */ ++ emit_label (end_label); ++ ++ return true; ++} ++ ++static bool ++nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, ++ rtx size, rtx alignment) ++{ ++ rtx dst_base_reg, src_base_reg; ++ rtx dst_itr, src_itr; ++ rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; ++ rtx dst_end; ++ rtx double_word_mode_loop, byte_mode_loop; ++ rtx tmp; ++ int start_regno; ++ bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; ++ int hwloop_id = cfun->machine->hwloop_group_id; ++ unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); ++ ++ if (TARGET_ISA_V3M && !align_to_4_bytes) ++ return 0; ++ ++ if (TARGET_REDUCED_REGS) ++ start_regno = 2; ++ else ++ start_regno = 16; ++ ++ dst_itr = gen_reg_rtx (Pmode); ++ src_itr = gen_reg_rtx (Pmode); ++ dst_end = gen_reg_rtx (Pmode); ++ tmp = gen_reg_rtx (QImode); ++ ++ double_word_mode_loop = gen_label_rtx (); ++ byte_mode_loop = gen_label_rtx (); ++ ++ dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); ++ src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); ++ ++ if (total_bytes < 8) ++ { ++ /* Emit total_bytes less than 8 loop version of movmem. 
++ add $dst_end, $dst, $size ++ move $dst_itr, $dst ++ .Lbyte_mode_loop: ++ lbi.bi $tmp, [$src_itr], #1 ++ sbi.bi $tmp, [$dst_itr], #1 ++ ! Not readch upper bound. Loop. ++ bne $dst_itr, $dst_end, .Lbyte_mode_loop */ ++ ++ /* add $dst_end, $dst, $size */ ++ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ /* move $dst_itr, $dst ++ move $src_itr, $src */ ++ emit_move_insn (dst_itr, dst_base_reg); ++ emit_move_insn (src_itr, src_base_reg); ++ ++ /* .Lbyte_mode_loop: */ ++ emit_label (byte_mode_loop); ++ ++ emit_insn (gen_no_hwloop ()); ++ /* lbi.bi $tmp, [$src_itr], #1 */ ++ nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); ++ ++ /* sbi.bi $tmp, [$dst_itr], #1 */ ++ nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); ++ /* ! Not readch upper bound. Loop. ++ bne $dst_itr, $dst_end, .Lbyte_mode_loop */ ++ emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, ++ SImode, 1, byte_mode_loop); ++ return true; ++ } ++ else if (total_bytes % 8 == 0) ++ { ++ /* Emit multiple of 8 loop version of movmem. ++ ++ add $dst_end, $dst, $size ++ move $dst_itr, $dst ++ move $src_itr, $src ++ ++ .Ldouble_word_mode_loop: ++ lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr ++ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr ++ ! move will delete after register allocation ++ move $src_itr, $src_itr' ++ move $dst_itr, $dst_itr' ++ ! Not readch upper bound. Loop. 
++ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ ++ ++ if (TARGET_HWLOOP) ++ { ++ rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop); ++ ++ rtx loop_count_reg = gen_reg_rtx (Pmode); ++ /* movi $loop_count_reg, total_bytes / 8 */ ++ emit_move_insn (loop_count_reg, GEN_INT (total_bytes / 8)); ++ /* mtlbi .Ldouble_word_mode_loop */ ++ emit_insn (gen_mtlbi_hint (start_label, GEN_INT (hwloop_id))); ++ /* mtusr $loop_count_reg, LC */ ++ emit_insn (gen_init_lc (loop_count_reg, GEN_INT (hwloop_id))); ++ emit_insn (gen_no_hwloop ()); ++ } ++ else ++ { ++ /* add $dst_end, $dst, $size */ ++ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ } ++ ++ /* move $dst_itr, $dst ++ move $src_itr, $src */ ++ emit_move_insn (dst_itr, dst_base_reg); ++ emit_move_insn (src_itr, src_base_reg); ++ ++ /* .Ldouble_word_mode_loop: */ ++ emit_label (double_word_mode_loop); ++ /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr ++ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ ++ src_itr_m = src_itr; ++ dst_itr_m = dst_itr; ++ srcmem_m = srcmem; ++ dstmem_m = dstmem; ++ nds32_emit_mem_move_block (start_regno, 2, ++ &dst_itr_m, &dstmem_m, ++ &src_itr_m, &srcmem_m, ++ true); ++ /* move $src_itr, $src_itr' ++ move $dst_itr, $dst_itr' */ ++ emit_move_insn (dst_itr, dst_itr_m); ++ emit_move_insn (src_itr, src_itr_m); ++ ++ if (TARGET_HWLOOP) ++ { ++ rtx start_label = gen_rtx_LABEL_REF (Pmode, double_word_mode_loop); ++ /* Hwloop pseduo instrtion to handle CFG. */ ++ rtx cfg_insn = emit_jump_insn (gen_hwloop_cfg (GEN_INT (hwloop_id), ++ start_label)); ++ JUMP_LABEL (cfg_insn) = double_word_mode_loop; ++ cfun->machine->hwloop_group_id++; ++ } ++ else ++ { ++ /* ! Not readch upper bound. Loop. 
++ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ ++ emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, ++ Pmode, 1, double_word_mode_loop); ++ } ++ } ++ else ++ { ++ /* Handle size greater than 8, and not a multiple of 8. */ ++ return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, ++ size, alignment, ++ true); ++ } ++ ++ return true; ++} ++ ++static bool ++nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem, ++ rtx size, rtx alignment) ++{ ++ if (CONST_INT_P (size)) ++ return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem, ++ size, alignment); ++ else ++ return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, ++ size, alignment, false); ++} ++ ++static bool ++nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, ++ rtx total_bytes, rtx alignment) ++{ ++ rtx dst_base_reg, src_base_reg; ++ rtx tmp_reg; ++ int maximum_bytes; ++ int maximum_bytes_per_inst; ++ int maximum_regs; ++ int start_regno; ++ int i, inst_num; ++ HOST_WIDE_INT remain_bytes, remain_words; ++ bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; ++ bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0; ++ ++ /* Because reduced-set regsiters has few registers ++ (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' ++ cannot be used for register allocation), ++ using 8 registers (32 bytes) for moving memory block ++ may easily consume all of them. ++ It makes register allocation/spilling hard to work. ++ So we only allow maximum=4 registers (16 bytes) for ++ moving memory block under reduced-set registers. */ ++ if (TARGET_REDUCED_REGS) ++ { ++ maximum_regs = 4; ++ maximum_bytes = 64; ++ start_regno = 2; ++ } ++ else ++ { ++ if (TARGET_LINUX_ABI) ++ { ++ /* $r25 is $tp so we use up to 8 registers if using Linux ABI. */ ++ maximum_regs = 8; ++ maximum_bytes = 160; ++ start_regno = 16; ++ } ++ else ++ { ++ maximum_regs = 10; ++ maximum_bytes = 160; ++ start_regno = 16; ++ } ++ } ++ maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD; ++ ++ /* 1. 
Total_bytes is integer for sure. ++ 2. Alignment is integer for sure. ++ 3. Maximum 4 or 10 registers and up to 4 instructions, ++ 4 * 4 * 4 = 64 bytes, 8 * 4 * 10 = 160 bytes. ++ 4. The dstmem cannot be volatile memory access. ++ 5. The srcmem cannot be volatile memory access. ++ 6. Known shared alignment not align to 4 byte in v3m since lmw/smw *NOT* ++ support unalign access with v3m configure. */ ++ if (GET_CODE (total_bytes) != CONST_INT ++ || GET_CODE (alignment) != CONST_INT ++ || INTVAL (total_bytes) > maximum_bytes ++ || MEM_VOLATILE_P (dstmem) ++ || MEM_VOLATILE_P (srcmem) ++ || (TARGET_ISA_V3M && !align_to_4_bytes)) ++ return false; ++ ++ dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); ++ src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); ++ remain_bytes = INTVAL (total_bytes); ++ ++ /* Do not update base address for last lmw/smw pair. */ ++ inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1)) ++ / maximum_bytes_per_inst) - 1; ++ ++ for (i = 0; i < inst_num; i++) ++ { ++ nds32_emit_mem_move_block (start_regno, maximum_regs, ++ &dst_base_reg, &dstmem, ++ &src_base_reg, &srcmem, ++ true); ++ } ++ remain_bytes -= maximum_bytes_per_inst * inst_num; ++ ++ remain_words = remain_bytes / UNITS_PER_WORD; ++ remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); ++ ++ if (remain_words != 0) ++ { ++ if (remain_bytes != 0) ++ nds32_emit_mem_move_block (start_regno, remain_words, ++ &dst_base_reg, &dstmem, ++ &src_base_reg, &srcmem, ++ true); ++ else ++ { ++ /* Do not update address if no further byte to move. */ ++ if (remain_words == 1) ++ { ++ /* emit move instruction if align to 4 byte and only 1 ++ word to move. 
*/ ++ if (align_to_4_bytes) ++ nds32_emit_mem_move (srcmem, dstmem, SImode, 0); ++ else ++ { ++ tmp_reg = gen_reg_rtx (SImode); ++ emit_insn ( ++ gen_unaligned_load_w (tmp_reg, ++ gen_rtx_MEM (SImode, src_base_reg))); ++ emit_insn ( ++ gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg), ++ tmp_reg)); ++ } ++ } ++ else ++ nds32_emit_mem_move_block (start_regno, remain_words, ++ &dst_base_reg, &dstmem, ++ &src_base_reg, &srcmem, ++ false); ++ } ++ } ++ ++ switch (remain_bytes) ++ { ++ case 3: ++ case 2: ++ { ++ if (align_to_2_bytes) ++ nds32_emit_mem_move (srcmem, dstmem, HImode, 0); ++ else ++ { ++ nds32_emit_mem_move (srcmem, dstmem, QImode, 0); ++ nds32_emit_mem_move (srcmem, dstmem, QImode, 1); ++ } ++ ++ if (remain_bytes == 3) ++ nds32_emit_mem_move (srcmem, dstmem, QImode, 2); ++ break; ++ } ++ case 1: ++ nds32_emit_mem_move (srcmem, dstmem, QImode, 0); ++ break; ++ case 0: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* Successfully create patterns, return true. */ ++ return true; ++} ++ ++/* Function to move block memory content by ++ using load_multiple and store_multiple. ++ This is auxiliary extern function to help create rtx template. ++ Check nds32-multiple.md file for the patterns. */ ++bool ++nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) ++{ ++ if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment)) ++ return true; ++ ++ if (!optimize_size && optimize > 2) ++ return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment); ++ ++ return false; ++} ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* PART 3: Auxiliary function for expand setmem pattern. 
*/ ++ ++static rtx ++nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) ++{ ++ gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); ++ ++ if (CONST_INT_P (value)) ++ { ++ unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK(QImode); ++ rtx new_val = gen_int_mode (val | (val << 8) ++ | (val << 16) | (val << 24), SImode); ++ /* Just calculate at here if it's constant value. */ ++ emit_move_insn (value4word, new_val); ++ } ++ else ++ { ++ if (NDS32_EXT_DSP_P ()) ++ { ++ /* ! prepare word ++ insb $tmp, $value, 1 ! $tmp <- 0x0000abab ++ pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ convert_move (tmp, value, true); ++ ++ emit_insn ( ++ gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); ++ ++ emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); ++ } ++ else ++ { ++ /* ! prepare word ++ andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab ++ slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 ++ or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab ++ slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 ++ or $val4word, $tmp3, $tmp4 ! 
$value4word <- 0xabababab */ ++ ++ rtx tmp1, tmp2, tmp3, tmp4; ++ tmp1 = expand_binop (SImode, and_optab, value, ++ gen_int_mode (0xff, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ tmp2 = expand_binop (SImode, ashl_optab, tmp1, ++ gen_int_mode (8, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ tmp4 = expand_binop (SImode, ashl_optab, tmp3, ++ gen_int_mode (16, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_insn (gen_iorsi3 (value4word, tmp3, tmp4)); ++ } ++ } ++ ++ return value4word; ++} ++ ++static rtx ++nds32_gen_dup_4_byte_to_word_value (rtx value) ++{ ++ rtx value4word = gen_reg_rtx (SImode); ++ nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); ++ ++ return value4word; ++} ++ ++static rtx ++nds32_gen_dup_8_byte_to_double_word_value (rtx value) ++{ ++ rtx value4doubleword = gen_reg_rtx (DImode); ++ ++ nds32_gen_dup_4_byte_to_word_value_aux ( ++ value, nds32_di_low_part_subreg(value4doubleword)); ++ ++ emit_move_insn (nds32_di_high_part_subreg(value4doubleword), ++ nds32_di_low_part_subreg(value4doubleword)); ++ return value4doubleword; ++} ++ ++ ++static rtx ++emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) ++{ ++ rtx word_mode_label = gen_label_rtx (); ++ rtx word_mode_end_label = gen_label_rtx (); ++ rtx byte_mode_size = gen_reg_rtx (SImode); ++ rtx byte_mode_size_tmp = gen_reg_rtx (SImode); ++ rtx word_mode_end = gen_reg_rtx (SImode); ++ rtx size_for_word = gen_reg_rtx (SImode); ++ ++ /* and $size_for_word, $size, #~0x7 */ ++ size_for_word = expand_binop (SImode, and_optab, size, ++ gen_int_mode (~0x7, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_move_insn (byte_mode_size, size); ++ ++ /* beqz $size_for_word, .Lbyte_mode_entry */ ++ emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL, ++ SImode, 1, word_mode_end_label); ++ /* add $word_mode_end, $dst, $size_for_word */ ++ word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, ++ 
NULL_RTX, 0, OPTAB_WIDEN); ++ ++ /* andi $byte_mode_size, $size, 0x7 */ ++ byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_move_insn (byte_mode_size, byte_mode_size_tmp); ++ ++ /* .Lword_mode: */ ++ emit_label (word_mode_label); ++ /* ! word-mode set loop ++ smw.bim $value4word, [$dst_itr], $value4word, 0 ++ bne $word_mode_end, $dst_itr, .Lword_mode */ ++ emit_insn (gen_unaligned_store_update_base_dw (itr, ++ itr, ++ value)); ++ emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, ++ Pmode, 1, word_mode_label); ++ ++ emit_label (word_mode_end_label); ++ ++ return byte_mode_size; ++} ++ ++static rtx ++emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) ++{ ++ rtx end = gen_reg_rtx (Pmode); ++ rtx byte_mode_label = gen_label_rtx (); ++ rtx end_label = gen_label_rtx (); ++ ++ value = force_reg (QImode, value); ++ ++ if (need_end) ++ end = expand_binop (Pmode, add_optab, itr, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ /* beqz $byte_mode_size, .Lend ++ add $byte_mode_end, $dst_itr, $byte_mode_size */ ++ emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL, ++ SImode, 1, end_label); ++ ++ if (!need_end) ++ end = expand_binop (Pmode, add_optab, itr, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ /* .Lbyte_mode: */ ++ emit_label (byte_mode_label); ++ ++ emit_insn (gen_no_hwloop ()); ++ /* ! byte-mode set loop ++ sbi.bi $value, [$dst_itr] ,1 ++ bne $byte_mode_end, $dst_itr, .Lbyte_mode */ ++ nds32_emit_post_inc_load_store (value, itr, QImode, false); ++ ++ emit_cmp_and_jump_insns (end, itr, NE, NULL, ++ Pmode, 1, byte_mode_label); ++ /* .Lend: */ ++ emit_label (end_label); ++ ++ if (need_end) ++ return end; ++ else ++ return NULL_RTX; ++} ++ ++static bool ++nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) ++{ ++ rtx value4doubleword; ++ rtx value4byte; ++ rtx dst; ++ rtx byte_mode_size; ++ ++ /* Emit loop version of setmem. ++ memset: ++ ! prepare word ++ andi $tmp1, $val, 0xff ! 
$tmp1 <- 0x000000ab ++ slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 ++ or $tmp3, $val, $tmp2 ! $tmp3 <- 0x0000abab ++ slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 ++ or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab ++ ++ and $size_for_word, $size, #-4 ++ beqz $size_for_word, .Lword_mode_end ++ ++ add $word_mode_end, $dst, $size_for_word ++ andi $byte_mode_size, $size, 3 ++ ++ .Lword_mode: ++ ! word-mode set loop ++ smw.bim $value4word, [$dst], $value4word, 0 ++ bne $word_mode_end, $dst, .Lword_mode ++ ++ .Lword_mode_end: ++ beqz $byte_mode_size, .Lend ++ add $byte_mode_end, $dst, $byte_mode_size ++ ++ .Lbyte_mode: ++ ! byte-mode set loop ++ sbi.bi $value4word, [$dst] ,1 ++ bne $byte_mode_end, $dst, .Lbyte_mode ++ .Lend: */ ++ ++ dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); ++ ++ /* ! prepare word ++ andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab ++ slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 ++ or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab ++ slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 ++ or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ ++ value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); ++ ++ /* and $size_for_word, $size, #-4 ++ beqz $size_for_word, .Lword_mode_end ++ ++ add $word_mode_end, $dst, $size_for_word ++ andi $byte_mode_size, $size, 3 ++ ++ .Lword_mode: ++ ! word-mode set loop ++ smw.bim $value4word, [$dst], $value4word, 0 ++ bne $word_mode_end, $dst, .Lword_mode ++ .Lword_mode_end: */ ++ byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); ++ ++ /* beqz $byte_mode_size, .Lend ++ add $byte_mode_end, $dst, $byte_mode_size ++ ++ .Lbyte_mode: ++ ! 
byte-mode set loop ++ sbi.bi $value, [$dst] ,1 ++ bne $byte_mode_end, $dst, .Lbyte_mode ++ .Lend: */ ++ ++ value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, ++ subreg_lowpart_offset (QImode, DImode)); ++ ++ emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); ++ ++ return true; ++} ++ ++static bool ++nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) ++{ ++ rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); ++ rtx need_align_bytes = gen_reg_rtx (SImode); ++ rtx last_2_bit = gen_reg_rtx (SImode); ++ rtx byte_loop_base = gen_reg_rtx (SImode); ++ rtx byte_loop_size = gen_reg_rtx (SImode); ++ rtx remain_size = gen_reg_rtx (SImode); ++ rtx new_base_reg; ++ rtx value4byte, value4doubleword; ++ rtx byte_mode_size; ++ rtx last_byte_loop_label = gen_label_rtx (); ++ ++ size = force_reg (SImode, size); ++ ++ value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); ++ value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, ++ subreg_lowpart_offset (QImode, DImode)); ++ ++ emit_move_insn (byte_loop_size, size); ++ emit_move_insn (byte_loop_base, base_reg); ++ ++ /* Jump to last byte loop if size is less than 16. */ ++ emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL, ++ SImode, 1, last_byte_loop_label); ++ ++ /* Make sure align to 4 byte first since v3m can't unalign access. */ ++ emit_insn (gen_andsi3 (last_2_bit, ++ base_reg, ++ gen_int_mode (0x3, SImode))); ++ ++ emit_insn (gen_subsi3 (need_align_bytes, ++ gen_int_mode (4, SImode), ++ last_2_bit)); ++ ++ /* Align to 4 byte. */ ++ new_base_reg = emit_setmem_byte_loop (base_reg, ++ need_align_bytes, ++ value4byte, ++ true); ++ ++ /* Calculate remain size. */ ++ emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); ++ ++ /* Set memory word by word. 
*/ ++ byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, ++ remain_size, ++ value4doubleword); ++ ++ emit_move_insn (byte_loop_base, new_base_reg); ++ emit_move_insn (byte_loop_size, byte_mode_size); ++ ++ emit_label (last_byte_loop_label); ++ ++ /* And set memory for remain bytes. */ ++ emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false); ++ return true; ++} ++ ++static bool ++nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value, ++ rtx align ATTRIBUTE_UNUSED, ++ rtx expected_align ATTRIBUTE_UNUSED, ++ rtx expected_size ATTRIBUTE_UNUSED) ++{ ++ unsigned maximum_regs, maximum_bytes, start_regno, regno; ++ rtx value4word; ++ rtx dst_base_reg, new_base_reg; ++ unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw; ++ unsigned HOST_WIDE_INT real_size; ++ ++ if (TARGET_REDUCED_REGS) ++ { ++ maximum_regs = 4; ++ maximum_bytes = 64; ++ start_regno = 2; ++ } ++ else ++ { ++ maximum_regs = 8; ++ maximum_bytes = 128; ++ start_regno = 16; ++ } ++ ++ real_size = UINTVAL (size) & GET_MODE_MASK(SImode); ++ ++ if (!(CONST_INT_P (size) && real_size <= maximum_bytes)) ++ return false; ++ ++ remain_bytes = real_size; ++ ++ gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); ++ ++ value4word = nds32_gen_dup_4_byte_to_word_value (value); ++ ++ prepare_regs = remain_bytes / UNITS_PER_WORD; ++ ++ dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); ++ ++ if (prepare_regs > maximum_regs) ++ prepare_regs = maximum_regs; ++ ++ fill_per_smw = prepare_regs * UNITS_PER_WORD; ++ ++ regno = start_regno; ++ switch (prepare_regs) ++ { ++ case 2: ++ default: ++ { ++ rtx reg0 = gen_rtx_REG (SImode, regno); ++ rtx reg1 = gen_rtx_REG (SImode, regno+1); ++ unsigned last_regno = start_regno + prepare_regs - 1; ++ ++ emit_move_insn (reg0, value4word); ++ emit_move_insn (reg1, value4word); ++ rtx regd = gen_rtx_REG (DImode, regno); ++ regno += 2; ++ ++ /* Try to utilize movd44! 
*/ ++ while (regno <= last_regno) ++ { ++ if ((regno + 1) <=last_regno) ++ { ++ rtx reg = gen_rtx_REG (DImode, regno); ++ emit_move_insn (reg, regd); ++ regno += 2; ++ } ++ else ++ { ++ rtx reg = gen_rtx_REG (SImode, regno); ++ emit_move_insn (reg, reg0); ++ regno += 1; ++ } ++ } ++ break; ++ } ++ case 1: ++ { ++ rtx reg = gen_rtx_REG (SImode, regno++); ++ emit_move_insn (reg, value4word); ++ } ++ break; ++ case 0: ++ break; ++ } ++ ++ if (fill_per_smw) ++ for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw) ++ { ++ emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs, ++ dst_base_reg, dstmem, ++ true, &new_base_reg)); ++ dst_base_reg = new_base_reg; ++ dstmem = gen_rtx_MEM (SImode, dst_base_reg); ++ } ++ ++ remain_words = remain_bytes / UNITS_PER_WORD; ++ ++ if (remain_words) ++ { ++ emit_insn (nds32_expand_store_multiple (start_regno, remain_words, ++ dst_base_reg, dstmem, ++ true, &new_base_reg)); ++ dst_base_reg = new_base_reg; ++ dstmem = gen_rtx_MEM (SImode, dst_base_reg); ++ } ++ ++ remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); ++ ++ if (remain_bytes) ++ { ++ value = simplify_gen_subreg (QImode, value4word, SImode, ++ subreg_lowpart_offset(QImode, SImode)); ++ int offset = 0; ++ for (;remain_bytes;--remain_bytes, ++offset) ++ { ++ nds32_emit_load_store (value, dstmem, QImode, offset, false); ++ } ++ } ++ ++ return true; ++} ++ ++bool ++nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align, ++ rtx expected_align, ++ rtx expected_size) ++{ ++ bool align_to_4_bytes = (INTVAL (align) & 3) == 0; ++ ++ /* Only expand at O3 */ ++ if (optimize_size || optimize < 3) ++ return false; ++ ++ if (TARGET_ISA_V3M && !align_to_4_bytes) ++ return nds32_expand_setmem_loop_v3m (dstmem, size, value); ++ ++ if (nds32_expand_setmem_unroll (dstmem, size, value, ++ align, expected_align, expected_size)) ++ return true; ++ ++ return nds32_expand_setmem_loop (dstmem, size, value); ++} ++ ++/* 
------------------------------------------------------------------------ */ ++ ++/* PART 4: Auxiliary function for expand movstr pattern. */ ++ ++bool ++nds32_expand_movstr (rtx dst_end_ptr, ++ rtx dstmem, ++ rtx srcmem) ++{ ++ rtx tmp; ++ rtx dst_base_reg, src_base_reg; ++ rtx new_dst_base_reg, new_src_base_reg; ++ rtx last_non_null_char_ptr; ++ rtx ffbi_result; ++ rtx loop_label; ++ ++ if (optimize_size || optimize < 3) ++ return false; ++ ++ tmp = gen_reg_rtx (SImode); ++ ffbi_result = gen_reg_rtx (Pmode); ++ new_dst_base_reg = gen_reg_rtx (Pmode); ++ new_src_base_reg = gen_reg_rtx (Pmode); ++ dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); ++ src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); ++ loop_label = gen_label_rtx (); ++ ++ emit_label (loop_label); ++ emit_insn (gen_lmwzb (new_src_base_reg, src_base_reg, tmp)); ++ emit_insn (gen_smwzb (new_dst_base_reg, dst_base_reg, tmp)); ++ emit_insn (gen_unspec_ffb (ffbi_result, tmp, const0_rtx)); ++ ++ emit_move_insn (src_base_reg, new_src_base_reg); ++ emit_move_insn (dst_base_reg, new_dst_base_reg); ++ ++ emit_cmp_and_jump_insns (ffbi_result, const0_rtx, EQ, NULL, ++ SImode, 1, loop_label); ++ ++ last_non_null_char_ptr = expand_binop (Pmode, add_optab, dst_base_reg, ++ ffbi_result, NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_move_insn (dst_end_ptr, last_non_null_char_ptr); ++ ++ return true; ++} ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* PART 5: Auxiliary function for expand strlen pattern. 
*/ ++ ++bool ++nds32_expand_strlen (rtx result, rtx str, ++ rtx target_char, rtx align ATTRIBUTE_UNUSED) ++{ ++ rtx base_reg, backup_base_reg; ++ rtx ffb_result; ++ rtx target_char_ptr, length; ++ rtx loop_label, tmp; ++ ++ if (optimize_size || optimize < 3) ++ return false; ++ ++ gcc_assert (MEM_P (str)); ++ gcc_assert (CONST_INT_P (target_char) || REG_P (target_char)); ++ ++ base_reg = copy_to_mode_reg (SImode, XEXP (str, 0)); ++ loop_label = gen_label_rtx (); ++ ++ ffb_result = gen_reg_rtx (Pmode); ++ tmp = gen_reg_rtx (SImode); ++ backup_base_reg = gen_reg_rtx (SImode); ++ ++ /* Emit loop version of strlen. ++ move $backup_base, $base ++ .Lloop: ++ lmw.bim $tmp, [$base], $tmp, 0 ++ ffb $ffb_result, $tmp, $target_char ! is there $target_char? ++ beqz $ffb_result, .Lloop ++ add $last_char_ptr, $base, $ffb_result ++ sub $length, $last_char_ptr, $backup_base */ ++ ++ /* move $backup_base, $base */ ++ emit_move_insn (backup_base_reg, base_reg); ++ ++ /* .Lloop: */ ++ emit_label (loop_label); ++ /* lmw.bim $tmp, [$base], $tmp, 0 */ ++ emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg)); ++ ++ /* ffb $ffb_result, $tmp, $target_char ! is there $target_char? */ ++ emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char)); ++ ++ /* beqz $ffb_result, .Lloop */ ++ emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL, ++ SImode, 1, loop_label); ++ ++ /* add $target_char_ptr, $base, $ffb_result */ ++ target_char_ptr = expand_binop (Pmode, add_optab, base_reg, ++ ffb_result, NULL_RTX, 0, OPTAB_WIDEN); ++ ++ /* sub $length, $target_char_ptr, $backup_base */ ++ length = expand_binop (Pmode, sub_optab, target_char_ptr, ++ backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_move_insn (result, length); ++ ++ return true; ++} + + /* ------------------------------------------------------------------------ */ + ++/* PART 6: Auxiliary function for expand load_multiple/store_multiple ++ pattern. */ ++ + /* Functions to expand load_multiple and store_multiple. 
+ They are auxiliary extern functions to help create rtx template. + Check nds32-multiple.md file for the patterns. */ + rtx + nds32_expand_load_multiple (int base_regno, int count, +- rtx base_addr, rtx basemem) ++ rtx base_addr, rtx basemem, ++ bool update_base_reg_p, ++ rtx *update_base_reg) + { + int par_index; + int offset; ++ int start_idx; + rtx result; + rtx new_addr, mem, reg; + ++ /* Generate a unaligned load to prevent load instruction pull out from ++ parallel, and then it will generate lwi, and lose unaligned acces */ ++ if (count == 1) ++ { ++ reg = gen_rtx_REG (SImode, base_regno); ++ if (update_base_reg_p) ++ { ++ *update_base_reg = gen_reg_rtx (SImode); ++ return gen_unaligned_load_update_base_w (*update_base_reg, reg, base_addr); ++ } ++ else ++ return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr)); ++ } ++ + /* Create the pattern that is presented in nds32-multiple.md. */ ++ if (update_base_reg_p) ++ { ++ result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); ++ start_idx = 1; ++ } ++ else ++ { ++ result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); ++ start_idx = 0; ++ } ++ ++ if (update_base_reg_p) ++ { ++ offset = count * 4; ++ new_addr = plus_constant (Pmode, base_addr, offset); ++ *update_base_reg = gen_reg_rtx (SImode); + +- result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); ++ XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr); ++ } + + for (par_index = 0; par_index < count; par_index++) + { +@@ -57,7 +1284,7 @@ nds32_expand_load_multiple (int base_regno, int count, + new_addr, offset); + reg = gen_rtx_REG (SImode, base_regno + par_index); + +- XVECEXP (result, 0, par_index) = gen_rtx_SET (reg, mem); ++ XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem); + } + + return result; +@@ -65,16 +1292,49 @@ nds32_expand_load_multiple (int base_regno, int count, + + rtx + nds32_expand_store_multiple (int base_regno, int count, +- rtx base_addr, rtx basemem) ++ rtx base_addr, rtx 
basemem, ++ bool update_base_reg_p, ++ rtx *update_base_reg) + { + int par_index; + int offset; ++ int start_idx; + rtx result; + rtx new_addr, mem, reg; + ++ if (count == 1) ++ { ++ reg = gen_rtx_REG (SImode, base_regno); ++ if (update_base_reg_p) ++ { ++ *update_base_reg = gen_reg_rtx (SImode); ++ return gen_unaligned_store_update_base_w (*update_base_reg, base_addr, reg); ++ } ++ else ++ return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg); ++ } ++ + /* Create the pattern that is presented in nds32-multiple.md. */ + +- result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); ++ if (update_base_reg_p) ++ { ++ result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); ++ start_idx = 1; ++ } ++ else ++ { ++ result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); ++ start_idx = 0; ++ } ++ ++ if (update_base_reg_p) ++ { ++ offset = count * 4; ++ new_addr = plus_constant (Pmode, base_addr, offset); ++ *update_base_reg = gen_reg_rtx (SImode); ++ ++ XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr); ++ } + + for (par_index = 0; par_index < count; par_index++) + { +@@ -85,58 +1345,11 @@ nds32_expand_store_multiple (int base_regno, int count, + new_addr, offset); + reg = gen_rtx_REG (SImode, base_regno + par_index); + +- XVECEXP (result, 0, par_index) = gen_rtx_SET (mem, reg); ++ XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg); + } + +- return result; +-} +- +-/* Function to move block memory content by +- using load_multiple and store_multiple. +- This is auxiliary extern function to help create rtx template. +- Check nds32-multiple.md file for the patterns. 
*/ +-int +-nds32_expand_movmemqi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) +-{ +- HOST_WIDE_INT in_words, out_words; +- rtx dst_base_reg, src_base_reg; +- int maximum_bytes; +- +- /* Because reduced-set regsiters has few registers +- (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' +- cannot be used for register allocation), +- using 8 registers (32 bytes) for moving memory block +- may easily consume all of them. +- It makes register allocation/spilling hard to work. +- So we only allow maximum=4 registers (16 bytes) for +- moving memory block under reduced-set registers. */ +- if (TARGET_REDUCED_REGS) +- maximum_bytes = 16; +- else +- maximum_bytes = 32; +- +- /* 1. Total_bytes is integer for sure. +- 2. Alignment is integer for sure. +- 3. Maximum 4 or 8 registers, 4 * 4 = 16 bytes, 8 * 4 = 32 bytes. +- 4. Requires (n * 4) block size. +- 5. Requires 4-byte alignment. */ +- if (GET_CODE (total_bytes) != CONST_INT +- || GET_CODE (alignment) != CONST_INT +- || INTVAL (total_bytes) > maximum_bytes +- || INTVAL (total_bytes) & 3 +- || INTVAL (alignment) & 3) +- return 0; + +- dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); +- src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); +- +- out_words = in_words = INTVAL (total_bytes) / UNITS_PER_WORD; +- +- emit_insn (nds32_expand_load_multiple (0, in_words, src_base_reg, srcmem)); +- emit_insn (nds32_expand_store_multiple (0, out_words, dst_base_reg, dstmem)); +- +- /* Successfully create patterns, return 1. */ +- return 1; ++ return result; + } + + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-modes.def b/gcc/config/nds32/nds32-modes.def +index f2d0e6c..7a6f953 100644 +--- a/gcc/config/nds32/nds32-modes.def ++++ b/gcc/config/nds32/nds32-modes.def +@@ -18,4 +18,6 @@ + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +-/* So far, there is no need to define any modes for nds32 target. */ ++/* Vector modes. */ ++VECTOR_MODES (INT, 4); /* V4QI V2HI */ ++VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md +index babc7f2..500a1c6 100644 +--- a/gcc/config/nds32/nds32-multiple.md ++++ b/gcc/config/nds32/nds32-multiple.md +@@ -49,17 +49,19 @@ + otherwise we have to FAIL this rtx generation: + 1. The number of consecutive registers must be integer. + 2. Maximum 4 or 8 registers for lmw.bi instruction +- (based on this nds32-multiple.md design). ++ (based on this nds32-multiple.md design). + 3. Minimum 2 registers for lmw.bi instruction +- (based on this nds32-multiple.md design). ++ (based on this nds32-multiple.md design). + 4. operands[0] must be register for sure. + 5. operands[1] must be memory for sure. +- 6. Do not cross $r15 register because it is not allocatable. */ ++ 6. operands[1] is not volatile memory access. ++ 7. Do not cross $r15 register because it is not allocatable. */ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) > maximum + || INTVAL (operands[2]) < 2 + || GET_CODE (operands[0]) != REG + || GET_CODE (operands[1]) != MEM ++ || MEM_VOLATILE_P (operands[1]) + || REGNO (operands[0]) + INTVAL (operands[2]) > TA_REGNUM) + FAIL; + +@@ -69,12 +71,943 @@ + INTVAL (operands[2]), + force_reg (SImode, + XEXP (operands[1], 0)), +- operands[1]); ++ operands[1], ++ false, NULL); + }) + + ;; Ordinary Load Multiple. 
++(define_insn "*lmw_bim_si25" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 100))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 76)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 80)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 84)))) ++ (set (match_operand:SI 25 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 88)))) ++ (set (match_operand:SI 26 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 92)))) ++ (set (match_operand:SI 27 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 96))))])] ++ "(XVECLEN (operands[0], 0) == 26)" ++ "lmw.bim\t%3, [%1], %27, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "25") ++ (set_attr "length" "4")] ++) + +-(define_insn "*lmwsi8" ++(define_insn "*lmw_bim_si24" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 96))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI 
(match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 76)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 80)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 84)))) ++ (set (match_operand:SI 25 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 88)))) ++ (set (match_operand:SI 26 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 92))))])] ++ "(XVECLEN (operands[0], 0) == 25)" ++ "lmw.bim\t%3, [%1], %26, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "24") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si23" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set 
(match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 92))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 76)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 80)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 84)))) ++ (set (match_operand:SI 25 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 88))))])] ++ "(XVECLEN (operands[0], 0) == 24)" ++ "lmw.bim\t%3, [%1], %25, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "23") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si22" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 88))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 
40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 76)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 80)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 84))))])] ++ "(XVECLEN (operands[0], 0) == 23)" ++ "lmw.bim\t%3, [%1], %24, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "22") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si21" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 84))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ 
(mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 76)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 80))))])] ++ "(XVECLEN (operands[0], 0) == 22)" ++ "lmw.bim\t%3, [%1], %23, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "21") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si20" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set 
(match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 80))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 76))))])] ++ "(XVECLEN (operands[0], 0) == 21)" ++ "lmw.bim\t%3, [%1], %22, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "20") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si19" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 76))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 
52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 72))))])] ++ "(XVECLEN (operands[0], 0) == 20)" ++ "lmw.bim\t%3, [%1], %21, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "19") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si18" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 72))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ 
(mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 68))))])] ++ "(XVECLEN (operands[0], 0) == 19)" ++ "lmw.bim\t%3, [%1], %20, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "18") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si17" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 68))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set 
(match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 64))))])] ++ "(XVECLEN (operands[0], 0) == 18)" ++ "lmw.bim\t%3, [%1], %19, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "17") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si16" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 64))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI 
(match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 60))))])] ++ "(XVECLEN (operands[0], 0) == 17)" ++ "lmw.bim\t%3, [%1], %18, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "16") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si15" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 60))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 56))))])] ++ "(XVECLEN (operands[0], 0) == 16)" ++ "lmw.bim\t%3, [%1], %17, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "15") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si14" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 56))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 
24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 52))))])] ++ "(XVECLEN (operands[0], 0) == 15)" ++ "lmw.bim\t%3, [%1], %16, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "14") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si13" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 52))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ 
(mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 48))))])] ++ "(XVECLEN (operands[0], 0) == 14)" ++ "lmw.bim\t%3, [%1], %15, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "13") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si12" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 48))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40)))) ++ (set 
(match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 44))))])] ++ "(XVECLEN (operands[0], 0) == 13)" ++ "lmw.bim\t%3, [%1], %14, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "12") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si11" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 44))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 40))))])] ++ "(XVECLEN (operands[0], 0) == 12)" ++ "lmw.bim\t%3, [%1], %13, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "11") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si10" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") 
(const_int 40))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 36))))])] ++ "(XVECLEN (operands[0], 0) == 11)" ++ "lmw.bim\t%3, [%1], %12, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "10") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si9" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 36))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) 
(const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 32))))])] ++ "(XVECLEN (operands[0], 0) == 10)" ++ "lmw.bim\t%3, [%1], %11, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "9") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si8" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 32))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 28))))])] ++ "(XVECLEN (operands[0], 0) == 9)" ++ "lmw.bim\t%3, [%1], %10, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "8") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si7" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 28))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI 
(match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 24))))])] ++ "(XVECLEN (operands[0], 0) == 8)" ++ "lmw.bim\t%3, [%1], %9, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "7") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si6" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 24))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 20))))])] ++ "(XVECLEN (operands[0], 0) == 7)" ++ "lmw.bim\t%3, [%1], %8, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "6") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si5" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 
"register_operand" "1") (const_int 20))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 16))))])] ++ "(XVECLEN (operands[0], 0) == 6)" ++ "lmw.bim\t%3, [%1], %7, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "5") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si4" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 16))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 8)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 12))))])] ++ "(XVECLEN (operands[0], 0) == 5)" ++ "lmw.bim\t%3, [%1], %6, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "4") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si3" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 12))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 
8))))])] ++ "(XVECLEN (operands[0], 0) == 4)" ++ "lmw.bim\t%3, [%1], %5, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "3") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmw_bim_si2" ++ [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 8))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (match_dup 2))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 2) (const_int 4))))])] ++ "(XVECLEN (operands[0], 0) == 3)" ++ "lmw.bim\t%3, [%1], %4, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_load_update_base_w" ++ [(parallel [(set (match_operand:SI 0 "register_operand" "") ++ (plus:SI (match_operand:SI 2 "register_operand" "") (const_int 4))) ++ (set (match_operand:SI 1 "register_operand" "") ++ (unspec:SI [(mem:SI (match_dup 2))] UNSPEC_UALOAD_W))])] ++ "" ++{ ++ /* DO NOT emit unaligned_load_w_m immediately since the web pass doesn't ++ recognize post_inc; try it again after GCC 5.0. 
++ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ ++ emit_insn (gen_unaligned_load_w (operands[1], gen_rtx_MEM (SImode, operands[2]))); ++ emit_insn (gen_addsi3 (operands[0], operands[2], gen_int_mode (4, Pmode))); ++ DONE; ++} ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "1") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi25" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) +@@ -91,14 +1024,49 @@ + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24)))) + (set (match_operand:SI 9 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 28))))])] +- "(XVECLEN (operands[0], 0) == 8)" +- "lmw.bi\t%2, [%1], %9, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" "4")] ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 76)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 80)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 84)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 88)))) ++ (set (match_operand:SI 25 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 92)))) ++ (set (match_operand:SI 26 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 96))))])] ++ "(XVECLEN (operands[0], 0) == 25)" ++ "lmw.bi\t%2, [%1], %26, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "25") ++ (set_attr "length" "4")] + ) + +-(define_insn "*lmwsi7" ++(define_insn "*lmwsi24" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) +@@ -113,14 +1081,49 @@ + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 24))))])] +- "(XVECLEN (operands[0], 0) == 7)" +- "lmw.bi\t%2, [%1], %8, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" "4")] ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) 
(const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 76)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 80)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 84)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 88)))) ++ (set (match_operand:SI 25 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 92))))])] ++ "(XVECLEN (operands[0], 0) == 24)" ++ "lmw.bi\t%2, [%1], %25, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "24") ++ (set_attr "length" "4")] + ) + +-(define_insn "*lmwsi6" ++(define_insn "*lmwsi23" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) +@@ -133,14 +1136,49 @@ + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 20))))])] +- "(XVECLEN (operands[0], 0) == 6)" +- "lmw.bi\t%2, [%1], %7, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" 
"4")] ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 76)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 80)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 84)))) ++ (set (match_operand:SI 24 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 88))))])] ++ "(XVECLEN (operands[0], 0) == 23)" ++ "lmw.bi\t%2, [%1], %24, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "23") ++ (set_attr "length" "4")] + ) + 
+-(define_insn "*lmwsi5" ++(define_insn "*lmwsi22" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) +@@ -151,110 +1189,2430 @@ + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 16))))])] +- "(XVECLEN (operands[0], 0) == 5)" +- "lmw.bi\t%2, [%1], %6, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" "4")] ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 76)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 80)))) ++ (set (match_operand:SI 23 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 84))))])] ++ "(XVECLEN (operands[0], 0) == 22)" ++ "lmw.bi\t%2, [%1], %23, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "22") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi21" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set 
(match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 76)))) ++ (set (match_operand:SI 22 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 80))))])] ++ "(XVECLEN (operands[0], 0) == 21)" ++ "lmw.bi\t%2, [%1], %22, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "21") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi20" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) 
(const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72)))) ++ (set (match_operand:SI 21 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 76))))])] ++ "(XVECLEN (operands[0], 0) == 20)" ++ "lmw.bi\t%2, [%1], %21, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "20") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi19" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI 
(plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68)))) ++ (set (match_operand:SI 20 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 72))))])] ++ "(XVECLEN (operands[0], 0) == 19)" ++ "lmw.bi\t%2, [%1], %20, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "19") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi18" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64)))) ++ (set (match_operand:SI 19 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 68))))])] ++ "(XVECLEN (operands[0], 0) == 18)" ++ "lmw.bi\t%2, [%1], %19, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "18") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi17" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set 
(match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60)))) ++ (set (match_operand:SI 18 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 64))))])] ++ "(XVECLEN (operands[0], 0) == 17)" ++ "lmw.bi\t%2, [%1], %18, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "17") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi16" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) 
(const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56)))) ++ (set (match_operand:SI 17 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 60))))])] ++ "(XVECLEN (operands[0], 0) == 16)" ++ "lmw.bi\t%2, [%1], %17, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "16") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi15" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI 
(plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52)))) ++ (set (match_operand:SI 16 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 56))))])] ++ "(XVECLEN (operands[0], 0) == 15)" ++ "lmw.bi\t%2, [%1], %16, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "15") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi14" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" 
"") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48)))) ++ (set (match_operand:SI 15 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 52))))])] ++ "(XVECLEN (operands[0], 0) == 14)" ++ "lmw.bi\t%2, [%1], %15, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "14") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi13" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 
"register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set (match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44)))) ++ (set (match_operand:SI 14 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 48))))])] ++ "(XVECLEN (operands[0], 0) == 13)" ++ "lmw.bi\t%2, [%1], %14, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "13") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi12" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40)))) ++ (set 
(match_operand:SI 13 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 44))))])] ++ "(XVECLEN (operands[0], 0) == 12)" ++ "lmw.bi\t%2, [%1], %13, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "12") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi11" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36)))) ++ (set (match_operand:SI 12 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 40))))])] ++ "(XVECLEN (operands[0], 0) == 11)" ++ "lmw.bi\t%2, [%1], %12, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "11") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi10" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" 
"") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32)))) ++ (set (match_operand:SI 11 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 36))))])] ++ "(XVECLEN (operands[0], 0) == 10)" ++ "lmw.bi\t%2, [%1], %11, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "10") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi9" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28)))) ++ (set (match_operand:SI 10 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 32))))])] ++ "(XVECLEN (operands[0], 0) == 
9)" ++ "lmw.bi\t%2, [%1], %10, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "9") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi8" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 24)))) ++ (set (match_operand:SI 9 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 28))))])] ++ "(XVECLEN (operands[0], 0) == 8)" ++ "lmw.bi\t%2, [%1], %9, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "8") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi7" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20)))) ++ (set (match_operand:SI 8 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) 
(const_int 24))))])] ++ "(XVECLEN (operands[0], 0) == 7)" ++ "lmw.bi\t%2, [%1], %8, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "7") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi6" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16)))) ++ (set (match_operand:SI 7 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 20))))])] ++ "(XVECLEN (operands[0], 0) == 6)" ++ "lmw.bi\t%2, [%1], %7, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "6") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi5" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12)))) ++ (set (match_operand:SI 6 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 16))))])] ++ "(XVECLEN (operands[0], 0) == 5)" ++ "lmw.bi\t%2, [%1], %6, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "5") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi4" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 
"register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8)))) ++ (set (match_operand:SI 5 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 12))))])] ++ "(XVECLEN (operands[0], 0) == 4)" ++ "lmw.bi\t%2, [%1], %5, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "4") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi3" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4)))) ++ (set (match_operand:SI 4 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 8))))])] ++ "(XVECLEN (operands[0], 0) == 3)" ++ "lmw.bi\t%2, [%1], %4, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "3") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*lmwsi2" ++ [(match_parallel 0 "nds32_load_multiple_operation" ++ [(set (match_operand:SI 2 "register_operand" "") ++ (mem:SI (match_operand:SI 1 "register_operand" "r"))) ++ (set (match_operand:SI 3 "register_operand" "") ++ (mem:SI (plus:SI (match_dup 1) (const_int 4))))])] ++ "(XVECLEN (operands[0], 0) == 2)" ++ "lmw.bi\t%2, [%1], %3, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")] ++) ++ ++;; Store Multiple Insns. ++;; ++;; operands[0] is the first memory location. ++;; operands[1] is the first of the consecutive registers. ++;; operands[2] is the number of consecutive registers. 
++ ++(define_expand "store_multiple" ++ [(match_par_dup 3 [(set (match_operand:SI 0 "" "") ++ (match_operand:SI 1 "" "")) ++ (use (match_operand:SI 2 "" ""))])] ++ "" ++{ ++ int maximum; ++ ++ /* Because the reduced register set has few registers ++ (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31' cannot ++ be used for register allocation), ++ using 8 registers for store_multiple may easily consume all of them. ++ That makes register allocation/spilling difficult. ++ So we only allow maximum=4 registers for store_multiple ++ under reduced-set registers. */ ++ if (TARGET_REDUCED_REGS) ++ maximum = 4; ++ else ++ maximum = 8; ++ ++ /* Here are the conditions that must all be satisfied, ++ otherwise we have to FAIL this rtx generation: ++ 1. The number of consecutive registers must be a constant integer. ++ 2. Maximum 4 or 8 registers for smw.bi instruction ++ (based on this nds32-multiple.md design). ++ 3. Minimum 2 registers for smw.bi instruction ++ (based on this nds32-multiple.md design). ++ 4. operands[0] must be memory for sure. ++ 5. operands[1] must be register for sure. ++ 6. operands[0] is not volatile memory access. ++ 7. Do not cross $r15 register because it is not allocatable. */ ++ if (GET_CODE (operands[2]) != CONST_INT ++ || INTVAL (operands[2]) > maximum ++ || INTVAL (operands[2]) < 2 ++ || GET_CODE (operands[0]) != MEM ++ || GET_CODE (operands[1]) != REG ++ || MEM_VOLATILE_P (operands[0]) ++ || REGNO (operands[1]) + INTVAL (operands[2]) > TA_REGNUM) ++ FAIL; ++ ++ /* For (mem addr), we force_reg on addr here, ++ so that nds32_expand_store_multiple can easily use it. */ ++ operands[3] = nds32_expand_store_multiple (REGNO (operands[1]), ++ INTVAL (operands[2]), ++ force_reg (SImode, ++ XEXP (operands[0], 0)), ++ operands[0], ++ false, NULL); ++}) ++ ++;; Ordinary Store Multiple.
++(define_insn "*stm_bim_si25" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 100))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI 
(match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 76))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 80))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 84))) ++ (match_operand:SI 24 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 88))) ++ (match_operand:SI 25 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 92))) ++ (match_operand:SI 26 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 96))) ++ (match_operand:SI 27 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 26)" ++ "smw.bim\t%3, [%1], %27, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "25") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si24" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 96))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 
"register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 76))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 80))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 84))) ++ (match_operand:SI 24 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 88))) ++ (match_operand:SI 25 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 92))) ++ (match_operand:SI 26 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 25)" ++ "smw.bim\t%3, [%1], %26, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "24") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si23" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 
"register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 92))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ 
(match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 76))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 80))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 84))) ++ (match_operand:SI 24 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 88))) ++ (match_operand:SI 25 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 24)" ++ "smw.bim\t%3, [%1], %25, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "23") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si22" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 88))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI 
(plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 76))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 80))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 84))) ++ (match_operand:SI 24 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 23)" ++ "smw.bim\t%3, [%1], %24, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "22") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si21" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 84))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 
"register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 76))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 80))) ++ (match_operand:SI 23 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 22)" ++ "smw.bim\t%3, [%1], %23, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "21") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si20" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 
"register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 80))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ 
(match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 76))) ++ (match_operand:SI 22 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 21)" ++ "smw.bim\t%3, [%1], %22, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "20") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si19" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 76))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI 
(plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 72))) ++ (match_operand:SI 21 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 20)" ++ "smw.bim\t%3, [%1], %21, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "19") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si18" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 72))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 
"register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 68))) ++ (match_operand:SI 20 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 19)" ++ "smw.bim\t%3, [%1], %20, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "18") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si17" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 68))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) 
(const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 64))) ++ (match_operand:SI 19 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 18)" ++ "smw.bim\t%3, [%1], %19, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "17") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si16" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 64))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) 
++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 60))) ++ (match_operand:SI 18 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 17)" ++ "smw.bim\t%3, [%1], %18, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "16") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si15" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 60))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ 
(match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 56))) ++ (match_operand:SI 17 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 16)" ++ "smw.bim\t%3, [%1], %17, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "15") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si14" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 56))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI 
(plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 52))) ++ (match_operand:SI 16 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 15)" ++ "smw.bim\t%3, [%1], %16, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "14") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si13" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 52))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 
"register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 44))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 48))) ++ (match_operand:SI 15 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 14)" ++ "smw.bim\t%3, [%1], %15, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "13") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si12" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 48))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) 
(const_int 44))) ++ (match_operand:SI 14 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 13)" ++ "smw.bim\t%3, [%1], %14, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "12") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si11" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 44))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 40))) ++ (match_operand:SI 13 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 12)" ++ "smw.bim\t%3, [%1], %13, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "11") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si10" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 40))) ++ (set 
(mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) ++ (match_operand:SI 12 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 11)" ++ "smw.bim\t%3, [%1], %12, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "10") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si9" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 36))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ 
(set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) ++ (match_operand:SI 11 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 10)" ++ "smw.bim\t%3, [%1], %11, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "9") ++ (set_attr "length" "4")] ++) ++ ++ ++(define_insn "*stm_bim_si8" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 32))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) ++ (match_operand:SI 10 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 9)" ++ "smw.bim\t%3, [%1], %10, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "8") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si7" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 28))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ 
(set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) ++ (match_operand:SI 9 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 8)" ++ "smw.bim\t%3, [%1], %9, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "7") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si6" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 24))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) ++ (match_operand:SI 8 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 7)" ++ "smw.bim\t%3, [%1], %8, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "6") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si5" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") 
(const_int 20))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) ++ (match_operand:SI 7 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 6)" ++ "smw.bim\t%3, [%1], %7, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "5") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si4" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 16))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) ++ (match_operand:SI 6 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 5)" ++ "smw.bim\t%3, [%1], %6, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "4") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si3" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 12))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) ++ (match_operand:SI 5 "register_operand" ""))])] ++ "(XVECLEN 
(operands[0], 0) == 4)" ++ "smw.bim\t%3, [%1], %5, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "3") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stm_bim_si2" ++ [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" ++ [(set (match_operand:SI 1 "register_operand" "=r") ++ (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 8))) ++ (set (mem:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) ++ (match_operand:SI 4 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 3)" ++ "smw.bim\t%3, [%1], %4, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_store_update_base_w" ++ [(parallel [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) ++ (set (mem:SI (match_dup 1)) ++ (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_UASTORE_W))])] ++ "" ++{ ++ /* DO NOT emit unaligned_store_w_m immediately since web pass don't ++ recognize post_inc, try it again after GCC 5.0. ++ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ ++ emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[1]), operands[2])); ++ emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (4, Pmode))); ++ DONE; ++} ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "1") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_store_update_base_dw" ++ [(parallel [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8))) ++ (set (mem:DI (match_dup 1)) ++ (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])] ++ "" ++{ ++ /* DO NOT emit unaligned_store_w_m immediately since web pass don't ++ recognize post_inc, try it again after GCC 5.0. 
++ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ ++ emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2])); ++ emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode))); ++ DONE; ++} ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi25" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI 
(plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 76))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 80))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 84))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 88))) ++ (match_operand:SI 24 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 92))) ++ (match_operand:SI 25 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 96))) ++ (match_operand:SI 26 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 25)" ++ "smw.bi\t%2, [%1], %26, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "25") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi24" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" 
"")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 76))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 80))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 84))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 88))) ++ (match_operand:SI 24 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 92))) ++ (match_operand:SI 25 "register_operand" "")) ++])] ++ "(XVECLEN (operands[0], 0) == 24)" ++ "smw.bi\t%2, [%1], %25, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "24") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi23" ++ [(match_parallel 0 
"nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" 
"")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 76))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 80))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 84))) ++ (match_operand:SI 23 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 88))) ++ (match_operand:SI 24 "register_operand" "")) ++])] ++ "(XVECLEN (operands[0], 0) == 23)" ++ "smw.bi\t%2, [%1], %24, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "23") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi22" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 
14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 76))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 80))) ++ (match_operand:SI 22 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 84))) ++ (match_operand:SI 23 "register_operand" "")) ++])] ++ "(XVECLEN (operands[0], 0) == 22)" ++ "smw.bi\t%2, [%1], %23, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "22") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi21" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 
28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 76))) ++ (match_operand:SI 21 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 80))) ++ (match_operand:SI 22 "register_operand" "")) ++])] ++ "(XVECLEN (operands[0], 0) == 21)" ++ "smw.bi\t%2, [%1], %22, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "21") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi20" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI 
(plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 76))) ++ (match_operand:SI 21 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 20)" ++ "smw.bi\t%2, [%1], %21, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "20") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi19" ++ [(match_parallel 0 
"nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 72))) ++ (match_operand:SI 20 "register_operand" 
""))])] ++ "(XVECLEN (operands[0], 0) == 19)" ++ "smw.bi\t%2, [%1], %20, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "19") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "*stmsi18" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 
"register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 68))) ++ (match_operand:SI 19 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 18)" ++ "smw.bi\t%2, [%1], %19, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "18") ++ (set_attr "length" "4")] + ) + +-(define_insn "*lmwsi4" +- [(match_parallel 0 "nds32_load_multiple_operation" +- [(set (match_operand:SI 2 "register_operand" "") +- (mem:SI (match_operand:SI 1 "register_operand" "r"))) +- (set (match_operand:SI 3 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 4)))) +- (set (match_operand:SI 4 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 8)))) +- (set (match_operand:SI 5 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 12))))])] +- "(XVECLEN (operands[0], 0) == 4)" +- "lmw.bi\t%2, [%1], %5, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" "4")] ++(define_insn "*stmsi17" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ 
(match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 64))) ++ (match_operand:SI 18 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 17)" ++ "smw.bi\t%2, [%1], %18, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "17") ++ (set_attr "length" "4")] + ) + +-(define_insn "*lmwsi3" +- [(match_parallel 0 "nds32_load_multiple_operation" +- [(set (match_operand:SI 2 "register_operand" "") +- (mem:SI (match_operand:SI 1 "register_operand" "r"))) +- (set (match_operand:SI 3 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 4)))) +- (set (match_operand:SI 4 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 8))))])] +- "(XVECLEN (operands[0], 0) == 3)" +- "lmw.bi\t%2, [%1], %4, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" "4")] ++(define_insn "*stmsi16" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 
1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 60))) ++ (match_operand:SI 17 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 16)" ++ "smw.bi\t%2, [%1], %17, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "16") ++ (set_attr "length" "4")] + ) + +-(define_insn "*lmwsi2" +- [(match_parallel 0 "nds32_load_multiple_operation" +- [(set (match_operand:SI 2 "register_operand" "") +- (mem:SI (match_operand:SI 1 "register_operand" "r"))) +- (set (match_operand:SI 3 "register_operand" "") +- (mem:SI (plus:SI (match_dup 1) (const_int 4))))])] +- "(XVECLEN (operands[0], 0) == 2)" +- "lmw.bi\t%2, [%1], %3, 0x0" +- [(set_attr "type" "load") +- (set_attr "length" "4")] ++(define_insn "*stmsi15" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set 
(mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 56))) ++ (match_operand:SI 16 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 15)" ++ "smw.bi\t%2, [%1], %16, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "15") ++ (set_attr "length" "4")] + ) + ++(define_insn "*stmsi14" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 
"register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 52))) ++ (match_operand:SI 15 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 14)" ++ "smw.bi\t%2, [%1], %15, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "14") ++ (set_attr "length" "4")] ++) + +-;; Store Multiple Insns. +-;; +-;; operands[0] is the first memory location. +-;; opernads[1] is the first of the consecutive registers. +-;; operands[2] is the number of consecutive registers. 
+- +-(define_expand "store_multiple" +- [(match_par_dup 3 [(set (match_operand:SI 0 "" "") +- (match_operand:SI 1 "" "")) +- (use (match_operand:SI 2 "" ""))])] +- "" +-{ +- int maximum; ++(define_insn "*stmsi13" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 48))) ++ (match_operand:SI 14 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 13)" ++ "smw.bi\t%2, [%1], %14, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "13") ++ (set_attr "length" "4")] ++) + +- /* Because reduced-set regsiters has few registers +- (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' cannot +- be used for register allocation), +- using 8 registers for store_multiple may easily 
consume all of them. +- It makes register allocation/spilling hard to work. +- So we only allow maximum=4 registers for store_multiple +- under reduced-set registers. */ +- if (TARGET_REDUCED_REGS) +- maximum = 4; +- else +- maximum = 8; ++(define_insn "*stmsi12" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 44))) ++ (match_operand:SI 13 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 12)" ++ "smw.bi\t%2, [%1], %13, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "12") ++ (set_attr "length" "4")] ++) + +- /* Here are the conditions that must be all passed, +- otherwise we have to FAIL this rtx generation: +- 1. The number of consecutive registers must be integer. +- 2. Maximum 4 or 8 registers for smw.bi instruction +- (based on this nds32-multiple.md design). 
+- 3. Minimum 2 registers for smw.bi instruction +- (based on this nds32-multiple.md design). +- 4. operands[0] must be memory for sure. +- 5. operands[1] must be register for sure. +- 6. Do not cross $r15 register because it is not allocatable. */ +- if (GET_CODE (operands[2]) != CONST_INT +- || INTVAL (operands[2]) > maximum +- || INTVAL (operands[2]) < 2 +- || GET_CODE (operands[0]) != MEM +- || GET_CODE (operands[1]) != REG +- || REGNO (operands[1]) + INTVAL (operands[2]) > TA_REGNUM) +- FAIL; ++(define_insn "*stmsi11" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 40))) ++ (match_operand:SI 12 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 11)" ++ "smw.bi\t%2, [%1], %12, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "11") ++ (set_attr "length" "4")] ++) + +- /* For (mem addr), we force_reg on addr here, +- so that nds32_expand_store_multiple can easily use 
it. */ +- operands[3] = nds32_expand_store_multiple (REGNO (operands[1]), +- INTVAL (operands[2]), +- force_reg (SImode, +- XEXP (operands[0], 0)), +- operands[0]); +-}) ++(define_insn "*stmsi10" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) ++ (match_operand:SI 11 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 10)" ++ "smw.bi\t%2, [%1], %11, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "10") ++ (set_attr "length" "4")] ++) + +-;; Ordinary Store Multiple. 
++(define_insn "*stmsi9" ++ [(match_parallel 0 "nds32_store_multiple_operation" ++ [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:SI 2 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) ++ (match_operand:SI 3 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) ++ (match_operand:SI 4 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) ++ (match_operand:SI 5 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) ++ (match_operand:SI 6 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) ++ (match_operand:SI 7 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) ++ (match_operand:SI 8 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) ++ (match_operand:SI 9 "register_operand" "")) ++ (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) ++ (match_operand:SI 10 "register_operand" ""))])] ++ "(XVECLEN (operands[0], 0) == 9)" ++ "smw.bi\t%2, [%1], %10, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "9") ++ (set_attr "length" "4")] ++) + + (define_insn "*stmsi8" + [(match_parallel 0 "nds32_store_multiple_operation" +@@ -276,8 +3634,9 @@ + (match_operand:SI 9 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 8)" + "smw.bi\t%2, [%1], %9, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "8") ++ (set_attr "length" "4")] + ) + + (define_insn "*stmsi7" +@@ -298,8 +3657,9 @@ + (match_operand:SI 8 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 7)" + "smw.bi\t%2, [%1], %8, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "7") ++ (set_attr "length" "4")] + ) + + (define_insn "*stmsi6" +@@ -318,8 +3678,9 @@ + (match_operand:SI 7 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) 
== 6)" + "smw.bi\t%2, [%1], %7, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "6") ++ (set_attr "length" "4")] + ) + + (define_insn "*stmsi5" +@@ -336,8 +3697,9 @@ + (match_operand:SI 6 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 5)" + "smw.bi\t%2, [%1], %6, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "5") ++ (set_attr "length" "4")] + ) + + (define_insn "*stmsi4" +@@ -352,8 +3714,9 @@ + (match_operand:SI 5 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 4)" + "smw.bi\t%2, [%1], %5, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "4") ++ (set_attr "length" "4")] + ) + + (define_insn "*stmsi3" +@@ -366,8 +3729,9 @@ + (match_operand:SI 4 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 3)" + "smw.bi\t%2, [%1], %4, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "3") ++ (set_attr "length" "4")] + ) + + (define_insn "*stmsi2" +@@ -378,8 +3742,9 @@ + (match_operand:SI 3 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 2)" + "smw.bi\t%2, [%1], %3, 0x0" +- [(set_attr "type" "store") +- (set_attr "length" "4")] ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")] + ) + + ;; Move a block of memory if it is word aligned and MORE than 2 words long. +@@ -391,14 +3756,14 @@ + ;; operands[2] is the number of bytes to move. + ;; operands[3] is the known shared alignment. 
+ +-(define_expand "movmemqi" ++(define_expand "movmemsi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") +- (match_operand:SI 2 "const_int_operand" "") ++ (match_operand:SI 2 "nds32_reg_constant_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + { +- if (nds32_expand_movmemqi (operands[0], ++ if (nds32_expand_movmemsi (operands[0], + operands[1], + operands[2], + operands[3])) +@@ -408,3 +3773,75 @@ + }) + + ;; ------------------------------------------------------------------------ ++ ++(define_insn "lmwzb" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) ++ (set (match_operand:SI 2 "register_operand" "=r") ++ (unspec:SI [(mem:SI (match_dup 1))] UNSPEC_LMWZB))] ++ "" ++ "lmwzb.bm\t%2, [%1], %2, 0x0" ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "1") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "smwzb" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) ++ (set (mem:SI (match_dup 1)) ++ (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SMWZB))] ++ "" ++ "smwzb.bm\t%2, [%1], %2, 0x0" ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "1") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "movstr" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:BLK 1 "memory_operand" "") ++ (match_operand:BLK 2 "memory_operand" "")] ++ "TARGET_EXT_STRING && TARGET_INLINE_STRCPY" ++{ ++ if (nds32_expand_movstr (operands[0], ++ operands[1], ++ operands[2])) ++ DONE; ++ ++ FAIL; ++}) ++ ++(define_expand "strlensi" ++ [(match_operand:SI 0 "register_operand") ++ (match_operand:BLK 1 "memory_operand") ++ (match_operand:QI 2 "nds32_reg_constant_operand") ++ (match_operand 3 "const_int_operand")] ++ "TARGET_EXT_STRING" ++{ ++ if (nds32_expand_strlen (operands[0], operands[1], operands[2], operands[3])) ++ DONE; ++ ++ FAIL; 
++}) ++ ++(define_expand "setmemsi" ++ [(use (match_operand:BLK 0 "memory_operand")) ++ (use (match_operand:SI 1 "nds32_reg_constant_operand")) ++ (use (match_operand:QI 2 "nonmemory_operand")) ++ (use (match_operand 3 "const_int_operand")) ++ (use (match_operand:SI 4 "const_int_operand")) ++ (use (match_operand:SI 5 "const_int_operand"))] ++ "" ++{ ++ if (nds32_expand_setmem (operands[0], operands[1], ++ operands[2], operands[3], ++ operands[4], operands[5])) ++ DONE; ++ ++ FAIL; ++}) ++ ++ ++ ++;; ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/nds32-n10.md b/gcc/config/nds32/nds32-n10.md +new file mode 100644 +index 0000000..7261608 +--- /dev/null ++++ b/gcc/config/nds32/nds32-n10.md +@@ -0,0 +1,439 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N10 pipeline settings. 
++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n10_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; II - Instruction Issue / Instruction Decode ++;; EX - Instruction Execution ++;; MM - Memory Execution ++;; WB - Instruction Retire / Result Write-Back ++ ++(define_cpu_unit "n10_ii" "nds32_n10_machine") ++(define_cpu_unit "n10_ex" "nds32_n10_machine") ++(define_cpu_unit "n10_mm" "nds32_n10_machine") ++(define_cpu_unit "n10_wb" "nds32_n10_machine") ++(define_cpu_unit "n10f_iq" "nds32_n10_machine") ++(define_cpu_unit "n10f_rf" "nds32_n10_machine") ++(define_cpu_unit "n10f_e1" "nds32_n10_machine") ++(define_cpu_unit "n10f_e2" "nds32_n10_machine") ++(define_cpu_unit "n10f_e3" "nds32_n10_machine") ++(define_cpu_unit "n10f_e4" "nds32_n10_machine") ++ ++(define_insn_reservation "nds_n10_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex*3, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_pbsada" 1 
++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex*3, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_1" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1"))) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_2" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)"))) ++ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_3" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_4" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_5" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_6" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6"))) ++ "n10_ii, n10_ii+n10_ex, 
n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_7" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_N" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (match_test "get_attr_combo (insn) >= 8"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_1" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1"))) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_2" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)"))) ++ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_3" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_4" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_5" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, 
(n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_6" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_7" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_N" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (match_test "get_attr_combo (insn) >= 8"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_mul" 1 ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_mac" 1 ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex*34, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_alu" 1 ++ (and (eq_attr "type" "dalu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_alu64" 1 ++ (and (eq_attr "type" "dalu64") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_alu_round" 1 ++ (and (eq_attr "type" 
"daluround") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_cmp" 1 ++ (and (eq_attr "type" "dcmp") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_clip" 1 ++ (and (eq_attr "type" "dclip") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_mul" 1 ++ (and (eq_attr "type" "dmul") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_mac" 1 ++ (and (eq_attr "type" "dmac") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_insb" 1 ++ (and (eq_attr "type" "dinsb") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_pack" 1 ++ (and (eq_attr "type" "dpack") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_bpick" 1 ++ (and (eq_attr "type" "dbpick") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_wext" 1 ++ (and (eq_attr "type" "dwext") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_fpu_alu" 4 ++ (and (eq_attr "type" "falu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_muls" 4 ++ (and (eq_attr "type" "fmuls") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_muld" 4 ++ (and (eq_attr "type" "fmuld") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_macs" 4 ++ (and (eq_attr "type" "fmacs") ++ 
(eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_macd" 4 ++ (and (eq_attr "type" "fmacd") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*4, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_divs" 4 ++ (and (ior (eq_attr "type" "fdivs") ++ (eq_attr "type" "fsqrts")) ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_divd" 4 ++ (and (ior (eq_attr "type" "fdivd") ++ (eq_attr "type" "fsqrtd")) ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fast_alu" 2 ++ (and (ior (eq_attr "type" "fcmp") ++ (ior (eq_attr "type" "fabs") ++ (ior (eq_attr "type" "fcpy") ++ (eq_attr "type" "fcmov")))) ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmtsr" 4 ++ (and (eq_attr "type" "fmtsr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmtdr" 4 ++ (and (eq_attr "type" "fmtdr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmfsr" 2 ++ (and (eq_attr "type" "fmfsr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmfdr" 2 ++ (and (eq_attr "type" "fmfdr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_load" 3 ++ (and (eq_attr "type" "fload") ++ (eq_attr 
"pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_store" 1 ++ (and (eq_attr "type" "fstore") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD ++;; Load data from the memory and produce the loaded data. The result is ++;; ready at MM. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at MM. ++;; MUL, MAC ++;; Compute data in the multiply-adder and produce the data. The result ++;; is ready at MM. ++;; DIV ++;; Compute data in the divider and produce the data. The result is ready ++;; at MM. ++;; ++;; Consumers (RHS) ++;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU ++;; Require operands at EX. ++;; ALU_SHIFT_Rb ++;; An ALU-SHIFT instruction consists of a shift micro-operation followed ++;; by an arithmetic micro-operation. The operand Rb is used by the first ++;; micro-operation, and there are some latencies if data dependency occurs. ++;; MAC_RaRb ++;; A MAC instruction does multiplication at EX and does accumulation at MM, ++;; so the operand Rt is required at MM, and operands Ra and Rb are required ++;; at EX. ++;; ADDR_IN ++;; If an instruction requires an address as its input operand, the address ++;; is required at EX. ++;; ST ++;; A store instruction requires its data at MM. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at MM. ++;; BR ++;; If a branch instruction is conditional, its input data is required at EX. 
++ ++;; FPU_ADDR_OUT -> FPU_ADDR_IN ++;; Main pipeline rules don't need this because those default latency is 1. ++(define_bypass 1 ++ "nds_n10_fpu_load, nds_n10_fpu_store" ++ "nds_n10_fpu_load, nds_n10_fpu_store" ++ "nds32_n10_ex_to_ex_p" ++) ++ ++;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\ ++ nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\ ++ nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext" ++ "nds_n10_alu, nds_n10_alu_shift,\ ++ nds_n10_pbsad, nds_n10_pbsada,\ ++ nds_n10_mul, nds_n10_mac, nds_n10_div,\ ++ nds_n10_branch,\ ++ nds_n10_load, nds_n10_store,\ ++ nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ ++ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ ++ nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ ++ nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ ++ nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ ++ nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ ++ nds_n10_mmu,\ ++ nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ ++ nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ ++ nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ ++ nds_n10_dsp_wext, nds_n10_dsp_bpick" ++ "nds32_n10_mm_to_ex_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ ++ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ ++ nds_n10_load_multiple_7, nds_n10_load_multiple_N" ++ "nds_n10_alu, nds_n10_alu_shift,\ 
++ nds_n10_pbsad, nds_n10_pbsada,\ ++ nds_n10_mul, nds_n10_mac, nds_n10_div,\ ++ nds_n10_branch,\ ++ nds_n10_load, nds_n10_store,\ ++ nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ ++ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ ++ nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ ++ nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ ++ nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ ++ nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ ++ nds_n10_mmu,\ ++ nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ ++ nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ ++ nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ ++ nds_n10_dsp_wext, nds_n10_dsp_bpick" ++ "nds32_n10_last_load_to_ex_p" ++) +diff --git a/gcc/config/nds32/nds32-n13.md b/gcc/config/nds32/nds32-n13.md +new file mode 100644 +index 0000000..622480d +--- /dev/null ++++ b/gcc/config/nds32/nds32-n13.md +@@ -0,0 +1,401 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N13 pipeline settings. 
++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n13_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; F1 - Instruction Fetch First ++;; Instruction Tag/Data Arrays ++;; ITLB Address Translation ++;; Branch Target Buffer Prediction ++;; F2 - Instruction Fetch Second ++;; Instruction Cache Hit Detection ++;; Cache Way Selection ++;; Instruction Alignment ++;; I1 - Instruction Issue First / Instruction Decode ++;; Instruction Cache Replay Triggering ++;; 32/16-Bit Instruction Decode ++;; Return Address Stack Prediction ++;; I2 - Instruction Issue Second / Register File Access ++;; Instruction Issue Logic ++;; Register File Access ++;; E1 - Instruction Execute First / Address Generation / MAC First ++;; Data Access Address generation ++;; Multiply Operation ++;; E2 - Instruction Execute Second / Data Access First / MAC Second / ++;; ALU Execute ++;; Skewed ALU ++;; Branch/Jump/Return Resolution ++;; Data Tag/Data arrays ++;; DTLB address translation ++;; Accumulation Operation ++;; E3 - Instruction Execute Third / Data Access Second ++;; Data Cache Hit Detection ++;; Cache Way Selection ++;; Data Alignment ++;; E4 - Instruction Execute Fourth / Write Back ++;; Interruption Resolution ++;; Instruction Retire ++;; Register File Write Back ++ ++(define_cpu_unit "n13_i1" "nds32_n13_machine") ++(define_cpu_unit "n13_i2" "nds32_n13_machine") ++(define_cpu_unit "n13_e1" "nds32_n13_machine") ++(define_cpu_unit "n13_e2" "nds32_n13_machine") ++(define_cpu_unit "n13_e3" "nds32_n13_machine") ++(define_cpu_unit "n13_e4" "nds32_n13_machine") ++ ++(define_insn_reservation "nds_n13_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_misc" 1 ++ (and (eq_attr
"type" "misc") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_pbsada" 1 ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_3" 1 ++ (and (and 
(eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr
"pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_3" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_5" 1 ++
(and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, 
n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++;; The multiplier at E1 takes two cycles. ++(define_insn_reservation "nds_n13_mul" 1 ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_mac" 1 ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") ++ ++;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2, ++;; so the worst case is 34. ++(define_insn_reservation "nds_n13_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD ++;; Load data from the memory and produce the loaded data. The result is ++;; ready at E3. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at E3. ++;; ADDR_OUT ++;; Most load/store instructions can produce an address output if updating ++;; the base register is required. The result is ready at E2, which is ++;; produced by ALU. ++;; ALU, ALU_SHIFT, SIMD ++;; Compute data in ALU and produce the data. The result is ready at E2. ++;; MUL, MAC ++;; Compute data in the multiply-adder and produce the data. The result ++;; is ready at E2. ++;; DIV ++;; Compute data in the divider and produce the data. The result is ready ++;; at E2. 
++;; BR ++;; Branch-with-link instructions produces a result containing the return ++;; address. The result is ready at E2. ++;; ++;; Consumers (RHS) ++;; ALU ++;; General ALU instructions require operands at E2. ++;; ALU_E1 ++;; Some special ALU instructions, such as BSE, BSP and MOVD44, require ++;; operand at E1. ++;; MUL, DIV, PBSAD, MMU ++;; Operands are required at E1. ++;; PBSADA_Rt, PBSADA_RaRb ++;; Operands Ra and Rb are required at E1, and the operand Rt is required ++;; at E2. ++;; ALU_SHIFT_Rb ++;; An ALU-SHIFT instruction consists of a shift micro-operation followed ++;; by an arithmetic micro-operation. The operand Rb is used by the first ++;; micro-operation, and there are some latencies if data dependency occurs. ++;; MAC_RaRb ++;; A MAC instruction does multiplication at E1 and does accumulation at E2, ++;; so the operand Rt is required at E2, and operands Ra and Rb are required ++;; at E1. ++;; ADDR_IN ++;; If an instruction requires an address as its input operand, the address ++;; is required at E1. ++;; ST ++;; A store instruction requires its data at E2. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at E2. ++;; BR ++;; If a branch instruction is conditional, its input data is required at E2. 
++ ++;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 3 ++ "nds_n13_load" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_load_to_e1_p" ++) ++ ++;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_n13_load" ++ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_load_to_e2_p" ++) ++ ++;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 3 ++ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, 
nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_last_load_to_e1_p") ++ ++;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" ++ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_last_load_to_e2_p" ++) ++ ++;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 2 ++ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_last_two_load_to_e1_p") ++ ++;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT ++;; -> ALU_E1, PBSAD, 
PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 2 ++ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_e2_to_e1_p") +diff --git a/gcc/config/nds32/nds32-n7.md b/gcc/config/nds32/nds32-n7.md +new file mode 100644 +index 0000000..ff788ce +--- /dev/null ++++ b/gcc/config/nds32/nds32-n7.md +@@ -0,0 +1,298 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. 
++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N7 pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n7_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; Instruction Alignment ++;; Instruction Pre-decode ++;; II - Instruction Issue ++;; Instruction Decode ++;; Register File Access ++;; Instruction Execution ++;; Interrupt Handling ++;; EXD - Pseudo Stage ++;; Load Data Completion ++ ++(define_cpu_unit "n7_ii" "nds32_n7_machine") ++ ++(define_insn_reservation "nds_n7_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii")
++ ++(define_insn_reservation "nds_n7_load_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*2") ++ ++(define_insn_reservation "nds_n7_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*3") ++ ++(define_insn_reservation "nds_n7_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*4") ++ ++(define_insn_reservation "nds_n7_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*5") ++ ++(define_insn_reservation "nds_n7_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*6") ++ ++(define_insn_reservation "nds_n7_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*7") ++ ++(define_insn_reservation "nds_n7_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*8") ++ ++(define_insn_reservation "nds_n7_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*12") ++ ++(define_insn_reservation "nds_n7_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_store_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*2") ++ ++(define_insn_reservation "nds_n7_store_multiple_3" 1 ++ (and (and (eq_attr "type" 
"store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*3") ++ ++(define_insn_reservation "nds_n7_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*4") ++ ++(define_insn_reservation "nds_n7_store_multiple_5" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*5") ++ ++(define_insn_reservation "nds_n7_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*6") ++ ++(define_insn_reservation "nds_n7_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*7") ++ ++(define_insn_reservation "nds_n7_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*8") ++ ++(define_insn_reservation "nds_n7_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*12") ++ ++(define_insn_reservation "nds_n7_mul_fast" 1 ++ (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n7"))) ++ "n7_ii") ++ ++(define_insn_reservation "nds_n7_mul_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n7"))) ++ "n7_ii*17") ++ ++(define_insn_reservation "nds_n7_mac_fast" 1 ++ (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n7"))) ++ "n7_ii*2") ++ ++(define_insn_reservation "nds_n7_mac_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n7"))) ++ "n7_ii*18") ++ ++(define_insn_reservation "nds_n7_div" 1 ++ (and 
(eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii*37") ++ ++(define_insn_reservation "nds_n7_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n7")) ++ "n7_ii") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD_!bi ++;; Load data from the memory (without updating the base register) and ++;; produce the loaded data. The result is ready at EXD. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at EXD. If the base register should be ++;; updated, an extra micro-operation is inserted to the sequence, and the ++;; result is ready at II. ++;; ++;; Consumers (RHS) ++;; ALU, MUL, DIV ++;; Require operands at II. ++;; MOVD44_E ++;; A double-word move instruction needs two micro-operations because the ++;; register port is 2R1W. The first micro-operation writes an even number ++;; register, and the second micro-operation writes an odd number register. ++;; Each input operand is required at II for each micro-operation. The letter ++;; 'E' stands for even. ++;; MAC_RaRb ++;; A MAC instruction is separated into two micro-operations. The first ++;; micro-operation does the multiplication, which requires operands Ra ++;; and Rb at II. The second micro-operation does the accumulation, which ++;; requires the operand Rt at II. ++;; ADDR_IN_MOP(N) ++;; Because the register port is 2R1W, some load/store instructions are ++;; separated into many micro-operations. N denotes the address input is ++;; required by the N-th micro-operation. Such operand is required at II. ++;; ST_bi ++;; A post-increment store instruction requires its data at II.
++;; ST_!bi_RI ++;; A store instruction with an immediate offset requires its data at II. ++;; If the offset field is a register (ST_!bi_RR), the instruction will be ++;; separated into two micro-operations, and the second one requires the ++;; input operand at II in order to store it to the memory. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at II. If the base ++;; register should be updated, an extra micro-operation is inserted to the ++;; sequence. ++;; BR_COND ++;; If a branch instruction is conditional, its input data is required at II. ++ ++;; LD_!bi ++;; -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1) ++(define_bypass 2 ++ "nds_n7_load" ++ "nds_n7_alu,\ ++ nds_n7_mul_fast, nds_n7_mul_slow,\ ++ nds_n7_mac_fast, nds_n7_mac_slow,\ ++ nds_n7_div,\ ++ nds_n7_branch,\ ++ nds_n7_load, nds_n7_store,\ ++ nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ ++ nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ ++ nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\ ++ nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\ ++ nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\ ++ nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12" ++ "nds32_n7_load_to_ii_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1) ++(define_bypass 2 ++ "nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ ++ nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ ++ nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12" ++ "nds_n7_alu,\ ++ nds_n7_mul_fast, nds_n7_mul_slow,\ ++ nds_n7_mac_fast, nds_n7_mac_slow,\ ++ nds_n7_div,\ ++ nds_n7_branch,\ ++ nds_n7_load, nds_n7_store,\ ++
nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ ++ nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ ++ nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\ ++ nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\ ++ nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\ ++ nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12" ++ "nds32_n7_last_load_to_ii_p" ++) +diff --git a/gcc/config/nds32/nds32-n8.md b/gcc/config/nds32/nds32-n8.md +new file mode 100644 +index 0000000..c3db9cd +--- /dev/null ++++ b/gcc/config/nds32/nds32-n8.md +@@ -0,0 +1,389 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N8 pipeline settings. 
++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n8_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; II - Instruction Issue / Address Generation ++;; EX - Instruction Execution ++;; EXD - Pseudo Stage / Load Data Completion ++ ++(define_cpu_unit "n8_ii" "nds32_n8_machine") ++(define_cpu_unit "n8_ex" "nds32_n8_machine") ++ ++(define_insn_reservation "nds_n8_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ii+n8_ex, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*2, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple")
++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*3, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*4, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*5, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*6, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*7, n8_ex") ++ ++(define_insn_reservation "nds_n8_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*11, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ii+n8_ex, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_3" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*2, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*3, n8_ex") ++ ++(define_insn_reservation 
"nds_n8_store_multiple_5" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*4, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*5, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*6, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*7, n8_ex") ++ ++(define_insn_reservation "nds_n8_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*11, n8_ex") ++ ++(define_insn_reservation "nds_n8_mul_fast" 1 ++ (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n8"))) ++ "n8_ii, n8_ex") ++ ++(define_insn_reservation "nds_n8_mul_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n8"))) ++ "n8_ii, n8_ex*16") ++ ++(define_insn_reservation "nds_n8_mac_fast" 1 ++ (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n8"))) ++ "n8_ii, n8_ii+n8_ex, n8_ex") ++ ++(define_insn_reservation "nds_n8_mac_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n8"))) ++ "n8_ii, (n8_ii+n8_ex)*16, n8_ex") ++ ++(define_insn_reservation "nds_n8_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, (n8_ii+n8_ex)*36, n8_ex") ++ ++(define_insn_reservation "nds_n8_branch" 1 ++ 
(and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n8")) ++ "n8_ii, n8_ex") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD_!bi ++;; Load data from the memory (without updating the base register) and ++;; produce the loaded data. The result is ready at EXD. ++;; LD_bi ++;; Load data from the memory (with updating the base register) and ++;; produce the loaded data. The result is ready at EXD. Because the ++;; register port is 2R1W, two micro-operations are required in order ++;; to write two registers. The base register is updated by the second ++;; micro-operation and the result is ready at EX. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at EXD. If the base register should be ++;; updated, an extra micro-operation is inserted to the sequence, and the ++;; result is ready at EX. ++;; ADDR_OUT ++;; Most load/store instructions can produce an address output if updating ++;; the base register is required. The result is ready at EX, which is ++;; produced by ALU. ++;; ALU, MUL, MAC ++;; The result is ready at EX. ++;; MOVD44_O ++;; A double-word move instruction needs to write registers twice. Because ++;; the register port is 2R1W, two micro-operations are required. The even ++;; number register is updated by the first one, and the odd number register ++;; is updated by the second one. Each of the results is ready at EX. ++;; The letter 'O' stands for odd. ++;; DIV_Rs ++;; A division instruction saves the quotient result to Rt and saves the ++;; remainder result to Rs. It requires two micro-operations because the ++;; register port is 2R1W. The first micro-operation writes to Rt, and ++;; the second one writes to Rs.
Each of the results is ready at EX. ++;; ++;; Consumers (RHS) ++;; ALU, MUL, DIV ++;; Require operands at EX. ++;; MOVD44_E ++;; The letter 'E' stands for even, which is accessed by the first micro- ++;; operation and a movd44 instruction. The operand is required at EX. ++;; MAC_RaRb ++;; A MAC instruction is separated into two micro-operations. The first ++;; micro-operation does the multiplication, which requires operands Ra ++;; and Rb at EX. The second micro-operation does the accumulation, which ++;; requires the operand Rt at EX. ++;; ADDR_IN_MOP(N) ++;; Because the register port is 2R1W, some load/store instructions are ++;; separated into many micro-operations. N denotes the address input is ++;; required by the N-th micro-operation. Such operand is required at II. ++;; ST_bi ++;; A post-increment store instruction requires its data at EX. ++;; ST_!bi_RI ++;; A store instruction with an immediate offset requires its data at EX. ++;; If the offset field is a register (ST_!bi_RR), the instruction will be ++;; separated into two micro-operations, and the second one requires the ++;; input operand at EX in order to store it to the memory. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at EX. If the base ++;; register should be updated, an extra micro-operation is inserted to the ++;; sequence. ++;; BR_COND ++;; If a branch instruction is conditional, its input data is required at EX.
++ ++;; LD_!bi -> ADDR_IN_MOP(1) ++(define_bypass 3 ++ "nds_n8_load" ++ "nds_n8_branch,\ ++ nds_n8_load, nds_n8_store,\ ++ nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_load_to_ii_p" ++) ++ ++;; LMW(N, N) -> ADDR_IN_MOP(1) ++(define_bypass 3 ++ "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" ++ "nds_n8_branch,\ ++ nds_n8_load, nds_n8_store,\ ++ nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_last_load_to_ii_p" ++) ++ ++;; LMW(N, N - 1) -> ADDR_IN_MOP(1) ++(define_bypass 2 ++ "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" ++ "nds_n8_branch,\ ++ nds_n8_load, nds_n8_store,\ ++ nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, 
nds_n8_load_multiple_12,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_last_load_two_to_ii_p" ++) ++ ++;; LD_bi -> ADDR_IN_MOP(1) ++(define_bypass 2 ++ "nds_n8_load" ++ "nds_n8_branch,\ ++ nds_n8_load, nds_n8_store,\ ++ nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_load_bi_to_ii_p" ++) ++ ++;; LD_!bi -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR_COND, ST_bi, ST_!bi_RI, SMW(N, 1) ++(define_bypass 2 ++ "nds_n8_load" ++ "nds_n8_alu, ++ nds_n8_mul_fast, nds_n8_mul_slow,\ ++ nds_n8_mac_fast, nds_n8_mac_slow,\ ++ nds_n8_div,\ ++ nds_n8_branch,\ ++ nds_n8_store,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_load_to_ex_p" ++) ++ ++;; ALU, MOVD44_O, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1) ++(define_bypass 2 ++ "nds_n8_alu, ++ nds_n8_mul_fast, nds_n8_mul_slow,\ ++ nds_n8_mac_fast, nds_n8_mac_slow,\ ++ nds_n8_div,\ ++ nds_n8_load, nds_n8_store,\ ++ nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ 
nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds_n8_branch,\ ++ nds_n8_load, nds_n8_store,\ ++ nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_ex_to_ii_p" ++) ++ ++;; LMW(N, N) -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR_COND, ST_bi, ST_!bi_RI, SMW(N, 1) ++(define_bypass 2 ++ "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ ++ nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ ++ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" ++ "nds_n8_alu, ++ nds_n8_mul_fast, nds_n8_mul_slow,\ ++ nds_n8_mac_fast, nds_n8_mac_slow,\ ++ nds_n8_div,\ ++ nds_n8_branch,\ ++ nds_n8_store,\ ++ nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ ++ nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ ++ nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" ++ "nds32_n8_last_load_to_ex_p" ++) +diff --git a/gcc/config/nds32/nds32-n9-2r1w.md b/gcc/config/nds32/nds32-n9-2r1w.md +new file mode 100644 +index 0000000..d0db953 +--- /dev/null ++++ b/gcc/config/nds32/nds32-n9-2r1w.md +@@ -0,0 +1,362 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. 
++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N9 2R1W pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n9_2r1w_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; II - Instruction Issue / Instruction Decode ++;; EX - Instruction Execution ++;; MM - Memory Execution ++;; WB - Instruction Retire / Result Write-Back ++ ++(define_cpu_unit "n9_2r1w_ii" "nds32_n9_2r1w_machine") ++(define_cpu_unit "n9_2r1w_ex" "nds32_n9_2r1w_machine") ++(define_cpu_unit "n9_2r1w_mm" "nds32_n9_2r1w_machine") ++(define_cpu_unit "n9_2r1w_wb" "nds32_n9_2r1w_machine") ++ ++(define_insn_reservation "nds_n9_2r1w_unknown" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_misc" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ 
++(define_insn_reservation "nds_n9_2r1w_mmu" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_alu" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_alu_shift" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_pbsad" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_pbsada" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_1" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ 
(eq_attr "combo" "1")))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_2" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_3" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_4" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_5" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_6" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, 
n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_7" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_8" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_load_multiple_12" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_1" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_2" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ 
(match_test "nds32::store_double_p (insn)")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_3" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_4" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_5" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_6" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_7" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ 
(and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_8" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_store_multiple_12" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_mul_fast" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_mul_slow" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex*17, n9_2r1w_mm, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_mac_fast" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, 
n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_mac_slow" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*17, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_div" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*34, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") ++ ++(define_insn_reservation "nds_n9_2r1w_branch" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD_!bi ++;; Load data from the memory (without updating the base register) and ++;; produce the loaded data. The result is ready at MM. Because the register ++;; port is 2R1W, two micro-operations are required if the base register ++;; should be updated. In this case, the base register is updated by the ++;; second micro-operation, and the updated result is ready at EX. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at MM. If the base register should be ++;; updated, an extra micro-operation is appended to the end of the ++;; sequence, and the result is ready at EX. ++;; MUL, MAC ++;; Compute data in the multiply-adder and produce the data. The result ++;; is ready at MM. 
++;; DIV ++;; Compute data in the divider and produce the data. The result is ready ++;; at MM. ++;; ++;; Consumers (RHS) ++;; ALU, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU ++;; Require operands at EX. ++;; ALU_SHIFT_Rb ++;; An ALU-SHIFT instruction consists of a shift micro-operation followed ++;; by an arithmetic micro-operation. The operand Rb is used by the first ++;; micro-operation, and there are some latencies if data dependency occurs. ++;; MOVD44_E ++;; A double-word move instruction needs two micro-operations because the ++;; register port is 2R1W. The first micro-operation writes an even number ++;; register, and the second micro-operation writes an odd number register. ++;; Each input operand is required at EX for each micro-operation. MOVD44_E ++;; stands for the first micro-operation. ++;; MAC_RaRb, M2R ++;; MAC instructions do multiplication at EX and do accumulation at MM, but ++;; MAC instructions which operate on general purpose registers always ++;; require operands at EX because MM stage cannot be forwarded in 2R1W mode. ++;; ADDR_IN ++;; If an instruction requires an address as its input operand, the address ++;; is required at EX. ++;; ST_bi ++;; A post-increment store instruction requires its data at EX because MM ++;; cannot be forwarded in 2R1W mode. ++;; ST_!bi_RI ++;; A store instruction with an immediate offset requires its data at EX ++;; because MM cannot be forwarded in 2R1W mode. If the offset field is a ++;; register (ST_!bi_RR), the instruction will be separated into two micro- ++;; operations, and the second one requires the input operand at EX in order ++;; to store it to the memory. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at MM. ++;; BR ++;; If a branch instruction is conditional, its input data is required at EX. 
++ ++;; LD_!bi, MUL, MAC ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU ++(define_bypass 2 ++ "nds_n9_2r1w_load,\ ++ nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ ++ nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow" ++ "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\ ++ nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\ ++ nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ ++ nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\ ++ nds_n9_2r1w_branch,\ ++ nds_n9_2r1w_div,\ ++ nds_n9_2r1w_load,nds_n9_2r1w_store,\ ++ nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ ++ nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ ++ nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\ ++ nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\ ++ nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\ ++ nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\ ++ nds_n9_2r1w_mmu" ++ "nds32_n9_2r1w_mm_to_ex_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU ++(define_bypass 2 ++ "nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ ++ nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ ++ nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12" ++ "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\ ++ nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\ ++ nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ ++ nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\ ++ nds_n9_2r1w_branch,\ ++ nds_n9_2r1w_div,\ ++ nds_n9_2r1w_load,nds_n9_2r1w_store,\ ++ nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ ++ 
nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ ++ nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\ ++ nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\ ++ nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\ ++ nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\ ++ nds_n9_2r1w_mmu" ++ "nds32_n9_last_load_to_ex_p" ++) +diff --git a/gcc/config/nds32/nds32-n9-3r2w.md b/gcc/config/nds32/nds32-n9-3r2w.md +new file mode 100644 +index 0000000..7849c72 +--- /dev/null ++++ b/gcc/config/nds32/nds32-n9-3r2w.md +@@ -0,0 +1,357 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N9 3R2W pipeline settings. 
++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n9_3r2w_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; II - Instruction Issue / Instruction Decode ++;; EX - Instruction Execution ++;; MM - Memory Execution ++;; WB - Instruction Retire / Result Write-Back ++ ++(define_cpu_unit "n9_3r2w_ii" "nds32_n9_3r2w_machine") ++(define_cpu_unit "n9_3r2w_ex" "nds32_n9_3r2w_machine") ++(define_cpu_unit "n9_3r2w_mm" "nds32_n9_3r2w_machine") ++(define_cpu_unit "n9_3r2w_wb" "nds32_n9_3r2w_machine") ++ ++(define_insn_reservation "nds_n9_3r2w_unknown" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_misc" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mmu" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_alu" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_alu_shift" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ 
++(define_insn_reservation "nds_n9_3r2w_pbsad" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_pbsada" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_1" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_2" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_3" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, 
n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_4" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_5" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_6" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_7" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_8" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ 
(and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_load_multiple_12" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_1" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_2" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_3" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_4" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" 
"4")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_5" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_6" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_7" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_8" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_store_multiple_12" 1 ++ (and 
(match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "pipeline_model" "n9") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")))) ++ "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mul_fast1" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mul_fast2" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mul_slow" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mac_fast1" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mac_fast2" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_mac_slow" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr 
"pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_div" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex*34, n9_3r2w_mm, n9_3r2w_wb") ++ ++(define_insn_reservation "nds_n9_3r2w_branch" 1 ++ (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n9"))) ++ "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD ++;; Load data from the memory and produce the loaded data. The result is ++;; ready at MM. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at MM. ++;; MUL, MAC ++;; Compute data in the multiply-adder and produce the data. The result ++;; is ready at MM. ++;; DIV ++;; Compute data in the divider and produce the data. The result is ready ++;; at MM. ++;; ++;; Consumers (RHS) ++;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU ++;; Require operands at EX. ++;; ALU_SHIFT_Rb ++;; An ALU-SHIFT instruction consists of a shift micro-operation followed ++;; by an arithmetic micro-operation. The operand Rb is used by the first ++;; micro-operation, and there are some latencies if data dependency occurs. ++;; MAC_RaRb ++;; A MAC instruction does multiplication at EX and does accumulation at MM, ++;; so the operand Rt is required at MM, and operands Ra and Rb are required ++;; at EX. ++;; ADDR_IN ++;; If an instruction requires an address as its input operand, the address ++;; is required at EX. 
++;; ST ++;; A store instruction requires its data at MM. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at MM. ++;; BR ++;; If a branch instruction is conditional, its input data is required at EX. ++ ++;; LD, MUL, MAC, DIV ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU ++(define_bypass 2 ++ "nds_n9_3r2w_load,\ ++ nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ ++ nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ ++ nds_n9_3r2w_div" ++ "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\ ++ nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\ ++ nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ ++ nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ ++ nds_n9_3r2w_branch,\ ++ nds_n9_3r2w_div,\ ++ nds_n9_3r2w_load,nds_n9_3r2w_store,\ ++ nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ ++ nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ ++ nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\ ++ nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\ ++ nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\ ++ nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\ ++ nds_n9_3r2w_mmu" ++ "nds32_n9_3r2w_mm_to_ex_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU ++(define_bypass 2 ++ "nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ ++ nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ ++ nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12" ++ "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\ ++ 
nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\ ++ nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ ++ nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ ++ nds_n9_3r2w_branch,\ ++ nds_n9_3r2w_div,\ ++ nds_n9_3r2w_load,nds_n9_3r2w_store,\ ++ nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ ++ nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ ++ nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\ ++ nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\ ++ nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\ ++ nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\ ++ nds_n9_3r2w_mmu" ++ "nds32_n9_last_load_to_ex_p" ++) +diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h +index 25c4081..e4017bb 100644 +--- a/gcc/config/nds32/nds32-opts.h ++++ b/gcc/config/nds32/nds32-opts.h +@@ -22,14 +22,42 @@ + #define NDS32_OPTS_H + + #define NDS32_DEFAULT_CACHE_BLOCK_SIZE 16 +-#define NDS32_DEFAULT_ISR_VECTOR_SIZE (TARGET_ISA_V3 ? 4 : 16) ++#define NDS32_DEFAULT_ISR_VECTOR_SIZE TARGET_DEFAULT_ISR_VECTOR_SIZE + + /* The various ANDES ISA. */ + enum nds32_arch_type + { + ARCH_V2, ++ ARCH_V2J, + ARCH_V3, +- ARCH_V3M ++ ARCH_V3J, ++ ARCH_V3M, ++ ARCH_V3M_PLUS, ++ ARCH_V3F, ++ ARCH_V3S ++}; ++ ++/* The various ANDES CPU. */ ++enum nds32_cpu_type ++{ ++ CPU_N6, ++ CPU_N7, ++ CPU_N8, ++ CPU_E8, ++ CPU_N9, ++ CPU_N10, ++ CPU_GRAYWOLF, ++ CPU_N12, ++ CPU_N13, ++ CPU_PANTHER, ++ CPU_SIMPLE ++}; ++ ++/* The memory model type (slow or fast). */ ++enum nds32_memory_model_type ++{ ++ MEMORY_MODEL_SLOW, ++ MEMORY_MODEL_FAST + }; + + /* The code model defines the address generation strategy. 
*/ +@@ -40,4 +68,56 @@ enum nds32_cmodel_type + CMODEL_LARGE + }; + ++/* The ICT model type (small or large). */ ++enum nds32_ict_model_type ++{ ++ ICT_MODEL_SMALL, ++ ICT_MODEL_LARGE ++}; ++ ++ ++/* Multiply instruction configuration. */ ++enum nds32_mul_type ++{ ++ MUL_TYPE_FAST_1, ++ MUL_TYPE_FAST_2, ++ MUL_TYPE_SLOW ++}; ++ ++/* Register ports configuration. */ ++enum nds32_register_ports ++{ ++ REG_PORT_3R2W, ++ REG_PORT_2R1W ++}; ++ ++/* Which ABI to use. */ ++enum abi_type ++{ ++ NDS32_ABI_V2, ++ NDS32_ABI_V2_FP_PLUS ++}; ++ ++/* The various FPU register-count configurations. */ ++enum float_reg_number ++{ ++ NDS32_CONFIG_FPU_0, ++ NDS32_CONFIG_FPU_1, ++ NDS32_CONFIG_FPU_2, ++ NDS32_CONFIG_FPU_3, ++ NDS32_CONFIG_FPU_4, ++ NDS32_CONFIG_FPU_5, ++ NDS32_CONFIG_FPU_6, ++ NDS32_CONFIG_FPU_7 ++}; ++ ++/* The cost model used by the lmwsmw optimization. */ ++enum lmwsmw_cost_type ++{ ++ LMWSMW_OPT_SIZE, ++ LMWSMW_OPT_SPEED, ++ LMWSMW_OPT_ALL, ++ LMWSMW_OPT_AUTO ++}; ++ + #endif +diff --git a/gcc/config/nds32/nds32-panther.md b/gcc/config/nds32/nds32-panther.md +new file mode 100644 +index 0000000..d45de1c +--- /dev/null ++++ b/gcc/config/nds32/nds32-panther.md +@@ -0,0 +1,446 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2016 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. 
++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. 
If not see ++;; <http://www.gnu.org/licenses/>. ++ ++;; ------------------------------------------------------------------------ ++;; Define Panther pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_pn_machine") ++ ++(define_cpu_unit "pn_i3_0" "nds32_pn_machine") ++(define_cpu_unit "pn_i3_1" "nds32_pn_machine") ++(define_cpu_unit "pn_e1_p0" "nds32_pn_machine") ++(define_cpu_unit "pn_e2_p0" "nds32_pn_machine") ++(define_cpu_unit "pn_e3_p0" "nds32_pn_machine") ++(define_cpu_unit "pn_e4_p0" "nds32_pn_machine") ++(define_cpu_unit "pn_wb_p0" "nds32_pn_machine") ++(define_cpu_unit "pn_e1_p1" "nds32_pn_machine") ++(define_cpu_unit "pn_e2_p1" "nds32_pn_machine") ++(define_cpu_unit "pn_e3_p1" "nds32_pn_machine") ++(define_cpu_unit "pn_e4_p1" "nds32_pn_machine") ++(define_cpu_unit "pn_wb_p1" "nds32_pn_machine") ++(define_cpu_unit "pn_e1_p2" "nds32_pn_machine") ++(define_cpu_unit "pn_e2_p2" "nds32_pn_machine") ++(define_cpu_unit "pn_e3_p2" "nds32_pn_machine") ++(define_cpu_unit "pn_e4_p2" "nds32_pn_machine") ++(define_cpu_unit "pn_wb_p2" "nds32_pn_machine") ++ ++(define_reservation "pn_i3" "pn_i3_0 | pn_i3_1") ++(define_reservation "pn_e1" "pn_e1_p0 | pn_e1_p1") ++(define_reservation "pn_e2" "pn_e2_p0 | pn_e2_p1") ++(define_reservation "pn_e3" "pn_e3_p0 | pn_e3_p1") ++(define_reservation "pn_e4" "pn_e4_p0 | pn_e4_p1") ++(define_reservation "pn_wb" "pn_wb_p0 | pn_wb_p1") ++ ++(define_insn_reservation "nds_pn_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb") ++ ++(define_insn_reservation 
"nds_pn_movd44" 1 ++ (and (and (and (eq_attr "type" "alu") ++ (eq_attr "subtype" "simple")) ++ (match_test "nds32::movd44_insn_p (insn)")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1, pn_wb_p1") ++ ++(define_insn_reservation "nds_pn_alu" 1 ++ (and (and (and (eq_attr "type" "alu") ++ (eq_attr "subtype" "simple")) ++ (match_test "!nds32::movd44_insn_p (insn)")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_shift" 1 ++ (and (and (eq_attr "type" "alu") ++ (eq_attr "subtype" "shift")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3*2, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_pbsada" 1 ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1, pn_e2, pn_e3*3, pn_e4, pn_wb") ++ ++(define_insn_reservation "nds_pn_load_full_word" 1 ++ (and (match_test "nds32::load_full_word_p (insn)") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_partial_word" 1 ++ (and (match_test "nds32::load_partial_word_p (insn)") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr 
"pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*3, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*4, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*5, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*6, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*7, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*8, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*12, pn_e2_p2, pn_e3_p2, 
pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_1" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "1"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_2" 1
++  (and (ior (and (eq_attr "type" "store_multiple")
++                 (eq_attr "combo" "2"))
++            (match_test "nds32::store_double_p (insn)"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*2, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_3" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "3"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*3, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_4" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "4"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*4, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_5" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "5"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*5, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_6" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "6"))  ; combo value pairs with the pn_e1_p2*6 reservation below
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*6, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_7" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "7"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*7, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation "nds_pn_store_multiple_8" 1
++  (and (and (eq_attr "type" "store_multiple")
++            (eq_attr "combo" "8"))
++       (eq_attr "pipeline_model" "panther"))
++  "pn_i3, pn_e1_p2*8, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2")
++
++(define_insn_reservation
"nds_pn_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p2*12, pn_e2_p2, pn_e3_p2, pn_e4_p2, pn_wb_p2") ++ ++(define_insn_reservation "nds_pn_mul" 1 ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1, pn_wb_p1") ++ ++(define_insn_reservation "nds_pn_mac" 1 ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1, pn_wb_p1") ++ ++;; The cycles consumed in E4 stage is 32 - CLZ(abs(Ra)) + 2, ++;; so the worst case is 34. ++(define_insn_reservation "nds_pn_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p1, pn_e2_p1, pn_e3_p1, pn_e4_p1*34, pn_wb_p1") ++ ++(define_insn_reservation "nds_pn_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "panther")) ++ "pn_i3, pn_e1_p0, pn_e2_p0, pn_e3_p0, pn_e4_p0, pn_wb_p0") ++ ++;; SHIFT -> ADDR_IN ++(define_bypass 2 ++ "nds_pn_shift" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_e2_to_e1_p" ++) ++ ++;; ALU, MOVD44 -> ADDR_IN ++(define_bypass 3 ++ "nds_pn_alu, nds_pn_movd44" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, 
nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_e3_to_e1_p" ++) ++ ++;; ALU, MOVD44 -> SHIFT, MUL, MAC_RaRb ++(define_bypass 2 ++ "nds_pn_alu, nds_pn_movd44" ++ "nds_pn_shift, nds_pn_mul, nds_pn_mac" ++ "nds32_pn_e3_to_e2_p" ++) ++ ++;; MUL, MAC, DIV, LW, ADDR_OUT -> ADDR_IN ++(define_bypass 4 ++ "nds_pn_mul, nds_pn_mac, nds_pn_div,\ ++ nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_e4_to_e1_p" ++) ++ ++;; MUL, MAC, DIV, LW, ADDR_OUT -> SHIFT, MUL, MAC_RaRb ++(define_bypass 3 ++ "nds_pn_mul, nds_pn_mac, nds_pn_div,\ ++ nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, 
nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds_pn_shift, nds_pn_mul, nds_pn_mac" ++ "nds32_pn_e4_to_e2_p" ++) ++ ++;; MUL, MAC, DIV, LW, ADDR_OUT -> ALU, MOVD44, BR_COND, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_pn_mul, nds_pn_mac, nds_pn_div,\ ++ nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds_pn_alu, nds_pn_movd44, nds_pn_branch,\ ++ nds_pn_store,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_e4_to_e3_p" ++) ++ ++;; LH, LB -> ADDR_IN ++(define_bypass 5 ++ "nds_pn_load_partial_word" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, 
nds_pn_store_multiple_12" ++ "nds32_pn_wb_to_e1_p" ++) ++ ++;; LH, LB -> SHIFT, MUL, MAC_RaRb ++(define_bypass 4 ++ "nds_pn_load_partial_word" ++ "nds_pn_shift, nds_pn_mul, nds_pn_mac" ++ "nds32_pn_wb_to_e2_p" ++) ++ ++;; LH, LB -> ALU, MOVD44, BR_COND, ST, SMW(N, 1) ++(define_bypass 3 ++ "nds_pn_load_partial_word" ++ "nds_pn_alu, nds_pn_movd44, nds_pn_branch,\ ++ nds_pn_store,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_wb_to_e3_p" ++) ++ ++;; LH, LB -> DIV ++(define_bypass 2 ++ "nds_pn_load_partial_word" ++ "nds_pn_div" ++ "nds32_pn_wb_to_e4_p" ++) ++ ++;; LMW(N, N) -> ADDR_IN ++(define_bypass 4 ++ "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_last_load_to_e1_p" ++) ++ ++;; LMW(N, N) -> SHIFT, MUL, MAC_RaRb ++(define_bypass 3 ++ "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12" ++ "nds_pn_shift, nds_pn_mul, nds_pn_mac" ++ "nds32_pn_last_load_to_e2_p" ++) ++ ++;; LMW(N, N - 
1) -> ADDR_IN ++(define_bypass 3 ++ "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_last_two_load_to_e1_p" ++) ++ ++;; LMW(N, N - 2) -> ADDR_IN ++(define_bypass 2 ++ "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12" ++ "nds_pn_load_full_word, nds_pn_load_partial_word, nds_pn_store,\ ++ nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_last_three_load_to_e1_p" ++) ++ ++;; LMW(N, N - 1) -> SHIFT, MUL, MAC_RaRb ++(define_bypass 2 ++ "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12" ++ 
"nds_pn_shift, nds_pn_mul, nds_pn_mac" ++ "nds32_pn_last_two_load_to_e2_p" ++) ++ ++;; LMW(N, N) -> ALU, MOVD44, BR_COND ++(define_bypass 2 ++ "nds_pn_load_multiple_1, nds_pn_load_multiple_2, nds_pn_load_multiple_3,\ ++ nds_pn_load_multiple_4, nds_pn_load_multiple_5, nds_pn_load_multiple_6,\ ++ nds_pn_load_multiple_7, nds_pn_load_multiple_8, nds_pn_load_multiple_12" ++ "nds_pn_alu, nds_pn_movd44, nds_pn_branch,\ ++ nds_pn_store,\ ++ nds_pn_store_multiple_1, nds_pn_store_multiple_2, nds_pn_store_multiple_3,\ ++ nds_pn_store_multiple_4, nds_pn_store_multiple_5, nds_pn_store_multiple_6,\ ++ nds_pn_store_multiple_7, nds_pn_store_multiple_8, nds_pn_store_multiple_12" ++ "nds32_pn_last_load_to_e3_p" ++) +diff --git a/gcc/config/nds32/nds32-peephole2.md b/gcc/config/nds32/nds32-peephole2.md +index 07e3a2b..bb47385 100644 +--- a/gcc/config/nds32/nds32-peephole2.md ++++ b/gcc/config/nds32/nds32-peephole2.md +@@ -19,6 +19,197 @@ + ;; <http://www.gnu.org/licenses/>. + + +-;; Use define_peephole2 to handle possible target-specific optimization. ++;; Use define_split, define_peephole, and define_peephole2 to ++;; handle possible target-specific optimization in this file. + + ;; ------------------------------------------------------------------------ ++;; Try to utilize 16-bit instruction by swap operand if possible. ++;; ------------------------------------------------------------------------ ++ ++;; Try to make add as add45. ++(define_peephole2 ++ [(set (match_operand:QIHISI 0 "register_operand" "") ++ (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "") ++ (match_operand:QIHISI 2 "register_operand" "")))] ++ "reload_completed ++ && TARGET_16_BIT ++ && REGNO (operands[0]) == REGNO (operands[2]) ++ && REGNO (operands[0]) != REGNO (operands[1]) ++ && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))" ++ [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))]) ++ ++;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33. 
++(define_peephole2  ; after reload, swap operands 2 and 3 when the destination equals operand 3
++  [(set (match_operand:SI 0 "register_operand" "")
++        (match_operator:SI 1 "nds32_have_33_inst_operator"
++          [(match_operand:SI 2 "register_operand" "")
++           (match_operand:SI 3 "register_operand" "")]))]
++  "reload_completed
++   && TARGET_16_BIT
++   && REGNO (operands[0]) == REGNO (operands[3])
++   && REGNO (operands[0]) != REGNO (operands[2])
++   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0]))
++   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))"
++  [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))])
++
++(define_peephole  ; two SImode GPR moves on adjacent even/odd register pairs are emitted as one movd44
++  [(set (match_operand:SI 0 "register_operand" "")
++        (match_operand:SI 1 "register_operand" ""))
++   (set (match_operand:SI 2 "register_operand" "")
++        (match_operand:SI 3 "register_operand" ""))]
++  "TARGET_16_BIT
++   && !TARGET_ISA_V2
++   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
++   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
++   && ((REGNO (operands[0]) & 0x1) == 0)
++   && ((REGNO (operands[1]) & 0x1) == 0)
++   && (REGNO (operands[0]) + 1) == REGNO (operands[2])
++   && (REGNO (operands[1]) + 1) == REGNO (operands[3])"
++  "movd44\t%0, %1"
++  [(set_attr "type" "alu")
++   (set_attr "length" "2")])
++
++;; Merge two fcpyss (SFmode moves on an even/odd register pair) into one fcpysd (DFmode move).
++(define_peephole2  ; even register moved first, odd register second
++  [(set (match_operand:SF 0 "float_even_register_operand" "")
++        (match_operand:SF 1 "float_even_register_operand" ""))
++   (set (match_operand:SF 2 "float_odd_register_operand" "")
++        (match_operand:SF 3 "float_odd_register_operand" ""))]
++  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++   && REGNO (operands[0]) == REGNO (operands[2]) - 1
++   && REGNO (operands[1]) == REGNO (operands[3]) - 1"
++  [(set (match_dup 4) (match_dup 5))]
++  {
++    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0]));
++    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1]));
++  })
++
++(define_peephole2  ; same merge with the odd register moved first; the DFmode regs are built from the even ones
++  [(set (match_operand:SF 0 "float_odd_register_operand" "")
++        (match_operand:SF 1 "float_odd_register_operand" ""))
++   (set (match_operand:SF 2 "float_even_register_operand" "")
++        (match_operand:SF 3 "float_even_register_operand" ""))]
++  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
++   && REGNO (operands[2]) == REGNO (operands[0]) - 1
++   && REGNO (operands[3]) == REGNO (operands[1]) - 1"
++  [(set (match_dup 4) (match_dup 5))]
++  {
++    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2]));
++    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3]));
++  })
++
++;; Merge two flsi (SFmode loads into an even/odd register pair) into one fldi (DFmode load).
++(define_peephole2  ; even register loaded first; the widened DFmode access is built from operand 3's memory
++  [(set (match_operand:SF 0 "float_even_register_operand" "")
++        (match_operand:SF 1 "memory_operand" ""))
++   (set (match_operand:SF 2 "float_odd_register_operand" "")
++        (match_operand:SF 3 "memory_operand" ""))]
++  "REGNO (operands[0]) == REGNO (operands[2]) - 1
++   && nds32_memory_merge_peep_p (operands[3], operands[1])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[1] = widen_memory_access (operands[3], DFmode, 0);
++  operands[0] = gen_rtx_REG (DFmode, REGNO (operands[0]));
++})
++
++(define_peephole2  ; odd register loaded first; the widened DFmode access is built from operand 1's memory
++  [(set (match_operand:SF 0 "float_odd_register_operand" "")
++        (match_operand:SF 1 "memory_operand" ""))
++   (set (match_operand:SF 2 "float_even_register_operand" "")
++        (match_operand:SF 3 "memory_operand" ""))]
++  "REGNO (operands[2]) == REGNO (operands[0]) - 1
++   && nds32_memory_merge_peep_p (operands[1], operands[3])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[1] = widen_memory_access (operands[1], DFmode, 0);
++  operands[0] = gen_rtx_REG (DFmode, REGNO (operands[2]));
++})
++
++;; Merge two fssi (SFmode stores from an even/odd register pair) into one fsdi (DFmode store).
++(define_peephole2  ; even register stored first; the widened DFmode access is built from operand 2's memory
++  [(set (match_operand:SF 0 "memory_operand" "")
++        (match_operand:SF 1 "float_even_register_operand" ""))
++   (set (match_operand:SF 2 "memory_operand" "")
++        (match_operand:SF 3 "float_odd_register_operand" ""))]
++  "REGNO (operands[1]) == REGNO (operands[3]) - 1
++   && nds32_memory_merge_peep_p (operands[2], operands[0])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[0] = widen_memory_access (operands[2], DFmode, 0);
++  operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
++})
++
++(define_peephole2  ; odd register stored first; the widened DFmode access is built from operand 0's memory
++  [(set (match_operand:SF 0 "memory_operand" "")
++        (match_operand:SF 1 "float_odd_register_operand" ""))
++   (set (match_operand:SF 2 "memory_operand" "")
++        (match_operand:SF 3 "float_even_register_operand" ""))]
++  "REGNO (operands[3]) == REGNO (operands[1]) - 1
++   && nds32_memory_merge_peep_p (operands[0], operands[2])"
++  [(set (match_dup 0) (match_dup 1))]
++{
++  operands[0] = widen_memory_access (operands[0], DFmode, 0);
++  operands[1] = gen_rtx_REG (DFmode, REGNO (operands[3]));
++})
++
++;; ------------------------------------------------------------------------
++;; GCC prefers [u]divmodsi3 over [u]divsi3 even when the remainder is
++;; unused, so we use a split to drop the mod operation and lower register pressure.
++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (div:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (mod:SI (match_dup 1) (match_dup 2)))] ++ "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL ++ && can_create_pseudo_p ()" ++ [(set (match_dup 0) ++ (div:SI (match_dup 1) ++ (match_dup 2)))]) ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (udiv:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (umod:SI (match_dup 1) (match_dup 2)))] ++ "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL ++ && can_create_pseudo_p ()" ++ [(set (match_dup 0) ++ (udiv:SI (match_dup 1) ++ (match_dup 2)))]) ++ ++(define_peephole2 ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "NDS32_EXT_DSP_P () ++ && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? 
REGNO (operands[0]) + 1 : REGNO (operands[0]))" ++ [(const_int 1)] ++{ ++ rtx highpart = nds32_di_high_part_subreg (operands[0]); ++ emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_split ++ [(set (match_operand:DI 0 "nds32_general_register_operand" "") ++ (match_operand:DI 1 "nds32_general_register_operand" ""))] ++ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL ++ || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL" ++ [(set (match_dup 0) (match_dup 1))] ++{ ++ rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); ++ HOST_WIDE_INT offset; ++ if (dead_note == NULL_RTX) ++ offset = 0; ++ else ++ offset = 4; ++ operands[0] = simplify_gen_subreg ( ++ SImode, operands[0], ++ DImode, offset); ++ operands[1] = simplify_gen_subreg ( ++ SImode, operands[1], ++ DImode, offset); ++}) +diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c +index a396fff..903a2ed 100644 +--- a/gcc/config/nds32/nds32-pipelines-auxiliary.c ++++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c +@@ -21,14 +21,2638 @@ + + /* ------------------------------------------------------------------------ */ + ++#include <set> + #include "config.h" + #include "system.h" + #include "coretypes.h" + #include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). 
*/
++#include "flags.h"
++#include "insn-config.h"
++#include "expmed.h"
++#include "dojump.h"
++#include "explow.h"
++#include "emit-rtl.h"
++#include "stmt.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h" /* For GEN_FCN. */
++#include "target.h"
++#include "langhooks.h" /* For add_builtin_function(). */
++#include "builtins.h"
++#include "tree-pass.h"
+ 
+ /* ------------------------------------------------------------------------ */
+ 
+-/* This file is prepared for future implementation of precise
+- pipeline description for nds32 target. */
++namespace nds32 {
++namespace scheduling {
++
++/* Classify the memory access direction of a load/store insn. The direction
++   is MEM_ACCESS_DIR_UNKNOWN when the address offset is not a constant value. */
++enum memory_access_direction
++{
++  MEM_ACCESS_DIR_POS,
++  MEM_ACCESS_DIR_NEG,
++  MEM_ACCESS_DIR_UNKNOWN
++};
++
++/* This class provides some wrappers of the DFA scheduler. dfa_start () and
++   dfa_finish () work on static globals, so the class counts its live instances in gcc_dfa_initialized_.
++   NOTE(review): the original text said coexisting instances are "now allowed"; likely "not allowed" -- confirm.
++   Using the loosest relationship, 'dependency' rather than 'aggregation' or 'composition', can minimize this issue. */
++class pipeline_simulator
++{
++public:
++  pipeline_simulator ();
++  ~pipeline_simulator ();
++
++  void advance_cycle (int cycles = 1);
++  int query_latency (rtx_insn *producer, rtx_insn *consumer) const;
++  int issue_insn (rtx_insn *insn);
++  int force_issue_insn (rtx_insn *insn);
++
++private:
++  static int gcc_dfa_initialized_;
++  state_t state_;
++};
++
++/* Insert pseudo NOPs so that we can see stall cycles caused by structural or
++   data hazards in the assembly code. The design of this class is similar to
++   the 'template method' pattern, but we don't need to maintain multiple
++   customized algorithms at the same time. Hence this class has no virtual
++   functions providing further customizations. */
++class stall_inserter
++{
++private:
++  enum dep_type { RES_DEP, DATA_DEP };
++
++public:
++  void insert_stalls ();
++
++private:
++  static rtx emit_pseudo_nop_before (rtx_insn *insn, int cycles, enum dep_type type);
++
++  void insert_structural_hazard_stalls ();
++  void insert_data_hazard_stalls ();
++  void emit_pseudo_nops_for_data_hazards (rtx_insn *insn,
++                                          pipeline_simulator &simulator);
++};
++
++class pass_nds32_print_stalls : public rtl_opt_pass
++{
++public:
++  pass_nds32_print_stalls (gcc::context *ctxt);
++
++  bool gate (function *);
++  unsigned int execute (function *);
++};
++
++int pipeline_simulator::gcc_dfa_initialized_ = 0;
++
++const pass_data pass_data_nds32_print_stalls =
++{
++  RTL_PASS, /* type */
++  "print_stalls", /* name */
++  OPTGROUP_NONE, /* optinfo_flags */
++  TV_MACH_DEP, /* tv_id */
++  0, /* properties_required */
++  0, /* properties_provided */
++  0, /* properties_destroyed */
++  0, /* todo_flags_start */
++  0 /* todo_flags_finish */
++};
++
++rtl_opt_pass *
++make_pass_nds32_print_stalls (gcc::context *ctxt)
++{
++  return new pass_nds32_print_stalls (ctxt);
++}
++
++/* A NULL-safe wrapper around reg_overlap_mentioned_p (): returns false when either X or IN is NULL_RTX. */
++bool
++reg_overlap_p (rtx x, rtx in)
++{
++  if (x == NULL_RTX || in == NULL_RTX)
++    return false;
++
++  return static_cast <bool> (reg_overlap_mentioned_p (x, in));
++}
++
++/* Calculate the cycle distance between two insns in pipeline view.
++   Each executable insn is treated as one cycle; a pseudo NOP adds the cycle count stored in its pattern.
++   TODO: multi-cycle insns should be handled
++   specially, but we haven't done it here.
*/
++int
++cycle_distance (rtx_insn *from, rtx_insn *to)
++{
++  int count = 1;
++
++  for (from = NEXT_INSN (from); from && from != to; from = NEXT_INSN (from))
++    {
++      if (!insn_executable_p (from))
++        continue;
++
++      if (insn_pseudo_nop_p (from))
++        count += INTVAL (XVECEXP (PATTERN (from), 0, 0));
++      else
++        ++count;
++    }
++
++  return count;
++}
++
++/* Determine the memory access direction of a load/store insn from its post-inc/post-dec addressing or the sign of a constant offset; MEM_ACCESS_DIR_UNKNOWN if the offset is not a CONST_INT. */
++memory_access_direction
++determine_access_direction (rtx_insn *insn)
++{
++  int post_update_rtx_index;
++  rtx plus_rtx;
++  rtx mem_rtx;
++  rtx offset_rtx;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD_MULTIPLE:
++      gcc_assert (parallel_elements (insn) >= 2);
++
++      post_update_rtx_index = find_post_update_rtx (insn);
++      if (post_update_rtx_index != -1)
++        plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index));
++      else
++        {
++          /* No post-update element, so take the address of element 1: (parallel
++               [(set (reg) (mem (reg))) : index 0
++                (set (reg) (mem (plus (reg) (...)))) : index 1
++                ...]) */
++          mem_rtx = SET_SRC (parallel_element (insn, 1));
++          if (GET_CODE (mem_rtx) == UNSPEC)
++            mem_rtx = XVECEXP (mem_rtx, 0, 0);
++          gcc_assert (MEM_P (mem_rtx));
++          plus_rtx = XEXP (mem_rtx, 0);
++        }
++      break;
++
++    case TYPE_STORE_MULTIPLE:
++      gcc_assert (parallel_elements (insn) >= 2);
++
++      post_update_rtx_index = find_post_update_rtx (insn);
++      if (post_update_rtx_index != -1)
++        plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index));
++      else
++        {
++          /* No post-update element, so take the address of element 1: (parallel
++               [(set (mem (reg)) (reg)) : index 0
++                (set (mem (plus (reg) (...))) (reg)) : index 1
++                ...]) */
++          mem_rtx = SET_DEST (parallel_element (insn, 1));
++          if (GET_CODE (mem_rtx) == UNSPEC)
++            mem_rtx = XVECEXP (mem_rtx, 0, 0);
++          gcc_assert (MEM_P (mem_rtx));
++          plus_rtx = XEXP (mem_rtx, 0);
++        }
++      break;
++
++    case TYPE_LOAD:
++    case TYPE_STORE:
++      mem_rtx = extract_mem_rtx (insn);
++
++      switch (GET_CODE (XEXP (mem_rtx, 0)))
++        {
++        case POST_INC:
++          /* (mem (post_inc (...))) */
++          return MEM_ACCESS_DIR_POS;
++
++        case POST_DEC:
++          /* (mem (post_dec (...))) */
++          return MEM_ACCESS_DIR_NEG;
++
++        case PLUS:
++          /* (mem (plus (reg) (...))) */
++          plus_rtx = XEXP (mem_rtx, 0);
++          break;
++
++        case POST_MODIFY:
++          /* (mem (post_modify (reg) (plus (reg) (...)))) */
++          plus_rtx = XEXP (XEXP (mem_rtx, 0), 1);
++          break;
++
++        default:
++          gcc_unreachable ();
++        }
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  gcc_assert (GET_CODE (plus_rtx) == PLUS);
++
++  offset_rtx = XEXP (plus_rtx, 1);
++  if (GET_CODE (offset_rtx) == CONST_INT)
++    {
++      if (INTVAL (offset_rtx) < 0)
++        return MEM_ACCESS_DIR_NEG;
++      else
++        return MEM_ACCESS_DIR_POS;
++    }
++
++  return MEM_ACCESS_DIR_UNKNOWN;
++}
++
++/* Return the nth load/store operation in the real micro-operation
++   accessing order, stepping over the post-update element if the insn has one. */
++rtx
++extract_nth_access_rtx (rtx_insn *insn, int n)
++{
++  int n_elems = parallel_elements (insn);
++  int post_update_rtx_index = find_post_update_rtx (insn);
++  memory_access_direction direction = determine_access_direction (insn);
++
++  gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN);
++
++  /* Reverse the order if the direction is negative: N becomes -N - 1. */
++  if (direction == MEM_ACCESS_DIR_NEG)
++    n = -1 * n - 1;
++
++  if (post_update_rtx_index != -1)
++    {
++      if (n >= 0 && post_update_rtx_index <= n)
++        ++n;
++      else if (n < 0 && post_update_rtx_index >= n + n_elems)
++        --n;
++    }
++
++  return parallel_element (insn, n);
++}
++
++/* Returns the register operated by the nth load/store operation in the real
++   micro-operation accessing order. This function assumes INSN must be a
++   multiple-word load/store insn.
*/
++rtx
++extract_nth_lmsw_access_reg (rtx_insn *insn, int n)
++{
++  rtx nth_rtx = extract_nth_access_rtx (insn, n);
++
++  if (nth_rtx == NULL_RTX)
++    return NULL_RTX;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD_MULTIPLE:
++      return SET_DEST (nth_rtx);
++
++    case TYPE_STORE_MULTIPLE:
++      return SET_SRC (nth_rtx);
++
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* Returns the register operated by the nth load/store operation in the real
++   micro-operation accessing order, or NULL_RTX if N is out of range. This function assumes INSN must be a
++   double-word (DImode or DFmode) load/store insn. */
++rtx
++extract_nth_ls2_access_reg (rtx_insn *insn, int n)
++{
++  rtx reg;
++  enum machine_mode mode;
++
++  if (post_update_insn_p (insn))
++    {
++      memory_access_direction direction = determine_access_direction (insn);
++      gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN);
++
++      /* Reverse the order if the direction is negative: N becomes -N - 1. */
++      if (direction == MEM_ACCESS_DIR_NEG)
++        n = -1 * n - 1;
++    }
++
++  /* Handle the out-of-range case: only -2, -1, 0 and 1 name one of the two words. */
++  if (n < -2 || n > 1)
++    return NULL_RTX;
++
++  /* Convert a negative index to the equivalent positive one (-2 -> 0, -1 -> 1). */
++  if (n < 0)
++    n = 2 + n;
++
++  switch (get_attr_type (insn))
++    {
++    case TYPE_LOAD:
++      reg = SET_DEST (PATTERN (insn));
++      break;
++
++    case TYPE_STORE:
++      reg = SET_SRC (PATTERN (insn));
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  gcc_assert (REG_P (reg) || GET_CODE (reg) == SUBREG);
++
++  switch (GET_MODE (reg))
++    {
++    case DImode:
++      mode = SImode;
++      break;
++
++    case DFmode:
++      mode = SFmode;
++      break;
++
++    default:
++      gcc_unreachable ();
++    }
++
++  if (n == 0)
++    return gen_lowpart (mode, reg);
++  else
++    return gen_highpart (mode, reg);
++}
++
++/* Returns the register operated by the nth load/store operation in the real
++   micro-operation accessing order.
*/
++rtx
++extract_nth_access_reg (rtx_insn *insn, int index)
++{
++  switch (GET_CODE (PATTERN (insn)))
++    {
++    case PARALLEL:
++      return extract_nth_lmsw_access_reg (insn, index);
++
++    case SET:
++      return extract_nth_ls2_access_reg (insn, index);
++
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* Determine if latency occurs when the consumer PBSADA_INSN uses the
++   value of DEF_REG in its Ra or Rb fields (elements 0 and 1 of the UNSPEC source). */
++bool
++pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg)
++{
++  rtx unspec_rtx = SET_SRC (PATTERN (pbsada_insn));
++  gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);
++
++  rtx pbsada_ra = XVECEXP (unspec_rtx, 0, 0);
++  rtx pbsada_rb = XVECEXP (unspec_rtx, 0, 1);
++
++  if (rtx_equal_p (def_reg, pbsada_ra)
++      || rtx_equal_p (def_reg, pbsada_rb))
++    return true;
++
++  return false;
++}
++
++/* Determine if latency occurs when the consumer PBSADA_INSN uses the
++   value of DEF_REG in its Rt field (the SET destination). */
++bool
++pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg)
++{
++  rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn));
++
++  if (rtx_equal_p (def_reg, pbsada_rt))
++    return true;
++
++  return false;
++}
++
++/* Check if INSN is a movd44 insn whose source is DEF_REG, or whose source is the inner register of an SImode SUBREG DEF_REG at byte 0 (little endian) or byte 4 (big endian). */
++bool
++movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
++{
++  if (!movd44_insn_p (insn))
++    return false;
++
++  rtx use_rtx = SET_SRC (PATTERN (insn));
++
++  if (REG_P (def_reg))
++    {
++      return rtx_equal_p (def_reg, use_rtx);
++    }
++  else if (GET_CODE (def_reg) == SUBREG
++           && GET_MODE (def_reg) == SImode
++           && rtx_equal_p (SUBREG_REG (def_reg), use_rtx))
++    {
++      if (TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 4)
++        return true;
++
++      if (!TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 0)
++        return true;
++
++      return false;
++    }
++
++  return false;
++}
++
++/* Check if INSN is a wext insn consuming DEF_REG.
*/ ++bool ++wext_odd_dep_p (rtx insn, rtx def_reg) ++{ ++ rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0); ++ rtx use_reg = XEXP (shift_rtx, 0); ++ rtx pos_rtx = XEXP (shift_rtx, 1); ++ ++ if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx)) ++ return true; ++ ++ if (GET_MODE (def_reg) == DImode) ++ return reg_overlap_p (def_reg, use_reg); ++ ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ gcc_assert (REG_P (use_reg)); ++ ++ if (REG_P (def_reg)) ++ { ++ if (!TARGET_BIG_ENDIAN) ++ return REGNO (def_reg) == REGNO (use_reg) + 1; ++ else ++ return REGNO (def_reg) == REGNO (use_reg); ++ } ++ ++ if (GET_CODE (def_reg) == SUBREG) ++ { ++ if (!reg_overlap_p (def_reg, use_reg)) ++ return false; ++ ++ if (!TARGET_BIG_ENDIAN) ++ return SUBREG_BYTE (def_reg) == 4; ++ else ++ return SUBREG_BYTE (def_reg) == 0; ++ } ++ ++ return false; ++} ++ ++/* Check if INSN is a bpick insn consuming DEF_REG. */ ++bool ++bpick_ra_rb_dep_p (rtx insn, rtx def_reg) ++{ ++ rtx ior_rtx = SET_SRC (PATTERN (insn)); ++ rtx and1_rtx = XEXP (ior_rtx, 0); ++ rtx and2_rtx = XEXP (ior_rtx, 1); ++ rtx reg1_0 = XEXP (and1_rtx, 0); ++ rtx reg1_1 = XEXP (and1_rtx, 1); ++ rtx reg2_0 = XEXP (and2_rtx, 0); ++ rtx reg2_1 = XEXP (and2_rtx, 1); ++ ++ if (GET_CODE (reg1_0) == NOT) ++ { ++ if (rtx_equal_p (reg1_0, reg2_0)) ++ return reg_overlap_p (def_reg, reg1_1) ++ || reg_overlap_p (def_reg, reg2_1); ++ ++ if (rtx_equal_p (reg1_0, reg2_1)) ++ return reg_overlap_p (def_reg, reg1_1) ++ || reg_overlap_p (def_reg, reg2_0); ++ } ++ ++ if (GET_CODE (reg1_1) == NOT) ++ { ++ if (rtx_equal_p (reg1_1, reg2_0)) ++ return reg_overlap_p (def_reg, reg1_0) ++ || reg_overlap_p (def_reg, reg2_1); ++ ++ if (rtx_equal_p (reg1_1, reg2_1)) ++ return reg_overlap_p (def_reg, reg1_0) ++ || reg_overlap_p (def_reg, reg2_0); ++ } ++ ++ if (GET_CODE (reg2_0) == NOT) ++ { ++ if (rtx_equal_p (reg2_0, reg1_0)) ++ return reg_overlap_p (def_reg, reg2_1) ++ || reg_overlap_p (def_reg, reg1_1); ++ ++ if (rtx_equal_p 
(reg2_0, reg1_1)) ++ return reg_overlap_p (def_reg, reg2_1) ++ || reg_overlap_p (def_reg, reg1_0); ++ } ++ ++ if (GET_CODE (reg2_1) == NOT) ++ { ++ if (rtx_equal_p (reg2_1, reg1_0)) ++ return reg_overlap_p (def_reg, reg2_0) ++ || reg_overlap_p (def_reg, reg1_1); ++ ++ if (rtx_equal_p (reg2_1, reg1_1)) ++ return reg_overlap_p (def_reg, reg2_0) ++ || reg_overlap_p (def_reg, reg1_0); ++ } ++ ++ gcc_unreachable (); ++} ++ ++pipeline_simulator::pipeline_simulator () ++{ ++ /* The design of dfa_start () operates on static global variables and ++ allocates memory space without checking whether the function is called ++ twice or not. We add some guards in order to protect it from abusing. */ ++ if (!gcc_dfa_initialized_++) ++ dfa_start (); ++ ++ state_ = xmalloc (state_size()); ++ state_reset (state_); ++} ++ ++pipeline_simulator::~pipeline_simulator () ++{ ++ /* The design of dfa_finish () operates on a static global variable and ++ deallocates memory space without checking whether the function is called ++ twice or not. We add some guards in order to protect it from abusing. */ ++ free (state_); ++ ++ gcc_assert(gcc_dfa_initialized_ > 0); ++ if (!--gcc_dfa_initialized_) ++ dfa_finish (); ++} ++ ++void ++pipeline_simulator::advance_cycle (int cycles) ++{ ++ gcc_assert (cycles > 0); ++ ++ /* The second argument was 'NULL', but we found the expression is directly ++ written in insn-automata.c: ++ if (insn == 0) ++ insn_code = DFA__ADVANCE_CYCLE; ++ Hence we change it to '0' in order to make it consistent. */ ++ while (cycles--) ++ state_transition (state_, 0); ++} ++ ++/* A wrapper of insn_latency () provided by the insn-attr.h in the object tree. ++ See that file for more information. */ ++int ++pipeline_simulator::query_latency (rtx_insn *producer, rtx_insn *consumer) const ++{ ++ return insn_latency (producer, consumer); ++} ++ ++/* Return 0 or negative if we can issue INSN at the current cycle. 
Otherwise, ++ return a postive value indicates how many cycles we have to wait. The ++ interface is consistent with state_transition () provided by insn-attr.h ++ in the object directory. See that file for more information. */ ++int ++pipeline_simulator::issue_insn (rtx_insn *insn) ++{ ++ int stalls; ++ ++ /* Skip cycles specified by pseudo NOPs. */ ++ if (insn_pseudo_nop_p (insn)) ++ { ++ int nop_stalls = INTVAL (XVECEXP (PATTERN (insn), 0, 0)); ++ ++ gcc_assert (nop_stalls > 0); ++ advance_cycle (nop_stalls); ++ stalls = -1; ++ } ++ else ++ { ++ stalls = state_transition (state_, insn); ++ ++ /* All targets are single-issue, so we advance one cycle once after ++ an insn has been issued successfully. */ ++ if (stalls <= 0) ++ advance_cycle (); ++ } ++ ++ return stalls; ++} ++ ++/* This function is similar to issue_insn (), but it advances cycles until INSN ++ can be issued successfully. If INSN can be issued at the current cycle, the ++ return value will be 0 or negaitive. Otherwise, the function will return ++ the cycles it has been skipped. */ ++int ++pipeline_simulator::force_issue_insn (rtx_insn *insn) ++{ ++ int stalls; ++ ++ stalls = issue_insn (insn); ++ ++ /* Skip cycles until we can issue the insn. */ ++ if (stalls > 0) ++ { ++ advance_cycle (stalls); ++ issue_insn (insn); ++ } ++ ++ return stalls; ++} ++ ++/* The main flow of the class STALL_INSERTER. We insert NOPs for structural ++ hazards because self-stalled instructions also consume the delay cycles ++ caused by data hazards. */ ++void ++stall_inserter::insert_stalls () ++{ ++ compute_bb_for_insn_safe (); ++ ++ insert_structural_hazard_stalls (); ++ insert_data_hazard_stalls (); ++ ++ /* We have to call the following two functions again after we inserting ++ some insns after it has been invoked. Otherwise, an assert expression ++ in final () will be triggered and cause to an internal compiler error. 
*/ ++ init_insn_lengths (); ++ shorten_branches (get_insns ()); ++ ++ free_bb_for_insn (); ++} ++ ++/* A helper function inserting NOPs. CYCLES indicates how many cycles the NOP ++ insn consumes. TYPE indicates what type of the NOP insn we want to insert; ++ now there are two types available: RES_DEP and DATA_DEP. */ ++rtx ++stall_inserter::emit_pseudo_nop_before ( ++ rtx_insn *insn, int cycles, enum dep_type type) ++{ ++ rtx nop_pattern; ++ rtx_insn *nop_insn; ++ int recog; ++ ++ switch (type) ++ { ++ case RES_DEP: ++ nop_pattern = gen_nop_res_dep (GEN_INT (cycles)); ++ break; ++ case DATA_DEP: ++ nop_pattern = gen_nop_data_dep (GEN_INT (cycles)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ nop_insn = emit_insn_before (nop_pattern, insn); ++ recog = recog_memoized (nop_insn); ++ gcc_assert(recog != -1); ++ ++ return nop_insn; ++} ++ ++void ++stall_inserter::insert_structural_hazard_stalls () ++{ ++ pipeline_simulator simulator; ++ rtx_insn *insn; ++ ++ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) ++ { ++ if (!insn_executable_p (insn)) continue; ++ ++ int stalls = simulator.force_issue_insn (insn); ++ ++ if (stalls > 0) ++ emit_pseudo_nop_before (insn, stalls, RES_DEP); ++ } ++} ++ ++void ++stall_inserter::insert_data_hazard_stalls () ++{ ++ pipeline_simulator simulator; ++ rtx_insn *insn; ++ ++ /* Calling to df_insn_rescan_all here is required in order to avoid crash ++ when some special options are specified by users, such as ++ -O0 -fschedule-insns2. */ ++ df_chain_add_problem (DF_DU_CHAIN); ++ df_insn_rescan_all (); ++ df_analyze (); ++ ++ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) ++ { ++ if (!insn_executable_p (insn)) continue; ++ ++ simulator.force_issue_insn (insn); ++ emit_pseudo_nops_for_data_hazards (insn, simulator); ++ } ++ ++ /* We must call df_finish_pass manually because it should be invoked before ++ BB information is destroyed. Hence we cannot set the TODO_df_finish flag ++ to the pass manager. 
*/ ++ df_insn_rescan_all (); ++ df_finish_pass (false); ++} ++ ++/* Traverse all insns using the results produced by INSN and ask SIMULATOR ++ how many delay cycles between them. If there are some delay cycles, insert ++ corresponding NOP insns there. */ ++void ++stall_inserter::emit_pseudo_nops_for_data_hazards ( ++ rtx_insn *insn, pipeline_simulator &simulator) ++{ ++ df_ref def; ++ df_link *link; ++ std::set<rtx> processed_insns; ++ ++ FOR_EACH_INSN_DEF (def, insn) ++ { ++ for (link = DF_REF_CHAIN (def); link; link = link->next) ++ { ++ if (!DF_REF_INSN_INFO (link->ref)) ++ continue; ++ ++ rtx_insn *use_insn = DF_REF_INSN (link->ref); ++ ++ if (!insn_executable_p (use_insn) ++ || processed_insns.count (use_insn)) ++ continue; ++ ++ int stalls = simulator.query_latency (insn, use_insn); ++ int distance = cycle_distance (insn, use_insn); ++ ++ if (stalls > distance) ++ { ++ stalls -= distance; ++ emit_pseudo_nop_before (use_insn, stalls, DATA_DEP); ++ processed_insns.insert (use_insn); ++ } ++ } ++ } ++} ++ ++pass_nds32_print_stalls::pass_nds32_print_stalls (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_print_stalls, ctxt) ++{ ++} ++ ++bool pass_nds32_print_stalls::gate (function *) ++{ ++ return TARGET_PRINT_STALLS; ++} ++ ++unsigned int ++pass_nds32_print_stalls::execute (function *) ++{ ++ stall_inserter inserter; ++ ++ inserter.insert_stalls (); ++ return 0; ++} ++ ++} // namespace scheduling ++} // namespace nds32 ++ ++/* ------------------------------------------------------------------------ */ ++ ++using namespace nds32; ++using namespace nds32::scheduling; ++ ++namespace { // anonymous namespace ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at II. 
*/ ++bool ++n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ /* MOVD44_E */ ++ case TYPE_ALU: ++ if (movd44_even_dep_p (consumer, def_reg)) ++ return true; ++ ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MUL: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. It requires two micro- ++ operations in order to write two registers. We have to check the ++ dependency from the producer to the first micro-operation. */ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_LOAD: ++ /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ ++ if (post_update_insn_p (consumer)) ++ use_rtx = extract_base_reg (consumer); ++ else ++ use_rtx = extract_mem_rtx (consumer); ++ break; ++ ++ case TYPE_STORE: ++ /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ ++ if (post_update_insn_p (consumer)) ++ use_rtx = extract_base_reg (consumer); ++ else ++ use_rtx = extract_mem_rtx (consumer); ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ /* ST_bi, ST_!bi_RI */ ++ if (!post_update_insn_p (consumer) ++ && !immed_offset_p (extract_mem_rtx (consumer))) ++ return false; ++ ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_STORE_MULTIPLE: ++ /* ADDR_IN */ ++ use_rtx = extract_base_reg (consumer); ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ /* SMW (N, 1) */ ++ use_rtx = extract_nth_access_rtx (consumer, 0); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = PATTERN (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return 
true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at AG (II). */ ++bool ++n8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_BRANCH: ++ use_rtx = extract_branch_target_rtx (consumer); ++ break; ++ ++ case TYPE_LOAD: ++ if (load_single_p (consumer)) ++ use_rtx = extract_mem_rtx (consumer); ++ else ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_STORE: ++ if (store_single_p (consumer) ++ && (!post_update_insn_p (consumer) ++ || immed_offset_p (extract_mem_rtx (consumer)))) ++ use_rtx = extract_mem_rtx (consumer); ++ else ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. */ ++bool ++n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ if (movd44_even_dep_p (consumer, def_reg)) ++ return true; ++ ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MUL: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. It requires two micro- ++ operations in order to write two registers. We have to check the ++ dependency from the producer to the first micro-operation. 
*/ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = extract_branch_condition_rtx (consumer); ++ break; ++ ++ case TYPE_STORE: ++ /* exclude ST_!bi_RR */ ++ if (!post_update_insn_p (consumer) ++ && !immed_offset_p (extract_mem_rtx (consumer))) ++ return false; ++ ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_nth_access_rtx (consumer, 0); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at AG (II). */ ++bool ++e8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg) ++{ ++ return n8_consumed_by_addr_in_p (consumer, def_reg); ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. */ ++bool ++e8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ case TYPE_STORE: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MUL: ++ case TYPE_MAC: ++ case TYPE_DIV: ++ case TYPE_BRANCH: ++ case TYPE_STORE_MULTIPLE: ++ return n8_consumed_by_ex_p (consumer, def_reg); ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. 
*/ ++bool ++n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ if (movd44_even_dep_p (consumer, def_reg)) ++ return true; ++ ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_PBSAD: ++ case TYPE_MUL: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_ALU_SHIFT: ++ use_rtx = extract_shift_reg (consumer); ++ break; ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_MAC: ++ use_rtx = PATTERN (consumer); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MMU: ++ if (GET_CODE (PATTERN (consumer)) == SET) ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ else ++ return true; ++ break; ++ ++ case TYPE_LOAD: ++ /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ ++ if (post_update_insn_p (consumer)) ++ use_rtx = extract_base_reg (consumer); ++ else ++ use_rtx = extract_mem_rtx (consumer); ++ break; ++ ++ case TYPE_STORE: ++ /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ ++ if (post_update_insn_p (consumer)) ++ use_rtx = extract_base_reg (consumer); ++ else ++ use_rtx = extract_mem_rtx (consumer); ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ /* exclude ST_!bi_RR */ ++ if (!post_update_insn_p (consumer) ++ && !immed_offset_p (extract_mem_rtx (consumer))) ++ return false; ++ ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_STORE_MULTIPLE: ++ /* ADDR_IN */ ++ use_rtx = extract_base_reg (consumer); ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ /* SMW (N, 1) */ ++ use_rtx = extract_nth_access_rtx (consumer, 0); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = PATTERN (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (reg_overlap_p 
(def_reg, use_rtx)) ++ return true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. */ ++bool ++n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ case TYPE_PBSAD: ++ case TYPE_MUL: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_ALU_SHIFT: ++ use_rtx = extract_shift_reg (consumer); ++ break; ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_MAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. In 2R1W configuration, ++ it requires two micro-operations in order to write two registers. ++ We have to check the dependency from the producer to the first ++ micro-operation. */ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MMU: ++ if (GET_CODE (PATTERN (consumer)) == SET) ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ else ++ return true; ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ use_rtx = extract_mem_rtx (consumer); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = PATTERN (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. 
*/
bool
n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx used;

  switch (get_attr_type (consumer))
    {
    /* These types are all checked against the source of the SET.  */
    case TYPE_ALU:
    case TYPE_PBSAD:
    case TYPE_MUL:
    case TYPE_DALU:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DPACK:
    case TYPE_DINSB:
    case TYPE_DCMP:
    case TYPE_DCLIP:
    case TYPE_DALUROUND:
      used = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      used = extract_shift_reg (consumer);
      break;

    /* Types with a dedicated dependency predicate.  */
    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_DWEXT:
      return wext_odd_dep_p (consumer, def_reg);

    case TYPE_DBPICK:
      return bpick_ra_rb_dep_p (consumer, def_reg);

    case TYPE_MAC:
    case TYPE_DMAC:
      used = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  */
    case TYPE_DIV:
      used = (divmod_p (consumer)
              ? SET_SRC (parallel_element (consumer, 0))
              : SET_SRC (PATTERN (consumer)));
      break;

    /* An MMU insn whose pattern is not a single SET is always treated as
       dependent.  */
    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) != SET)
        return true;

      used = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      used = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      used = extract_base_reg (consumer);
      break;

    case TYPE_BRANCH:
      used = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  return reg_overlap_p (def_reg, used) ? true : false;
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.
*/
bool
gw_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  /* This check handles exactly the same consumer types, in exactly the same
     way, as n10_consumed_by_ex_dep_p () defined above, so forward to it
     (the same arrangement e8_consumed_by_addr_in_p () uses for the N8
     check).  For divmodsi4 and udivmodsi4, which produce two results, that
     means the dependency is checked against the first micro-operation.  */
  return n10_consumed_by_ex_dep_p (consumer, def_reg);
}

/* Check dependencies from any stages to ALU_E1 (E1).  This is a helper
   function of n13_consumed_by_e1_dep_p ().
*/
bool
n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg)
{
  switch (INSN_CODE (alu_e1_insn))
    {
    /* BSP and BSE are supported by built-in functions, the corresponding
       patterns are formed by UNSPEC RTXs.  We have to handle them
       individually.  */
    case CODE_FOR_unspec_bsp:
    case CODE_FOR_unspec_bse:
      {
        rtx unspec = SET_SRC (parallel_element (alu_e1_insn, 0));
        gcc_assert (GET_CODE (unspec) == UNSPEC);

        /* DEF_REG must be exactly one of the first two UNSPEC operands.  */
        rtx ra = XVECEXP (unspec, 0, 0);
        rtx rb = XVECEXP (unspec, 0, 1);

        return rtx_equal_p (def_reg, ra) || rtx_equal_p (def_reg, rb);
      }

    /* Unlike general ALU instructions, MOVD44 requires operands at E1.  */
    case CODE_FOR_move_di:
    case CODE_FOR_move_df:
      {
        rtx src = SET_SRC (PATTERN (alu_e1_insn));
        rtx dst = SET_DEST (PATTERN (alu_e1_insn));

        /* Only a register-to-register move whose source is DEF_REG
           counts.  */
        return REG_P (dst) && REG_P (src) && rtx_equal_p (src, def_reg);
      }

    default:
      return false;
    }
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at E1.  Because the address generation unit is
   at E1, the address input should be ready at E1.  Note that the branch
   target is also a kind of address, so we have to check it.
*/
bool
n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx used;

  switch (get_attr_type (consumer))
    {
    /* ALU_E1 */
    case TYPE_ALU:
      return n13_alu_e1_insn_dep_reg_p (consumer, def_reg);

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_PBSAD:
    case TYPE_MUL:
      used = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MAC:
      used = extract_mac_non_acc_rtx (consumer);
      break;

    /* For a divmod insn, check the source of the first PARALLEL element.  */
    case TYPE_DIV:
      used = (divmod_p (consumer)
              ? SET_SRC (parallel_element (consumer, 0))
              : SET_SRC (PATTERN (consumer)));
      break;

    /* An MMU insn whose pattern is not a single SET is always treated as
       dependent.  */
    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) != SET)
        return true;

      used = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_BRANCH:
      used = extract_branch_target_rtx (consumer);
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      used = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      used = extract_base_reg (consumer);
      break;

    /* Any other type is reported as having no dependency at E1.  */
    default:
      return false;
    }

  return reg_overlap_p (def_reg, used) ? true : false;
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at E2.
*/ ++bool ++n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ case TYPE_STORE: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_ALU_SHIFT: ++ use_rtx = extract_shift_reg (consumer); ++ break; ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_rt_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_nth_access_rtx (consumer, 0); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = extract_branch_condition_rtx (consumer); ++ break; ++ ++ default: ++ gcc_unreachable(); ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at AG (E1). */ ++bool ++pn_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_LOAD: ++ if (load_single_p (consumer)) ++ use_rtx = extract_mem_rtx (consumer); ++ else ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_STORE: ++ if (store_single_p (consumer) ++ && (!post_update_insn_p (consumer) ++ || immed_offset_p (extract_mem_rtx (consumer)))) ++ use_rtx = extract_mem_rtx (consumer); ++ else ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++bool ++pn_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ if (get_attr_subtype (consumer) != SUBTYPE_SHIFT) ++ return false; ++ case TYPE_PBSAD: ++ case TYPE_PBSADA: ++ case TYPE_MUL: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ default: ++ 
gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++bool ++pn_consumed_by_e3_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ if (get_attr_subtype (consumer) == SUBTYPE_SHIFT) ++ return false; ++ case TYPE_PBSAD: ++ case TYPE_PBSADA: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_BRANCH: ++ return (reg_overlap_p (def_reg, extract_branch_target_rtx (consumer)) ++ || reg_overlap_p (def_reg, ++ extract_branch_condition_rtx (consumer))); ++ break; ++ ++ case TYPE_STORE: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_nth_access_rtx (consumer, 0); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++bool ++pn_consumed_by_e4_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_MAC: ++ use_rtx = SET_DEST (PATTERN (consumer)); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return reg_overlap_p (def_reg, use_rtx); ++} ++ ++} // anonymous namespace ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* Guard functions for N7 core. 
*/ ++ ++bool ++nds32_n7_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ return n7_consumed_by_ii_dep_p (consumer, def_reg); ++} ++ ++bool ++nds32_n7_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ /* If PRODUCER is a post-update LMW insn, the last micro-operation updates ++ the base register and the result is ready in II stage, so we don't need ++ to handle that case in this guard function and the corresponding bypass ++ rule. */ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ if (last_def_reg == NULL_RTX) ++ return false; ++ ++ gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); ++ ++ return n7_consumed_by_ii_dep_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for N8 core. */ ++ ++bool ++nds32_n8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ return n8_consumed_by_addr_in_p (consumer, def_reg); ++} ++ ++bool ++nds32_n8_load_bi_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ return n8_consumed_by_addr_in_p (consumer, def_reg); ++} ++ ++bool ++nds32_n8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ return n8_consumed_by_ex_p (consumer, def_reg); ++} ++ ++bool ++nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_ALU: ++ if (movd44_insn_p (producer)) ++ def_reg = extract_movd44_odd_reg (producer); ++ else ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_MUL: ++ 
case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ def_reg = SET_DEST (parallel_element (producer, 1)); ++ else ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n8_consumed_by_addr_in_p (consumer, def_reg); ++} ++ ++bool ++nds32_n8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ /* If PRODUCER is a post-update LMW insn, the last micro-operation updates ++ the base register and the result is ready in EX stage, so we don't need ++ to handle that case in this guard function and the corresponding bypass ++ rule. */ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ if (last_def_reg == NULL_RTX) ++ return false; ++ ++ gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); ++ ++ return n8_consumed_by_addr_in_p (consumer, last_def_reg); ++} ++ ++bool ++nds32_n8_last_load_two_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ int index = -2; ++ ++ /* If PRODUCER is a post-update insn, there is an additional one micro- ++ operation inserted in the end, so the last memory access operation should ++ be handled by this guard function and the corresponding bypass rule. 
*/ ++ if (post_update_insn_p (producer)) ++ index = -1; ++ ++ rtx last_two_def_reg = extract_nth_access_reg (producer, index); ++ ++ if (last_two_def_reg == NULL_RTX) ++ return false; ++ ++ gcc_assert (REG_P (last_two_def_reg) ++ || GET_CODE (last_two_def_reg) == SUBREG); ++ ++ return n8_consumed_by_addr_in_p (consumer, last_two_def_reg); ++} ++ ++bool ++nds32_n8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ /* If PRODUCER is a post-update LMW insn, the last micro-operation updates ++ the base register and the result is ready in EX stage, so we don't need ++ to handle that case in this guard function and the corresponding bypass ++ rule. */ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ if (last_def_reg == NULL_RTX) ++ return false; ++ ++ gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); ++ ++ return n8_consumed_by_ex_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for E8 cores. */ ++ ++bool ++nds32_e8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ return e8_consumed_by_addr_in_p (consumer, def_reg); ++} ++ ++bool ++nds32_e8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ return e8_consumed_by_ex_p (consumer, def_reg); ++} ++ ++bool ++nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_ALU: ++ /* No data hazards if AGEN's input is produced by MOVI or SETHI. 
*/ ++ if (GET_CODE (PATTERN (producer)) == SET) ++ { ++ rtx dest = SET_DEST (PATTERN (producer)); ++ rtx src = SET_SRC (PATTERN (producer)); ++ ++ if ((REG_P (dest) || GET_CODE (dest) == SUBREG) ++ && (GET_CODE (src) == CONST_INT || GET_CODE (src) == HIGH)) ++ return false; ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (e8_consumed_by_addr_in_p (consumer, def_reg1) ++ || e8_consumed_by_addr_in_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return e8_consumed_by_addr_in_p (consumer, def_reg); ++} ++ ++bool ++nds32_e8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ if (last_def_reg == NULL_RTX) ++ return false; ++ ++ gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); ++ ++ return e8_consumed_by_addr_in_p (consumer, last_def_reg); ++} ++ ++bool ++nds32_e8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ if (last_def_reg == NULL_RTX) ++ return false; ++ ++ gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); ++ ++ return e8_consumed_by_ex_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for N9 cores. */ ++ ++/* Check dependencies from MM to EX. 
*/ ++bool ++nds32_n9_2r1w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ /* LD_!bi */ ++ case TYPE_LOAD: ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n9_2r1w_consumed_by_ex_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from MM to EX. */ ++bool ++nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. */ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg1) ++ || n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to EX. */ ++bool ++nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ if (nds32_register_ports_config == REG_PORT_2R1W) ++ { ++ /* The base-update micro operation occupies the last cycle. 
*/ ++ if (post_update_insn_p (producer)) ++ return false; ++ ++ /* When the base register is in the list of a load multiple insn and the ++ access order of the base register is not the last one, we need an ++ additional micro operation to commit the load result to the base ++ register -- we can treat the base register as the last defined ++ register. */ ++ size_t i; ++ size_t n_elems = parallel_elements (producer); ++ rtx base_reg = extract_base_reg (producer); ++ ++ for (i = 0; i < n_elems; ++i) ++ { ++ rtx load_rtx = extract_nth_access_rtx (producer, i); ++ rtx list_element = SET_DEST (load_rtx); ++ ++ if (rtx_equal_p (base_reg, list_element) && i != n_elems - 1) ++ { ++ last_def_reg = base_reg; ++ break; ++ } ++ } ++ ++ return n9_2r1w_consumed_by_ex_dep_p (consumer, last_def_reg); ++ } ++ else ++ return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for N10 cores. */ ++ ++/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ ++bool ++nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ gcc_assert (get_attr_type (producer) == TYPE_FLOAD ++ || get_attr_type (producer) == TYPE_FSTORE); ++ gcc_assert (get_attr_type (consumer) == TYPE_FLOAD ++ || get_attr_type (consumer) == TYPE_FSTORE); ++ ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ return reg_overlap_p (extract_base_reg (producer), ++ extract_mem_rtx (consumer)); ++} ++ ++/* Check dependencies from MM to EX. */ ++bool ++nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ case TYPE_DALU64: ++ case TYPE_DMUL: ++ case TYPE_DMAC: ++ case TYPE_DALUROUND: ++ case TYPE_DBPICK: ++ case TYPE_DWEXT: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. 
*/ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (n10_consumed_by_ex_dep_p (consumer, def_reg1) ++ || n10_consumed_by_ex_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n10_consumed_by_ex_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to EX. */ ++bool ++nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return n10_consumed_by_ex_dep_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for Graywolf cores. */ ++ ++/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ ++bool ++nds32_gw_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ return nds32_n10_ex_to_ex_p (producer, consumer); ++} ++ ++/* Check dependencies from MM to EX. */ ++bool ++nds32_gw_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ case TYPE_DALU64: ++ case TYPE_DMUL: ++ case TYPE_DMAC: ++ case TYPE_DALUROUND: ++ case TYPE_DBPICK: ++ case TYPE_DWEXT: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. 
*/ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (gw_consumed_by_ex_dep_p (consumer, def_reg1) ++ || gw_consumed_by_ex_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return gw_consumed_by_ex_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to EX. */ ++bool ++nds32_gw_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return gw_consumed_by_ex_dep_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for N12/N13 cores. */ ++ ++/* Check dependencies from E2 to E1. */ ++bool ++nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ /* Only post-update load/store instructions are considered. These ++ instructions produce address output at E2. */ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ case TYPE_ALU: ++ case TYPE_ALU_SHIFT: ++ case TYPE_PBSAD: ++ case TYPE_PBSADA: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_BRANCH: ++ return true; ++ ++ case TYPE_DIV: ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. 
*/ ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (n13_consumed_by_e1_dep_p (consumer, def_reg1) ++ || n13_consumed_by_e1_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n13_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from Load-Store Unit (E3) to E1. */ ++bool ++nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ gcc_assert (get_attr_type (producer) == TYPE_LOAD); ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ ++ return n13_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from Load-Store Unit (E3) to E2. */ ++bool ++nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ gcc_assert (get_attr_type (producer) == TYPE_LOAD); ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ ++ return n13_consumed_by_e2_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E1. */ ++bool ++nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return n13_consumed_by_e1_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E2. */ ++bool ++nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return n13_consumed_by_e2_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N-1) to E1. 
*/ ++bool ++nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_two_def_reg = extract_nth_access_reg (producer, -2); ++ ++ if (last_two_def_reg == NULL_RTX) ++ return false; ++ ++ return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg); ++} ++ ++/* Guard functions for Panther cores. */ ++ ++/* Check dependencies from E2 to E1. */ ++bool ++nds32_pn_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_ALU: ++ gcc_assert (get_attr_subtype (producer) == SUBTYPE_SHIFT); ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from E3 to E1. */ ++bool ++nds32_pn_e3_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_ALU: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from E3 to E2. */ ++bool ++nds32_pn_e3_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_ALU: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e2_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from E4 to E1. 
*/ ++bool ++nds32_pn_e4_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (pn_consumed_by_e1_dep_p (consumer, def_reg1) ++ || pn_consumed_by_e1_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_LOAD: ++ if (post_update_insn_p (producer) ++ && pn_consumed_by_e1_dep_p (consumer, extract_base_reg (producer))) ++ return true; ++ ++ if (!load_full_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from E4 to E2. 
*/ ++bool ++nds32_pn_e4_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (pn_consumed_by_e2_dep_p (consumer, def_reg1) ++ || pn_consumed_by_e2_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_LOAD: ++ if (post_update_insn_p (producer) ++ && pn_consumed_by_e2_dep_p (consumer, extract_base_reg (producer))) ++ return true; ++ ++ if (!load_full_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e2_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from E4 to E3. 
*/ ++bool ++nds32_pn_e4_to_e3_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (pn_consumed_by_e3_dep_p (consumer, def_reg1) ++ || pn_consumed_by_e3_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_LOAD: ++ if (post_update_insn_p (producer) ++ && pn_consumed_by_e3_dep_p (consumer, extract_base_reg (producer))) ++ return true; ++ ++ if (load_partial_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e3_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from WB to E1. */ ++bool ++nds32_pn_wb_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ if (!load_partial_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from WB to E2. 
*/ ++bool ++nds32_pn_wb_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ if (!load_partial_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e2_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from WB to E3. */ ++bool ++nds32_pn_wb_to_e3_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ if (!load_partial_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e3_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from WB to E4. */ ++bool ++nds32_pn_wb_to_e4_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ if (!load_partial_word_p (producer)) ++ return false; ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return pn_consumed_by_e4_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E1. */ ++bool ++nds32_pn_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return pn_consumed_by_e1_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E2. */ ++bool ++nds32_pn_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return pn_consumed_by_e2_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E3. 
*/ ++bool ++nds32_pn_last_load_to_e3_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return pn_consumed_by_e3_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N - 1) to E1. */ ++bool ++nds32_pn_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_two_def_reg = extract_nth_access_reg (producer, -2); ++ ++ if (last_two_def_reg == NULL_RTX) ++ return false; ++ ++ return pn_consumed_by_e1_dep_p (consumer, last_two_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N - 1) to E2. */ ++bool ++nds32_pn_last_two_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_two_def_reg = extract_nth_access_reg (producer, -2); ++ ++ if (last_two_def_reg == NULL_RTX) ++ return false; ++ ++ return pn_consumed_by_e2_dep_p (consumer, last_two_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N - 2) to E1. */ ++bool ++nds32_pn_last_three_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_three_def_reg = extract_nth_access_reg (producer, -3); ++ ++ if (last_three_def_reg == NULL_RTX) ++ return false; ++ ++ return pn_consumed_by_e1_dep_p (consumer, last_three_def_reg); ++} + + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c +index 361d001..b45d3e6 100644 +--- a/gcc/config/nds32/nds32-predicates.c ++++ b/gcc/config/nds32/nds32-predicates.c +@@ -24,14 +24,41 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" +-#include "rtl.h" + #include "tree.h" +-#include "tm_p.h" +-#include "optabs.h" /* For GEN_FCN. */ ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. 
*/ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" + #include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" + #include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" + #include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" + + /* ------------------------------------------------------------------------ */ + +@@ -98,21 +125,33 @@ nds32_consecutive_registers_load_store_p (rtx op, + We have to extract reg and mem of every element and + check if the information is valid for multiple load/store operation. */ + bool +-nds32_valid_multiple_load_store (rtx op, bool load_p) ++nds32_valid_multiple_load_store_p (rtx op, bool load_p, bool bim_p) + { + int count; + int first_elt_regno; ++ int update_base_elt_idx; ++ int offset; + rtx elt; ++ rtx update_base; + +- /* Get the counts of elements in the parallel rtx. */ +- count = XVECLEN (op, 0); +- /* Pick up the first element. */ +- elt = XVECEXP (op, 0, 0); ++ /* Get the counts of elements in the parallel rtx. ++ Last one is update base register if bim_p. ++ and pick up the first element. */ ++ if (bim_p) ++ { ++ count = XVECLEN (op, 0) - 1; ++ elt = XVECEXP (op, 0, 1); ++ } ++ else ++ { ++ count = XVECLEN (op, 0); ++ elt = XVECEXP (op, 0, 0); ++ } + + /* Perform some quick check for the first element in the parallel rtx. */ + if (GET_CODE (elt) != SET + || count <= 1 +- || count > 8) ++ || count > 25) + return false; + + /* Pick up regno of first element for further detail checking. 
+@@ -138,11 +177,29 @@ nds32_valid_multiple_load_store (rtx op, bool load_p) + Refer to nds32-multiple.md for more information + about following checking. + The starting element of parallel rtx is index 0. */ +- if (!nds32_consecutive_registers_load_store_p (op, load_p, 0, ++ if (!nds32_consecutive_registers_load_store_p (op, load_p, bim_p ? 1 : 0, + first_elt_regno, + count)) + return false; + ++ if (bim_p) ++ { ++ update_base_elt_idx = 0; ++ update_base = XVECEXP (op, 0, update_base_elt_idx); ++ if (!REG_P (SET_DEST (update_base))) ++ return false; ++ if (GET_CODE (SET_SRC (update_base)) != PLUS) ++ return false; ++ else ++ { ++ offset = count * UNITS_PER_WORD; ++ elt = XEXP (SET_SRC (update_base), 1); ++ if (GET_CODE (elt) != CONST_INT ++ || (INTVAL (elt) != offset)) ++ return false; ++ } ++ } ++ + /* Pass all test, this is a valid rtx. */ + return true; + } +@@ -174,47 +231,47 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) + { + elt = XVECEXP (op, 0, index); + if (GET_CODE (elt) != SET) +- return false; ++ return false; + } + + /* For push operation, the parallel rtx looks like: + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) +- (reg:SI Rb)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) +- (reg:SI Rb+1)) +- ... +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) +- (reg:SI Re)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) +- (reg:SI FP_REGNUM)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) +- (reg:SI GP_REGNUM)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) +- (reg:SI LP_REGNUM)) +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int -32)))]) ++ (reg:SI Rb)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) ++ (reg:SI Rb+1)) ++ ... 
++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) ++ (reg:SI Re)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) ++ (reg:SI FP_REGNUM)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) ++ (reg:SI GP_REGNUM)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) ++ (reg:SI LP_REGNUM)) ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int -32)))]) + + For pop operation, the parallel rtx looks like: + (parallel [(set (reg:SI Rb) +- (mem (reg:SI SP_REGNUM))) +- (set (reg:SI Rb+1) +- (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) +- ... +- (set (reg:SI Re) +- (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) +- (set (reg:SI FP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) +- (set (reg:SI GP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) +- (set (reg:SI LP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ ++ (mem (reg:SI SP_REGNUM))) ++ (set (reg:SI Rb+1) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) ++ ... ++ (set (reg:SI Re) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) ++ (set (reg:SI FP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) ++ (set (reg:SI GP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) ++ (set (reg:SI LP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ + + /* 1. Consecutive registers push/pop operations. +- We need to calculate how many registers should be consecutive. +- The $sp adjustment rtx, $fp push rtx, $gp push rtx, +- and $lp push rtx are excluded. */ ++ We need to calculate how many registers should be consecutive. ++ The $sp adjustment rtx, $fp push rtx, $gp push rtx, ++ and $lp push rtx are excluded. */ + + /* Detect whether we have $fp, $gp, or $lp in the parallel rtx. 
*/ + save_fp = reg_mentioned_p (gen_rtx_REG (SImode, FP_REGNUM), op); +@@ -238,19 +295,19 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) + first_regno = REGNO (elt_reg); + + /* The 'push' operation is a kind of store operation. +- The 'pop' operation is a kind of load operation. +- Pass corresponding false/true as second argument (bool load_p). +- The par_index is supposed to start with index 0. */ ++ The 'pop' operation is a kind of load operation. ++ Pass corresponding false/true as second argument (bool load_p). ++ The par_index is supposed to start with index 0. */ + if (!nds32_consecutive_registers_load_store_p (op, + !push_p ? true : false, + 0, + first_regno, + rest_count)) +- return false; ++ return false; + } + + /* 2. Valid $fp/$gp/$lp push/pop operations. +- Remember to set start index for checking them. */ ++ Remember to set start index for checking them. */ + + /* The rest_count is the start index for checking $fp/$gp/$lp. */ + index = rest_count; +@@ -269,9 +326,9 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) + index++; + + if (GET_CODE (elt_mem) != MEM +- || GET_CODE (elt_reg) != REG +- || REGNO (elt_reg) != FP_REGNUM) +- return false; ++ || GET_CODE (elt_reg) != REG ++ || REGNO (elt_reg) != FP_REGNUM) ++ return false; + } + if (save_gp) + { +@@ -281,9 +338,9 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) + index++; + + if (GET_CODE (elt_mem) != MEM +- || GET_CODE (elt_reg) != REG +- || REGNO (elt_reg) != GP_REGNUM) +- return false; ++ || GET_CODE (elt_reg) != REG ++ || REGNO (elt_reg) != GP_REGNUM) ++ return false; + } + if (save_lp) + { +@@ -293,16 +350,16 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) + index++; + + if (GET_CODE (elt_mem) != MEM +- || GET_CODE (elt_reg) != REG +- || REGNO (elt_reg) != LP_REGNUM) +- return false; ++ || GET_CODE (elt_reg) != REG ++ || REGNO (elt_reg) != LP_REGNUM) ++ return false; + } + + /* 3. The last element must be stack adjustment rtx. 
+- Its form of rtx should be: +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int X))) +- The X could be positive or negative value. */ ++ Its form of rtx should be: ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int X))) ++ The X could be positive or negative value. */ + + /* Pick up the last element. */ + elt = XVECEXP (op, 0, total_count - 1); +@@ -322,54 +379,57 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) + } + + /* Function to check if 'bclr' instruction can be used with IVAL. */ +-int +-nds32_can_use_bclr_p (int ival) ++bool ++nds32_can_use_bclr_p (HOST_WIDE_INT ival) + { + int one_bit_count; ++ unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); + + /* Calculate the number of 1-bit of (~ival), if there is only one 1-bit, + it means the original ival has only one 0-bit, + So it is ok to perform 'bclr' operation. */ + +- one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival)); ++ one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival) & mask); + + /* 'bclr' is a performance extension instruction. */ +- return (TARGET_PERF_EXT && (one_bit_count == 1)); ++ return (TARGET_EXT_PERF && (one_bit_count == 1)); + } + + /* Function to check if 'bset' instruction can be used with IVAL. */ +-int +-nds32_can_use_bset_p (int ival) ++bool ++nds32_can_use_bset_p (HOST_WIDE_INT ival) + { + int one_bit_count; ++ unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); + + /* Caculate the number of 1-bit of ival, if there is only one 1-bit, + it is ok to perform 'bset' operation. */ + +- one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); ++ one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask); + + /* 'bset' is a performance extension instruction. */ +- return (TARGET_PERF_EXT && (one_bit_count == 1)); ++ return (TARGET_EXT_PERF && (one_bit_count == 1)); + } + + /* Function to check if 'btgl' instruction can be used with IVAL. 
*/ +-int +-nds32_can_use_btgl_p (int ival) ++bool ++nds32_can_use_btgl_p (HOST_WIDE_INT ival) + { + int one_bit_count; ++ unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); + + /* Caculate the number of 1-bit of ival, if there is only one 1-bit, + it is ok to perform 'btgl' operation. */ + +- one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); ++ one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask); + + /* 'btgl' is a performance extension instruction. */ +- return (TARGET_PERF_EXT && (one_bit_count == 1)); ++ return (TARGET_EXT_PERF && (one_bit_count == 1)); + } + + /* Function to check if 'bitci' instruction can be used with IVAL. */ +-int +-nds32_can_use_bitci_p (int ival) ++bool ++nds32_can_use_bitci_p (HOST_WIDE_INT ival) + { + /* If we are using V3 ISA, we have 'bitci' instruction. + Try to see if we can present 'andi' semantic with +@@ -381,4 +441,286 @@ nds32_can_use_bitci_p (int ival) + && satisfies_constraint_Iu15 (gen_int_mode (~ival, SImode))); + } + ++/* Return true if is load/store with SYMBOL_REF addressing mode ++ and memory mode is SImode. */ ++bool ++nds32_symbol_load_store_p (rtx_insn *insn) ++{ ++ rtx mem_src = NULL_RTX; ++ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_LOAD: ++ mem_src = SET_SRC (PATTERN (insn)); ++ break; ++ case TYPE_STORE: ++ mem_src = SET_DEST (PATTERN (insn)); ++ break; ++ default: ++ break; ++ } ++ ++ /* Find load/store insn with addressing mode is SYMBOL_REF. 
*/ ++ if (mem_src != NULL_RTX) ++ { ++ if ((GET_CODE (mem_src) == ZERO_EXTEND) ++ || (GET_CODE (mem_src) == SIGN_EXTEND)) ++ mem_src = XEXP (mem_src, 0); ++ ++ if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF) ++ || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM)) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Vaild memory operand for floating-point loads and stores */ ++bool ++nds32_float_mem_operand_p (rtx op) ++{ ++ enum machine_mode mode = GET_MODE (op); ++ rtx addr = XEXP (op, 0); ++ ++ /* Not support [symbol] [const] memory */ ++ if (GET_CODE (addr) == SYMBOL_REF ++ || GET_CODE (addr) == CONST ++ || GET_CODE (addr) == LO_SUM) ++ return false; ++ ++ if (GET_CODE (addr) == PLUS) ++ { ++ if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) ++ return false; ++ ++ /* Restrict const range: (imm12s << 2) */ ++ if (GET_CODE (XEXP (addr, 1)) == CONST_INT) ++ { ++ if ((mode == SImode || mode == SFmode) ++ && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1))) ++ && !satisfies_constraint_Is14 ( XEXP(addr, 1))) ++ return false; ++ ++ if ((mode == DImode || mode == DFmode) ++ && NDS32_DOUBLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1))) ++ && !satisfies_constraint_Is14 (XEXP (addr, 1))) ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++int ++nds32_cond_move_p (rtx cmp_rtx) ++{ ++ enum machine_mode cmp0_mode = GET_MODE (XEXP (cmp_rtx, 0)); ++ enum machine_mode cmp1_mode = GET_MODE (XEXP (cmp_rtx, 1)); ++ enum rtx_code cond = GET_CODE (cmp_rtx); ++ ++ if ((cmp0_mode == DFmode || cmp0_mode == SFmode) ++ && (cmp1_mode == DFmode || cmp1_mode == SFmode) ++ && (cond == ORDERED || cond == UNORDERED)) ++ return true; ++ return false; ++} ++ ++/* Return true if the addresses in mem1 and mem2 are suitable for use in ++ an fldi or fsdi instruction. ++ ++ This can only happen when addr1 and addr2, the addresses in mem1 ++ and mem2, are consecutive memory locations (addr1 + 4 == addr2). ++ addr1 must also be aligned on a 64-bit boundary. 
*/ ++bool ++nds32_memory_merge_peep_p (rtx mem1, rtx mem2) ++{ ++ rtx addr1, addr2; ++ unsigned int reg1; ++ HOST_WIDE_INT offset1; ++ ++ /* The mems cannot be volatile. */ ++ if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) ++ return false; ++ ++ /* MEM1 should be aligned on a 64-bit boundary. */ ++ if (MEM_ALIGN (mem1) < 64) ++ return false; ++ ++ addr1 = XEXP (mem1, 0); ++ addr2 = XEXP (mem2, 0); ++ ++ /* Extract a register number and offset (if used) from the first addr. */ ++ if (GET_CODE (addr1) == PLUS) ++ { ++ if (GET_CODE (XEXP (addr1, 0)) != REG) ++ return false; ++ else ++ { ++ reg1 = REGNO (XEXP (addr1, 0)); ++ if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) ++ return false; ++ ++ offset1 = INTVAL (XEXP (addr1, 1)); ++ } ++ } ++ else if (GET_CODE (addr1) != REG) ++ return false; ++ else ++ { ++ reg1 = REGNO (addr1); ++ /* This was a simple (mem (reg)) expression. Offset is 0. */ ++ offset1 = 0; ++ } ++ /* Make sure the second address is a (mem (plus (reg) (const_int). */ ++ if (GET_CODE (addr2) != PLUS) ++ return false; ++ ++ if (GET_CODE (XEXP (addr2, 0)) != REG ++ || GET_CODE (XEXP (addr2, 1)) != CONST_INT) ++ return false; ++ ++ if (reg1 != REGNO (XEXP (addr2, 0))) ++ return false; ++ ++ /* The first offset must be evenly divisible by 8 to ensure the ++ address is 64 bit aligned. */ ++ if (offset1 % 8 != 0) ++ return false; ++ ++ /* The offset for the second addr must be 4 more than the first addr. 
*/ ++ if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) ++ return false; ++ ++ return true; ++} ++ ++bool ++nds32_const_double_range_ok_p (rtx op, enum machine_mode mode, ++ HOST_WIDE_INT lower, HOST_WIDE_INT upper) ++{ ++ if (GET_CODE (op) != CONST_DOUBLE ++ || GET_MODE (op) != mode) ++ return false; ++ ++ const REAL_VALUE_TYPE *rv; ++ long val; ++ ++ rv = CONST_DOUBLE_REAL_VALUE (op); ++ REAL_VALUE_TO_TARGET_SINGLE (*rv, val); ++ ++ return val >= lower && val < upper; ++} ++ ++bool ++nds32_const_unspec_p (rtx x) ++{ ++ if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == PLUS) ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == UNSPEC) ++ { ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOTINIT: ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLT: ++ case UNSPEC_TLSGD: ++ case UNSPEC_TLSLD: ++ case UNSPEC_TLSIE: ++ case UNSPEC_TLSLE: ++ return false; ++ default: ++ return true; ++ } ++ } ++ } ++ ++ if (GET_CODE (x) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (x)) ++ return false; ++ ++ return true; ++} ++ ++HOST_WIDE_INT ++const_vector_to_hwint (rtx op) ++{ ++ HOST_WIDE_INT hwint = 0; ++ HOST_WIDE_INT mask; ++ int i; ++ int shift_adv; ++ int shift = 0; ++ int nelem; ++ ++ switch (GET_MODE (op)) ++ { ++ case V2HImode: ++ mask = 0xffff; ++ shift_adv = 16; ++ nelem = 2; ++ break; ++ case V4QImode: ++ mask = 0xff; ++ shift_adv = 8; ++ nelem = 4; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (TARGET_BIG_ENDIAN) ++ { ++ for (i = 0; i < nelem; ++i) ++ { ++ HOST_WIDE_INT val = XINT (XVECEXP (op, 0, nelem - i - 1), 0); ++ hwint |= (val & mask) << shift; ++ shift = shift + shift_adv; ++ } ++ } ++ else ++ { ++ for (i = 0; i < nelem; ++i) ++ { ++ HOST_WIDE_INT val = XINT (XVECEXP (op, 0, i), 0); ++ hwint |= (val & mask) << shift; ++ shift = shift + shift_adv; ++ } ++ } ++ ++ return hwint; ++} ++ ++bool ++nds32_valid_CVp5_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16)); 
++} ++ ++bool ++nds32_valid_CVs5_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival < (1 << 4)) && (ival >= -(1 << 4)); ++} ++ ++bool ++nds32_valid_CVs2_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival < (1 << 19)) && (ival >= -(1 << 19)); ++} ++ ++bool ++nds32_valid_CVhi_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival != 0) && ((ival & 0xfff) == 0); ++} ++ + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h +index d66749d..19e69e3 100644 +--- a/gcc/config/nds32/nds32-protos.h ++++ b/gcc/config/nds32/nds32-protos.h +@@ -28,10 +28,14 @@ extern void nds32_init_expanders (void); + + /* Register Usage. */ + ++/* -- Order of Allocation of Registers. */ ++extern void nds32_adjust_reg_alloc_order (void); ++ + /* -- How Values Fit in Registers. */ + +-extern int nds32_hard_regno_nregs (int, machine_mode); +-extern int nds32_hard_regno_mode_ok (int, machine_mode); ++extern int nds32_hard_regno_nregs (int, enum machine_mode); ++extern int nds32_hard_regno_mode_ok (int, enum machine_mode); ++extern int nds32_modes_tieable_p (enum machine_mode, enum machine_mode); + + + /* Register Classes. */ +@@ -43,6 +47,7 @@ extern enum reg_class nds32_regno_reg_class (int); + + /* -- Basic Stack Layout. */ + ++extern rtx nds32_dynamic_chain_address (rtx); + extern rtx nds32_return_addr_rtx (int, rtx); + + /* -- Eliminating Frame Pointer and Arg Pointer. */ +@@ -61,22 +66,88 @@ extern void nds32_expand_prologue (void); + extern void nds32_expand_epilogue (bool); + extern void nds32_expand_prologue_v3push (void); + extern void nds32_expand_epilogue_v3pop (bool); ++extern void nds32_emit_push_fpr_callee_saved (int); ++extern void nds32_emit_pop_fpr_callee_saved (int); ++extern void nds32_emit_v3pop_fpr_callee_saved (int); ++ ++/* Controlling Debugging Information Format. 
*/ ++ ++extern unsigned int nds32_dbx_register_number (unsigned int); + + /* ------------------------------------------------------------------------ */ + +-/* Auxiliary functions for auxiliary macros in nds32.h. */ ++/* Auxiliary functions for manipulation DI mode. */ + +-extern bool nds32_ls_333_p (rtx, rtx, rtx, machine_mode); ++extern rtx nds32_di_high_part_subreg(rtx); ++extern rtx nds32_di_low_part_subreg(rtx); + + /* Auxiliary functions for expanding rtl used in nds32-multiple.md. */ + +-extern rtx nds32_expand_load_multiple (int, int, rtx, rtx); +-extern rtx nds32_expand_store_multiple (int, int, rtx, rtx); +-extern int nds32_expand_movmemqi (rtx, rtx, rtx, rtx); ++extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *); ++extern rtx nds32_expand_store_multiple (int, int, rtx, rtx, bool, rtx *); ++extern bool nds32_expand_movmemsi (rtx, rtx, rtx, rtx); ++extern bool nds32_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx); ++extern bool nds32_expand_movstr (rtx, rtx, rtx); ++extern bool nds32_expand_strlen (rtx, rtx, rtx, rtx); + + /* Auxiliary functions for multiple load/store predicate checking. */ + +-extern bool nds32_valid_multiple_load_store (rtx, bool); ++extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool); ++ ++/* Auxiliary functions for guard function checking in pipelines.md. 
*/ ++ ++extern bool nds32_n7_load_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n7_last_load_to_ii_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_n8_load_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n8_load_bi_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n8_load_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n8_ex_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n8_last_load_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n8_last_load_two_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n8_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_e8_load_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_e8_load_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_e8_ex_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_e8_last_load_to_ii_p (rtx_insn *, rtx_insn *); ++extern bool nds32_e8_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_gw_ex_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_gw_mm_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_gw_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_n13_e2_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_load_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_load_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_last_load_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_last_load_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_last_two_load_to_e1_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_pn_e2_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_e3_to_e1_p 
(rtx_insn *, rtx_insn *); ++extern bool nds32_pn_e3_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_e4_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_e4_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_e4_to_e3_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_wb_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_wb_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_wb_to_e3_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_wb_to_e4_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_last_load_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_last_load_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_last_load_to_e3_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_last_two_load_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_last_two_load_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_pn_last_three_load_to_e1_p (rtx_insn *, rtx_insn *); + + /* Auxiliary functions for stack operation predicate checking. */ + +@@ -84,55 +155,176 @@ extern bool nds32_valid_stack_push_pop_p (rtx, bool); + + /* Auxiliary functions for bit operation detection. */ + +-extern int nds32_can_use_bclr_p (int); +-extern int nds32_can_use_bset_p (int); +-extern int nds32_can_use_btgl_p (int); ++extern bool nds32_can_use_bclr_p (HOST_WIDE_INT); ++extern bool nds32_can_use_bset_p (HOST_WIDE_INT); ++extern bool nds32_can_use_btgl_p (HOST_WIDE_INT); + +-extern int nds32_can_use_bitci_p (int); ++extern bool nds32_can_use_bitci_p (HOST_WIDE_INT); + +-/* Auxiliary function for 'Computing the Length of an Insn'. */ ++extern bool nds32_const_double_range_ok_p (rtx, enum machine_mode, ++ HOST_WIDE_INT, HOST_WIDE_INT); + +-extern int nds32_adjust_insn_length (rtx_insn *, int); ++extern bool nds32_const_unspec_p (rtx x); + + /* Auxiliary functions for FP_AS_GP detection. 
*/ + +-extern int nds32_fp_as_gp_check_available (void); ++extern bool nds32_symbol_load_store_p (rtx_insn *); ++extern bool nds32_naked_function_p (tree); + + /* Auxiliary functions for jump table generation. */ + + extern const char *nds32_output_casesi_pc_relative (rtx *); + extern const char *nds32_output_casesi (rtx *); + ++/* Auxiliary functions for conditional branch generation. */ ++ ++extern enum nds32_expand_result_type nds32_expand_cbranch (rtx *); ++extern enum nds32_expand_result_type nds32_expand_cstore (rtx *); ++extern void nds32_expand_float_cbranch (rtx *); ++extern void nds32_expand_float_cstore (rtx *); ++ ++/* Auxiliary functions for conditional move generation. */ ++ ++extern enum nds32_expand_result_type nds32_expand_movcc (rtx *); ++extern void nds32_expand_float_movcc (rtx *); ++ ++/* Auxiliary functions for expand unalign load instruction. */ ++ ++extern void nds32_expand_unaligned_load (rtx *, enum machine_mode); ++ ++/* Auxiliary functions for expand extv/insv instruction. */ ++ ++extern enum nds32_expand_result_type nds32_expand_extv (rtx *); ++extern enum nds32_expand_result_type nds32_expand_insv (rtx *); ++ ++/* Auxiliary functions for expand unalign store instruction. */ ++ ++extern void nds32_expand_unaligned_store (rtx *, enum machine_mode); ++ ++/* Auxiliary functions for expand PIC instruction. */ ++ ++extern void nds32_expand_pic_move (rtx *); ++ ++/* Auxiliary functions to legitimize PIC address. */ ++ ++extern rtx nds32_legitimize_pic_address (rtx); ++ ++/* Auxiliary functions for expand TLS instruction. */ ++ ++extern void nds32_expand_tls_move (rtx *); ++ ++/* Auxiliary functions to legitimize TLS address. */ ++ ++extern rtx nds32_legitimize_tls_address (rtx); ++ ++/* Auxiliary functions to identify thread-local symbol. */ ++ ++extern bool nds32_tls_referenced_p (rtx); ++ ++/* Auxiliary functions for expand ICT instruction. 
*/ ++ ++extern void nds32_expand_ict_move (rtx *); ++ ++/* Auxiliary functions to legitimize address for indirect-call symbol. */ ++ ++extern rtx nds32_legitimize_ict_address (rtx); ++ ++/* Auxiliary functions to identify indirect-call symbol. */ ++ ++extern bool nds32_indirect_call_referenced_p (rtx); ++ ++/* Auxiliary functions to identify long-call symbol. */ ++extern bool nds32_long_call_p (rtx); ++ ++/* Auxiliary functions to identify SYMBOL_REF and LABEL_REF pattern. */ ++ ++extern bool symbolic_reference_mentioned_p (rtx); ++ ++/* Auxiliary functions to identify conditional move comparison operand. */ ++ ++extern int nds32_cond_move_p (rtx); ++ ++/* Auxiliary functions to identify address for peephole2 merge instruction. */ ++ ++extern bool nds32_memory_merge_peep_p (rtx, rtx); ++ + /* Auxiliary functions to identify 16 bit addresing mode. */ + + extern enum nds32_16bit_address_type nds32_mem_format (rtx); + ++/* Auxiliary functions to identify floating-point addresing mode. */ ++ ++extern bool nds32_float_mem_operand_p (rtx); ++ + /* Auxiliary functions to output assembly code. 
*/ + + extern const char *nds32_output_16bit_store (rtx *, int); + extern const char *nds32_output_16bit_load (rtx *, int); + extern const char *nds32_output_32bit_store (rtx *, int); + extern const char *nds32_output_32bit_load (rtx *, int); +-extern const char *nds32_output_32bit_load_s (rtx *, int); ++extern const char *nds32_output_32bit_load_se (rtx *, int); ++extern const char *nds32_output_float_load(rtx *); ++extern const char *nds32_output_float_store(rtx *); ++extern const char *nds32_output_smw_single_word (rtx *); ++extern const char *nds32_output_smw_double_word (rtx *); ++extern const char *nds32_output_lmw_single_word (rtx *); ++extern const char *nds32_output_double (rtx *, bool); ++extern const char *nds32_output_cbranchsi4_equality_zero (rtx_insn *, rtx *); ++extern const char *nds32_output_cbranchsi4_equality_reg (rtx_insn *, rtx *); ++extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *, ++ rtx *); ++extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *); ++ ++extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool); ++ ++extern const char *nds32_output_call (rtx, rtx *, rtx, ++ const char *, const char *, bool); ++extern const char *nds32_output_tls_desc (rtx *); ++extern const char *nds32_output_tls_ie (rtx *); + + /* Auxiliary functions to output stack push/pop instruction. */ + + extern const char *nds32_output_stack_push (rtx); + extern const char *nds32_output_stack_pop (rtx); ++extern const char *nds32_output_return (void); ++ ++ ++/* Auxiliary functions to split/output sms pattern. */ ++extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx); ++extern const char *nds32_output_sms (rtx, rtx, rtx, rtx); ++extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ ++/* Auxiliary functions to split double word RTX pattern. 
*/ ++ ++extern void nds32_spilt_doubleword (rtx *, bool); ++extern void nds32_split_ashiftdi3 (rtx, rtx, rtx); ++extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx); ++extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx); ++extern void nds32_split_rotatertdi3 (rtx, rtx, rtx); ++ ++/* Auxiliary functions to split large constant RTX pattern. */ ++ ++extern void nds32_expand_constant (enum machine_mode, ++ HOST_WIDE_INT, rtx, rtx); + + /* Auxiliary functions to check using return with null epilogue. */ + + extern int nds32_can_use_return_insn (void); ++extern enum machine_mode nds32_case_vector_shorten_mode (int, int, rtx); + + /* Auxiliary functions to decide output alignment or not. */ + + extern int nds32_target_alignment (rtx); ++extern unsigned int nds32_data_alignment (tree, unsigned int); ++extern unsigned int nds32_constant_alignment (tree, unsigned int); ++extern unsigned int nds32_local_alignment (tree, unsigned int); + + /* Auxiliary functions to expand builtin functions. */ + + extern void nds32_init_builtins_impl (void); + extern rtx nds32_expand_builtin_impl (tree, rtx, rtx, +- machine_mode, int); ++ enum machine_mode, int); ++extern tree nds32_builtin_decl_impl (unsigned, bool); + + /* Auxiliary functions for ISR implementation. */ + +@@ -141,10 +333,86 @@ extern void nds32_construct_isr_vectors_information (tree, const char *); + extern void nds32_asm_file_start_for_isr (void); + extern void nds32_asm_file_end_for_isr (void); + extern bool nds32_isr_function_p (tree); ++extern bool nds32_isr_function_critical_p (tree); + + /* Auxiliary functions for cost calculation. 
*/ + ++extern void nds32_init_rtx_costs (void); + extern bool nds32_rtx_costs_impl (rtx, machine_mode, int, int, int *, bool); +-extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool); ++extern int nds32_address_cost_impl (rtx, enum machine_mode, addr_space_t, bool); ++extern struct register_pass_info insert_pass_fp_as_gp; ++ ++extern int nds32_adjust_insn_length (rtx_insn *, int); ++ ++/* Auxiliary functions for pre-define marco. */ ++extern void nds32_cpu_cpp_builtins(struct cpp_reader *); ++ ++/* Auxiliary functions for const_vector's constraints. */ ++ ++extern HOST_WIDE_INT const_vector_to_hwint (rtx); ++extern bool nds32_valid_CVp5_p (rtx); ++extern bool nds32_valid_CVs5_p (rtx); ++extern bool nds32_valid_CVs2_p (rtx); ++extern bool nds32_valid_CVhi_p (rtx); ++ ++/* Auxiliary functions for lwm/smw. */ ++ ++extern bool nds32_valid_smw_lwm_base_p (rtx); ++ ++/* Auxiliary functions for register rename pass. */ ++extern reg_class_t nds32_preferred_rename_class_impl (reg_class_t); ++ ++extern bool nds32_split_double_word_load_store_p (rtx *,bool); ++ ++namespace nds32 { ++ ++extern rtx extract_pattern_from_insn (rtx); ++ ++size_t parallel_elements (rtx); ++rtx parallel_element (rtx, int); ++ ++bool insn_pseudo_nop_p (rtx_insn *); ++bool insn_executable_p (rtx_insn *); ++rtx_insn *prev_executable_insn (rtx_insn *); ++rtx_insn *next_executable_insn (rtx_insn *); ++rtx_insn *prev_executable_insn_local (rtx_insn *); ++rtx_insn *next_executable_insn_local (rtx_insn *); ++bool insn_deleted_p (rtx_insn *); ++ ++bool load_single_p (rtx_insn *); ++bool store_single_p (rtx_insn *); ++bool load_double_p (rtx_insn *); ++bool store_double_p (rtx_insn *); ++bool store_offset_reg_p (rtx_insn *); ++bool load_full_word_p (rtx_insn *); ++bool load_partial_word_p (rtx_insn *); ++bool post_update_insn_p (rtx_insn *); ++bool immed_offset_p (rtx); ++int find_post_update_rtx (rtx_insn *); ++rtx extract_mem_rtx (rtx_insn *); ++rtx extract_base_reg (rtx_insn *); ++rtx 
extract_offset_rtx (rtx_insn *); ++ ++rtx extract_shift_reg (rtx_insn *); ++ ++bool movd44_insn_p (rtx_insn *); ++rtx extract_movd44_even_reg (rtx_insn *); ++rtx extract_movd44_odd_reg (rtx_insn *); ++ ++rtx extract_mac_acc_rtx (rtx_insn *); ++rtx extract_mac_non_acc_rtx (rtx_insn *); ++ ++bool divmod_p (rtx_insn *); ++ ++rtx extract_branch_target_rtx (rtx_insn *); ++rtx extract_branch_condition_rtx (rtx_insn *); ++ ++void compute_bb_for_insn_safe (); ++ ++void exchange_insns (rtx_insn *, rtx_insn *); ++ ++} // namespace nds32 ++ ++extern bool nds32_include_fp_arith; + + /* ------------------------------------------------------------------------ */ +diff --git a/gcc/config/nds32/nds32-reg-utils.c b/gcc/config/nds32/nds32-reg-utils.c +new file mode 100644 +index 0000000..1fd8a83 +--- /dev/null ++++ b/gcc/config/nds32/nds32-reg-utils.c +@@ -0,0 +1,190 @@ ++ ++/* lmwsmw pass of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. 
*/ ++ ++/* ------------------------------------------------------------------------ */ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "input.h" ++#include "function.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "bitmap.h" ++#include "df.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). 
*/ ++#include "ggc.h" ++#include "tree-pass.h" ++#include "target-globals.h" ++#include "ira.h" ++#include "ira-int.h" ++#include "nds32-reg-utils.h" ++ ++#define NDS32_GPR_NUM 32 ++ ++static bool debug_live_reg = false; ++ ++void ++nds32_live_regs (basic_block bb, rtx_insn *first, rtx_insn *last, bitmap *live) ++{ ++ df_ref def; ++ rtx_insn *insn; ++ bitmap_copy (*live, DF_LR_IN (bb)); ++ df_simulate_initialize_forwards (bb, *live); ++ rtx_insn *first_insn = BB_HEAD (bb); ++ ++ for (insn = first_insn; insn != first; insn = NEXT_INSN (insn)) ++ df_simulate_one_insn_forwards (bb, insn, *live); ++ ++ if (dump_file && debug_live_reg) ++ { ++ fprintf (dump_file, "scan live regs:\nfrom:\n"); ++ print_rtl_single (dump_file, first); ++ ++ fprintf (dump_file, "to:\n"); ++ print_rtl_single (dump_file, last); ++ ++ fprintf (dump_file, "bb lr in:\n"); ++ dump_bitmap (dump_file, DF_LR_IN (bb)); ++ ++ fprintf (dump_file, "init:\n"); ++ dump_bitmap (dump_file, *live); ++ } ++ ++ for (insn = first; insn != last; insn = NEXT_INSN (insn)) ++ { ++ if (!INSN_P (insn)) ++ continue; ++ ++ FOR_EACH_INSN_DEF (def, insn) ++ bitmap_set_bit (*live, DF_REF_REGNO (def)); ++ ++ if (dump_file && debug_live_reg) ++ { ++ fprintf (dump_file, "scaning:\n"); ++ print_rtl_single (dump_file, insn); ++ dump_bitmap (dump_file, *live); ++ } ++ } ++ ++ gcc_assert (INSN_P (insn)); ++ ++ FOR_EACH_INSN_DEF (def, insn) ++ bitmap_set_bit (*live, DF_REF_REGNO (def)); ++ ++ if (dump_file && debug_live_reg) ++ { ++ fprintf (dump_file, "scaning:\n"); ++ print_rtl_single (dump_file, last); ++ dump_bitmap (dump_file, *live); ++ } ++} ++ ++void ++print_hard_reg_set (FILE *file, const char *prefix, HARD_REG_SET set) ++{ ++ int i; ++ bool first = true; ++ fprintf (file, "%s{ ", prefix); ++ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ { ++ if (TEST_HARD_REG_BIT (set, i)) ++ { ++ if (first) ++ { ++ fprintf (file, "%s", reg_names[i]); ++ first = false; ++ } ++ else ++ fprintf (file, ", %s", reg_names[i]); ++ } ++ } 
++ fprintf (file, "}\n"); ++} ++ ++void ++nds32_get_available_reg_set (basic_block bb, ++ rtx_insn *first, ++ rtx_insn *last, ++ HARD_REG_SET *available_regset) ++{ ++ bitmap live; ++ HARD_REG_SET live_regset; ++ unsigned i; ++ live = BITMAP_ALLOC (&reg_obstack); ++ ++ nds32_live_regs (bb, first, last, &live); ++ ++ REG_SET_TO_HARD_REG_SET (live_regset, live); ++ ++ /* Reverse available_regset. */ ++ COMPL_HARD_REG_SET (*available_regset, live_regset); ++ ++ /* We only care $r0-$r31, so mask $r0-$r31. */ ++ AND_HARD_REG_SET (*available_regset, reg_class_contents[GENERAL_REGS]); ++ ++ /* Fixed register also not available. */ ++ for (i = NDS32_FIRST_GPR_REGNUM; i <= NDS32_LAST_GPR_REGNUM; ++i) ++ { ++ if (fixed_regs[i]) ++ CLEAR_HARD_REG_BIT (*available_regset, i); ++ } ++ ++ BITMAP_FREE (live); ++} +diff --git a/gcc/config/nds32/nds32-reg-utils.h b/gcc/config/nds32/nds32-reg-utils.h +new file mode 100644 +index 0000000..16c23a3 +--- /dev/null ++++ b/gcc/config/nds32/nds32-reg-utils.h +@@ -0,0 +1,61 @@ ++/* Prototypes for load-store-opt of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef NDS32_REG_UTILS_OPT_H ++#define NDS32_REG_UTILS_OPT_H ++ ++/* Auxiliary functions for register usage analysis.
*/ ++extern void nds32_live_regs (basic_block, rtx_insn *, rtx_insn *, bitmap *); ++extern void print_hard_reg_set (FILE *, const char *, HARD_REG_SET); ++extern void nds32_get_available_reg_set (basic_block, rtx_insn *, ++ rtx_insn *, HARD_REG_SET *); ++ ++static inline bool ++in_reg_class_p (unsigned regno, enum reg_class clazz) ++{ ++ return TEST_HARD_REG_BIT (reg_class_contents[clazz], regno); ++} ++ ++static inline bool ++in_reg_class_p (rtx reg, enum reg_class clazz) ++{ ++ gcc_assert (REG_P (reg)); ++ return in_reg_class_p (REGNO (reg), clazz); ++} ++ ++static inline unsigned ++find_available_reg (HARD_REG_SET *available_regset, enum reg_class clazz) ++{ ++ hard_reg_set_iterator hrsi; ++ unsigned regno; ++ EXECUTE_IF_SET_IN_HARD_REG_SET (reg_class_contents[clazz], 0, regno, hrsi) ++ { ++ /* Caller-save register or callee-save register but it's ever live. */ ++ if (TEST_HARD_REG_BIT (*available_regset, regno) ++ && (call_used_regs[regno] || df_regs_ever_live_p (regno))) ++ return regno; ++ } ++ ++ return INVALID_REGNUM; ++} ++ ++ ++ ++#endif +diff --git a/gcc/config/nds32/nds32-regrename.c b/gcc/config/nds32/nds32-regrename.c +new file mode 100644 +index 0000000..0875722 +--- /dev/null ++++ b/gcc/config/nds32/nds32-regrename.c +@@ -0,0 +1,389 @@ ++/* Register rename pass of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). 
*/
++#include "builtins.h"
++#include "cpplib.h"
++#include "params.h"
++#include "tree-pass.h"
++#include "regrename.h"
++
++/* Register class the rename search is currently steered towards.  It is
++   set by try_find_best_rename_reg around its call to find_rename_reg and
++   reset to NO_REGS afterwards.  */
++static reg_class_t current_preferred_rename_class = NO_REGS;
++
++/* Return current_preferred_rename_class when RCLASS is GENERAL_REGS,
++   and NO_REGS for every other class.  */
++reg_class_t
++nds32_preferred_rename_class_impl (reg_class_t rclass)
++{
++  if (rclass == GENERAL_REGS)
++    return current_preferred_rename_class;
++  else
++    return NO_REGS;
++}
++
++/* Write the numbers of all hard registers contained in SET to FILE as
++   "{ n n ... }" followed by a newline.  */
++static void
++print_hard_reg_set (FILE *file, HARD_REG_SET set)
++{
++  int i;
++
++  fprintf (file, "{ ");
++  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
++    {
++      if (TEST_HARD_REG_BIT (set, i))
++        fprintf (file, "%d ", i);
++    }
++  fprintf (file, "}\n");
++}
++
++/* Externally visible wrapper around print_hard_reg_set.  */
++void
++dump_hard_reg_set (FILE *file, HARD_REG_SET set)
++{
++  print_hard_reg_set (file, set);
++}
++
++/* Return true if hard register REGNO is a member of class CLAZZ.  */
++static bool
++in_reg_class_p (unsigned regno, enum reg_class clazz)
++{
++  return TEST_HARD_REG_BIT (reg_class_contents[clazz], regno);
++}
++
++/* Ask find_rename_reg for a replacement register for OP_CHAIN, marking
++   every register outside PREFERRED_CLASS (except the chain's current
++   register) as unavailable.  Return whatever find_rename_reg returns.  */
++static unsigned
++try_find_best_rename_reg (du_head_p op_chain, reg_class_t preferred_class)
++{
++  HARD_REG_SET unavailable;
++  unsigned new_reg;
++  current_preferred_rename_class = preferred_class;
++
++  COMPL_HARD_REG_SET (unavailable, reg_class_contents[preferred_class]);
++  CLEAR_HARD_REG_BIT (unavailable, op_chain->regno);
++
++  new_reg = find_rename_reg (op_chain, GENERAL_REGS,
++                             &unavailable, op_chain->regno, false);
++
++  current_preferred_rename_class = NO_REGS;
++  return new_reg;
++}
++
++/* Try to rename the register chain of operand OP_POS of INSN into
++   PREFERRED_RENAME_CLASS.  Return true if regrename_do_replace was called
++   with a new register; return false if there is no chain information, the
++   chain cannot be renamed, the register already is in the class, or no
++   different register was found.  */
++static bool
++try_rename_operand_to (rtx insn, unsigned op_pos,
++                       reg_class_t preferred_rename_class)
++{
++  insn_rr_info *info;
++  du_head_p op_chain;
++  unsigned newreg;
++  unsigned oldreg;
++
++  info = &insn_rr[INSN_UID (insn)];
++
++  if (info->op_info == NULL)
++    return false;
++
++  if (info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  if (op_chain->cannot_rename)
++    return false;
++
++  /* Already use preferred class, so do nothing.  */
++  if (TEST_HARD_REG_BIT (reg_class_contents[preferred_rename_class],
++                         op_chain->regno))
++    return false;
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "Try to rename operand %d to %s:\n",
++               op_pos, reg_class_names[preferred_rename_class]);
++      print_rtl_single (dump_file, insn);
++    }
++
++  oldreg = op_chain->regno;
++  newreg = try_find_best_rename_reg (op_chain, preferred_rename_class);
++
++  if (newreg == oldreg)
++    return false;
++
++  regrename_do_replace (op_chain, newreg);
++
++  if (dump_file)
++    {
++      fprintf (dump_file, "Rename operand %d to %s is Done:\n",
++               op_pos, reg_class_names[preferred_rename_class]);
++      print_rtl_single (dump_file, insn);
++    }
++  return true;
++}
++
++/* Return true if renaming operand 0 of the compare INSN to $r15 is worth
++   trying.  The two operands of the comparison in SET_SRC are inspected:
++   the first must be a MIDDLE_REGS register and the second must be a
++   register or a constant accepted by the Iu05 constraint.  */
++static bool
++rename_slt_profitlable (rtx insn)
++{
++  rtx pattern;
++  pattern = PATTERN (insn);
++  rtx src = SET_SRC (pattern);
++  rtx op0 = XEXP (src, 0);
++  /* The second comparison operand is element 1 of SRC; reading element 0
++     here would make the immediate-range test below test OP0 again.  */
++  rtx op1 = XEXP (src, 1);
++
++  insn_rr_info *info;
++  du_head_p op_chain;
++  int op_pos = 0;
++
++  info = &insn_rr[INSN_UID (insn)];
++
++  if (info->op_info == NULL)
++    return false;
++
++  if (info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  /* Nothing to gain when the destination already is $r15.  */
++  if (in_reg_class_p (op_chain->regno, R15_TA_REG))
++    return false;
++
++  /* slt[s]45 need second operand in MIDDLE_REGS class.  */
++  if (!REG_P (op0) || !in_reg_class_p (REGNO (op0), MIDDLE_REGS))
++    return false;
++
++  /* slt[s]i45 only allow 5 bit unsigned integer.  */
++  if (REG_P (op1)
++      || (CONST_INT_P (op1) && satisfies_constraint_Iu05 (op1)))
++    return true;
++
++  return false;
++}
++
++/* Return true if operand 1 of INSN has a rename chain whose register is
++   not yet in LOW_REGS.  */
++static bool
++rename_cbranch_eq0_low_reg_profitlable (rtx insn)
++{
++  insn_rr_info *info;
++  du_head_p op_chain;
++  int op_pos = 1;
++
++  info = &insn_rr[INSN_UID (insn)];
++
++  if (info->op_info == NULL)
++    return false;
++
++  if (info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  if (in_reg_class_p (op_chain->regno, LOW_REGS))
++    return false;
++
++  return true;
++}
++
++
++/* Return true if renaming operand 1 of the compare-with-zero branch INSN
++   to $r15 is worth trying: the chain register is not $r15 already and the
++   register tested by the branch condition is not in LOW_REGS.  */
++static bool
++rename_cbranch_eq0_r15_profitlable (rtx insn)
++{
++  rtx pattern;
++  pattern = PATTERN (insn);
++  rtx if_then_else = SET_SRC (pattern);
++  rtx cond = XEXP (if_then_else, 0);
++  rtx op0 = XEXP (cond, 0);
++
++  insn_rr_info *info;
++  du_head_p op_chain;
++  int op_pos = 1;
++
++  info = &insn_rr[INSN_UID (insn)];
++
++  if (info->op_info == NULL)
++    return false;
++
++  if (info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  if (in_reg_class_p (op_chain->regno, R15_TA_REG))
++    return false;
++
++  /* LOW_REGS or R15_TA_REG both are 2-byte instruction.  */
++  if (REG_P (op0) && in_reg_class_p (REGNO (op0), LOW_REGS))
++    return false;
++
++  return true;
++}
++
++/* Return true if renaming operand 1 of the register-equality branch INSN
++   to $r5 is worth trying: the chain register is not $r5 already and the
++   second operand of the branch condition is a LOW_REGS register.  */
++static bool
++rename_cbranch_eq_reg_profitlable (rtx insn)
++{
++  rtx pattern;
++  pattern = PATTERN (insn);
++  rtx if_then_else = SET_SRC (pattern);
++  rtx cond = XEXP (if_then_else, 0);
++  rtx op1 = XEXP (cond, 1);
++
++  insn_rr_info *info;
++  du_head_p op_chain;
++  int op_pos = 1;
++
++  info = &insn_rr[INSN_UID (insn)];
++
++  if (info->op_info == NULL)
++    return false;
++
++  if (info->op_info[op_pos].n_chains == 0)
++    return false;
++
++  op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id);
++
++  if (in_reg_class_p (op_chain->regno, R5_REG))
++    return false;
++
++  if (REG_P (op1) && in_reg_class_p (REGNO (op1), LOW_REGS))
++    return true;
++  else
++    return false;
++}
++
++/* Walk every insn of the current function and, for the recognized
++   compare and branch patterns, attempt the renames listed per case.  */
++static void
++do_regrename ()
++{
++  basic_block bb;
++  rtx_insn *insn;
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      FOR_BB_INSNS (bb, insn)
++        {
++          if (!INSN_P (insn))
++            continue;
++
++          switch (recog_memoized (insn))
++            {
++            case CODE_FOR_slts_compare_impl:
++            case CODE_FOR_slt_compare_impl:
++              /* Try to rename operand 0 to $r15 if profitable.  */
++              if (rename_slt_profitlable (insn))
++                try_rename_operand_to (insn, 0, R15_TA_REG);
++              break;
++            case CODE_FOR_slt_eq0:
++              /* Try to rename operand 0 to $r15.  */
++              if (rename_slt_profitlable (insn))
++                try_rename_operand_to (insn, 0, R15_TA_REG);
++              break;
++            case CODE_FOR_cbranchsi4_equality_zero:
++              /* Try to rename operand 1 to $r15.  If that fails, fall
++                 back to any LOW_REGS register.  */
++              if (rename_cbranch_eq0_r15_profitlable (insn))
++                if (!try_rename_operand_to (insn, 1, R15_TA_REG))
++                  if (rename_cbranch_eq0_low_reg_profitlable (insn))
++                    try_rename_operand_to (insn, 1, LOW_REGS);
++              break;
++            case CODE_FOR_cbranchsi4_equality_reg:
++            case CODE_FOR_cbranchsi4_equality_reg_or_const_int:
++              /* Try to rename operand 1 to $r5.  */
++              if (rename_cbranch_eq_reg_profitlable (insn))
++                try_rename_operand_to (insn, 1, R5_REG);
++              break;
++            }
++        }
++    }
++}
++
++/* Pass body: set up dataflow and the regrename machinery, run
++   do_regrename, and tear the regrename state down again.  */
++static unsigned int
++nds32_regrename (void)
++{
++  df_set_flags (DF_LR_RUN_DCE);
++  df_note_add_problem ();
++  df_analyze ();
++  df_set_flags (DF_DEFER_INSN_RESCAN);
++
++  regrename_init (true);
++
++  regrename_analyze (NULL);
++
++  do_regrename ();
++
++  regrename_finish ();
++  return 1;
++}
++
++const pass_data pass_data_nds32_regrename =
++{
++  RTL_PASS,                             /* type */
++  "nds32-regrename",                    /* name */
++  OPTGROUP_NONE,                        /* optinfo_flags */
++  TV_MACH_DEP,                          /* tv_id */
++  0,                                    /* properties_required */
++  0,                                    /* properties_provided */
++  0,                                    /* properties_destroyed */
++  0,                                    /* todo_flags_start */
++  TODO_df_finish,                       /* todo_flags_finish */
++};
++
++/* RTL pass wrapper; enabled when both TARGET_16_BIT and
++   TARGET_REGRENAME_OPT are set.  */
++class pass_nds32_regrename_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_regrename_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_regrename, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  bool gate (function *) { return TARGET_16_BIT && TARGET_REGRENAME_OPT; }
++  unsigned int execute (function *) { return nds32_regrename (); }
++};
++
++rtl_opt_pass *
++make_pass_nds32_regrename_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_regrename_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-relax-opt.c b/gcc/config/nds32/nds32-relax-opt.c
+new file mode 100644
+index 0000000..0919af6
+--- /dev/null
++++ b/gcc/config/nds32/nds32-relax-opt.c
+@@ -0,0 +1,612 @@
++/* relax-opt pass of Andes NDS32 cpu for GNU compiler
++   Copyright (C) 2012-2016 Free Software Foundation, Inc.
++   Contributed by Andes Technology Corporation.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload (). */ ++#include "flags.h" ++#include "input.h" ++#include "function.h" ++#include "emit-rtl.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "bitmap.h" ++#include "df.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function (). */ ++#include "ggc.h" ++#include "tree-pass.h" ++#include "target-globals.h" ++using namespace nds32; ++ ++/* This is used to create unique relax hint id value. ++ The initial value is 0. */ ++static int relax_group_id = 0; ++ ++/* Group the following pattern as relax candidates: ++ ++ 1. 
sethi $ra, hi20(sym)
++        ori   $ra, $ra, lo12(sym)
++        ==>
++        addi.gp $ra, sym
++
++     2. sethi $ra, hi20(sym)
++        lwi   $rb, [$ra + lo12(sym)]
++        ==>
++        lwi.gp $rb, [(sym)]
++
++     3. sethi $ra, hi20(sym)
++        ori   $ra, $ra, lo12(sym)
++        lwi   $rb, [$ra]
++        swi   $rc, [$ra]
++        ==>
++        lwi37 $rb, [(sym)]
++        swi37 $rc, [(sym)]  */
++
++/* Return true if INSN is a load or store whose address is a plain
++   register.  A ZERO_EXTEND or SIGN_EXTEND wrapped around the memory
++   operand is looked through first.  */
++static bool
++nds32_reg_base_load_store_p (rtx_insn *insn)
++{
++  rtx mem;
++  enum attr_type kind = get_attr_type (insn);
++
++  /* The memory operand is the source of a load and the destination of a
++     store; any other insn type is not a candidate.  */
++  if (kind == TYPE_LOAD)
++    mem = SET_SRC (PATTERN (insn));
++  else if (kind == TYPE_STORE)
++    mem = SET_DEST (PATTERN (insn));
++  else
++    return false;
++
++  if (mem == NULL_RTX)
++    return false;
++
++  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
++    mem = XEXP (mem, 0);
++
++  return GET_CODE (XEXP (mem, 0)) == REG;
++}
++
++/* Return true if INSN is a load or store whose address is the stack
++   pointer, or the frame pointer when one is needed, either directly or as
++   the first operand of a PLUS.  */
++
++static bool
++nds32_sp_base_or_plus_load_store_p (rtx_insn *insn)
++{
++  rtx mem;
++  enum attr_type kind = get_attr_type (insn);
++
++  if (kind == TYPE_LOAD)
++    mem = SET_SRC (PATTERN (insn));
++  else if (kind == TYPE_STORE)
++    mem = SET_DEST (PATTERN (insn));
++  else
++    return false;
++
++  if (mem == NULL_RTX)
++    return false;
++
++  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
++    mem = XEXP (mem, 0);
++
++  /* BASE is the address itself, or its first operand when the address
++     is a PLUS.  */
++  rtx base = XEXP (mem, 0);
++  if (GET_CODE (base) == PLUS)
++    base = XEXP (base, 0);
++
++  if (!REG_P (base))
++    return false;
++
++  if (frame_pointer_needed && REGNO (base) == FP_REGNUM)
++    return true;
++
++  return REGNO (base) == SP_REGNUM;
++}
++
++/* Return true if is load with [REG + REG/CONST_INT] addressing mode.
*/
++static bool
++nds32_plus_reg_load_store_p (rtx_insn *insn)
++{
++  rtx mem;
++  enum attr_type kind = get_attr_type (insn);
++
++  /* The memory operand is the source of a load and the destination of a
++     store; any other insn type is not a candidate.  */
++  if (kind == TYPE_LOAD)
++    mem = SET_SRC (PATTERN (insn));
++  else if (kind == TYPE_STORE)
++    mem = SET_DEST (PATTERN (insn));
++  else
++    return false;
++
++  if (mem == NULL_RTX)
++    return false;
++
++  if (GET_CODE (mem) == ZERO_EXTEND || GET_CODE (mem) == SIGN_EXTEND)
++    mem = XEXP (mem, 0);
++
++  /* The address has to be a PLUS whose first operand is a register.  */
++  rtx addr = XEXP (mem, 0);
++  if (GET_CODE (addr) != PLUS)
++    return false;
++
++  return GET_CODE (XEXP (addr, 0)) == REG;
++}
++
++/* Return true if INSN is recognized as the hwloop_last_insn pattern.  */
++static bool
++nds32_hwloop_last_insn_p (rtx_insn *insn)
++{
++  return recog_memoized (insn) == CODE_FOR_hwloop_last_insn;
++}
++
++/* Return true if X is a CONST whose operand is reported by
++   nds32_indirect_call_referenced_p.  */
++static bool
++nds32_ict_const_p (rtx x)
++{
++  if (GET_CODE (x) != CONST)
++    return FALSE;
++
++  return nds32_indirect_call_referenced_p (XEXP (x, 0));
++}
++
++/* Group the following pattern as relax candidates:
++
++   GOT:
++      sethi     $ra, hi20(sym)
++      ori       $ra, $ra, lo12(sym)
++      lw        $rb, [$ra + $gp]
++
++   GOTOFF, TLSLE:
++      sethi     $ra, hi20(sym)
++      ori       $ra, $ra, lo12(sym)
++      LS        $rb, [$ra + $gp]
++
++   GOTOFF, TLSLE:
++      sethi     $ra, hi20(sym)
++      ori       $ra, $ra, lo12(sym)
++      add       $rb, $ra, $gp($tp)
++
++   Initial GOT table:
++      sethi     $gp,hi20(sym)
++      ori       $gp, $gp, lo12(sym)
++      add5.pc   $gp  */
++
++static auto_vec<rtx_insn *, 32> nds32_group_infos;
++/* Group the PIC and TLS relax candidate instructions for linker.
*/
++static bool
++nds32_pic_tls_group (rtx_insn *def_insn,
++                     enum nds32_relax_insn_type relax_type,
++                     int sym_type)
++{
++  df_ref def = DF_INSN_DEFS (def_insn);
++
++  for (df_link *chain = DF_REF_CHAIN (def); chain; chain = chain->next)
++    {
++      /* References without insn info are ignored.  */
++      if (!DF_REF_INSN_INFO (chain->ref))
++        continue;
++
++      rtx_insn *user = DF_REF_INSN (chain->ref);
++
++      /* Give up unless the block of the use is dominated by the block of
++         the definition.  */
++      if (!dominated_by_p (CDI_DOMINATORS,
++                           BLOCK_FOR_INSN (user),
++                           BLOCK_FOR_INSN (def_insn)))
++        return FALSE;
++
++      /* Give up on a use that is not an active insn.  */
++      if (!active_insn_p (user))
++        return FALSE;
++
++      /* RELAX_ORI is the only relax type handled here.  */
++      if (relax_type != RELAX_ORI)
++        return FALSE;
++
++      /* GOTOFF, TLSLE:
++           sethi $ra, hi20(sym)
++           ori   $ra, $ra, lo12(sym)
++           add   $rb, $ra, $gp($tp)
++         The source of the add is rewritten into an UNSPEC_ADD32.  */
++      if ((sym_type == UNSPEC_TLSLE || sym_type == UNSPEC_GOTOFF)
++          && recog_memoized (user) == CODE_FOR_addsi3)
++        {
++          rtx old_src = XEXP (PATTERN (user), 1);
++          rtx add32
++            = gen_rtx_UNSPEC (SImode,
++                              gen_rtvec (2, XEXP (old_src, 0),
++                                         XEXP (old_src, 1)),
++                              UNSPEC_ADD32);
++          validate_replace_rtx (old_src, add32, user);
++          nds32_group_infos.safe_push (user);
++          continue;
++        }
++
++      /* Otherwise only a [REG + ...] load/store that is not sp/fp based
++         qualifies.  */
++      if (!nds32_plus_reg_load_store_p (user)
++          || nds32_sp_base_or_plus_load_store_p (user))
++        return FALSE;
++
++      nds32_group_infos.safe_push (user);
++    }
++
++  return TRUE;
++}
++
++/* Return the integer stored in field 1 of the symbol expression found
++   in operand 1 of the SET_SRC of insn X.  A CONST wrapper, and a PLUS
++   directly inside that CONST, are looked through first.  */
++static int
++nds32_pic_tls_symbol_type (rtx x)
++{
++  rtx sym = XEXP (SET_SRC (PATTERN (x)), 1);
++
++  if (GET_CODE (sym) == CONST)
++    {
++      sym = XEXP (sym, 0);
++
++      if (GET_CODE (sym) == PLUS)
++        sym = XEXP (sym, 0);
++    }
++
++  return XINT (sym, 1);
++}
++
++/* Group the relax candidates with group id.
*/
++static void
++nds32_group_insns (rtx sethi)
++{
++  df_ref def_record, use_record;
++  df_link *link;
++  rtx_insn *use_insn = NULL;
++  rtx group_id;
++  bool valid;
++
++  def_record = DF_INSN_DEFS (sethi);
++
++  /* First pass: verify that every use qualifies.  Nothing is emitted
++     unless all of them do.  */
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++        continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Give up unless the block of the use is dominated by the block of
++         the sethi.  */
++      if (!dominated_by_p (CDI_DOMINATORS,
++                           BLOCK_FOR_INSN (use_insn),
++                           BLOCK_FOR_INSN (sethi)))
++        return;
++
++      /* Skip if the low-part used register is from different high-part
++         instructions.  */
++      use_record = DF_INSN_USES (use_insn);
++      if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next)
++        return;
++
++      /* Skip if use_insn not active insn.  */
++      if (!active_insn_p (use_insn))
++        return;
++
++      /* The use must be a lo_sum, a symbol load/store, or a register-based
++         load/store that is not sp/fp based.  */
++      if (!(recog_memoized (use_insn) == CODE_FOR_lo_sum
++            || nds32_symbol_load_store_p (use_insn)
++            || (nds32_reg_base_load_store_p (use_insn)
++                && !nds32_sp_base_or_plus_load_store_p (use_insn))))
++        return;
++    }
++
++  group_id = GEN_INT (relax_group_id);
++  /* Insert .relax_* directive for sethi.  */
++  emit_insn_before (gen_relax_group (group_id), sethi);
++
++  /* Scan the use insns and insert the directive.  */
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++        continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Insert .relax_* directive.  */
++      if (active_insn_p (use_insn))
++        emit_insn_before (gen_relax_group (group_id), use_insn);
++
++      /* Find ori ra, ra, unspec(symbol) instruction.  USE_INSN has
++         already been dereferenced above, so no null test is needed.  */
++      if (recog_memoized (use_insn) == CODE_FOR_lo_sum
++          && !nds32_const_unspec_p (XEXP (SET_SRC (PATTERN (use_insn)), 1)))
++        {
++          int sym_type = nds32_pic_tls_symbol_type (use_insn);
++          valid = nds32_pic_tls_group (use_insn, RELAX_ORI, sym_type);
++
++          /* Drain the insns collected by nds32_pic_tls_group; they get
++             the directive only when the whole group was valid.  */
++          while (!nds32_group_infos.is_empty ())
++            {
++              rtx_insn *grouped_insn = nds32_group_infos.pop ();
++              if (valid)
++                emit_insn_before (gen_relax_group (group_id), grouped_insn);
++            }
++        }
++    }
++
++  relax_group_id++;
++}
++
++/* Store the current relax group id into the relax-group unspec that is
++   element 1 of the PARALLEL pattern of INSN, then advance the id.  */
++
++static void
++nds32_group_tls_insn (rtx insn)
++{
++  rtx pat = PATTERN (insn);
++  rtx unspec_relax_group = XEXP (XVECEXP (pat, 0, 1), 0);
++
++  if (GET_CODE (unspec_relax_group) == UNSPEC
++      && XINT (unspec_relax_group, 1) == UNSPEC_VOLATILE_RELAX_GROUP)
++    {
++      XVECEXP (unspec_relax_group, 0, 0) = GEN_INT (relax_group_id);
++    }
++
++  relax_group_id++;
++}
++
++/* Return true if INSN is a TYPE_FLOAD set of an SFmode or DFmode
++   destination from memory addressed by a register or by
++   register + constant.  */
++static bool
++nds32_float_reg_load_store_p (rtx_insn *insn)
++{
++  rtx pat = PATTERN (insn);
++
++  if (get_attr_type (insn) == TYPE_FLOAD
++      && GET_CODE (pat) == SET
++      && (GET_MODE (XEXP (pat, 0)) == SFmode
++          || GET_MODE (XEXP (pat, 0)) == DFmode)
++      && MEM_P (XEXP (pat, 1)))
++    {
++      rtx addr = XEXP (XEXP (pat, 1), 0);
++
++      /* [$ra] */
++      if (REG_P (addr))
++        return true;
++      /* [$ra + offset] */
++      if (GET_CODE (addr) == PLUS
++          && REG_P (XEXP (addr, 0))
++          && CONST_INT_P (XEXP (addr, 1)))
++        return true;
++    }
++  return false;
++}
++
++
++/* Group float load-store instructions:
++   la $ra, symbol
++   flsi $rt, [$ra + offset] */
++
++static void
++nds32_group_float_insns (rtx insn)
++{
++  df_ref def_record, use_record;
++  df_link *link;
++  rtx_insn *use_insn = NULL;
++  rtx group_id;
++
++  def_record = DF_INSN_DEFS (insn);
++
++  /* First pass: verify that every use qualifies.  */
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++        continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Give up unless the block of the use is dominated by the block of
++         the defining insn.  */
++      if (!dominated_by_p (CDI_DOMINATORS,
++                           BLOCK_FOR_INSN (use_insn),
++                           BLOCK_FOR_INSN (insn)))
++        return;
++
++      /* Skip if the low-part used register is from different high-part
++         instructions.  */
++      use_record = DF_INSN_USES (use_insn);
++      if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next)
++        return;
++
++      /* Skip if use_insn not active insn.  */
++      if (!active_insn_p (use_insn))
++        return;
++
++      if (!nds32_float_reg_load_store_p (use_insn)
++          || find_post_update_rtx (use_insn) != -1)
++        return;
++    }
++
++  group_id = GEN_INT (relax_group_id);
++  /* Insert .relax_* directive for insn.  */
++  emit_insn_before (gen_relax_group (group_id), insn);
++
++  /* Scan the use insns and insert the directive.  */
++  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
++    {
++      if (!DF_REF_INSN_INFO (link->ref))
++        continue;
++
++      use_insn = DF_REF_INSN (link->ref);
++
++      /* Insert .relax_* directive.  */
++      emit_insn_before (gen_relax_group (group_id), use_insn);
++    }
++
++  relax_group_id++;
++}
++
++/* Group the relax candidate instructions for linker.  */
++static void
++nds32_relax_group (void)
++{
++  rtx_insn *insn;
++
++  compute_bb_for_insn ();
++
++  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
++  df_insn_rescan_all ();
++  df_analyze ();
++  df_set_flags (DF_DEFER_INSN_RESCAN);
++  calculate_dominance_info (CDI_DOMINATORS);
++
++  insn = get_insns ();
++  gcc_assert (NOTE_P (insn));
++
++  for (insn = next_active_insn (insn); insn; insn = next_active_insn (insn))
++    {
++      if (NONJUMP_INSN_P (insn))
++        {
++          /* Find sethi ra, symbol instruction.  */
++          if (recog_memoized (insn) == CODE_FOR_sethi
++              && nds32_symbolic_operand (XEXP (SET_SRC (PATTERN (insn)), 0),
++                                         SImode)
++              && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))
++              && !nds32_hwloop_last_insn_p (next_active_insn (insn)))
++            nds32_group_insns (insn);
++          else if (recog_memoized (insn) == CODE_FOR_tls_ie)
++            nds32_group_tls_insn (insn);
++          else if (TARGET_FPU_SINGLE
++                   && recog_memoized (insn) == CODE_FOR_move_addr
++                   && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))
++                   && !nds32_hwloop_last_insn_p (next_active_insn (insn)))
++            {
++              nds32_group_float_insns (insn);
++            }
++        }
++      else if (CALL_P (insn) && recog_memoized (insn) == CODE_FOR_tls_desc)
++        {
++          nds32_group_tls_insn (insn);
++        }
++    }
++
++  /* We must call df_finish_pass manually because it should be invoked before
++     BB information is destroyed.  Hence we cannot set the TODO_df_finish flag
++     to the pass manager.  */
++  df_insn_rescan_all ();
++  df_finish_pass (false);
++  free_dominance_info (CDI_DOMINATORS);
++}
++
++/* Pass body: run the grouping only when TARGET_RELAX_HINT is set.  */
++static unsigned int
++nds32_relax_opt (void)
++{
++  if (TARGET_RELAX_HINT)
++    nds32_relax_group ();
++  return 1;
++}
++
++const pass_data pass_data_nds32_relax_opt =
++{
++  RTL_PASS,                             /* type */
++  "relax_opt",                          /* name */
++  OPTGROUP_NONE,                        /* optinfo_flags */
++  TV_MACH_DEP,                          /* tv_id */
++  0,                                    /* properties_required */
++  0,                                    /* properties_provided */
++  0,                                    /* properties_destroyed */
++  0,                                    /* todo_flags_start */
++  TODO_df_finish,                       /* todo_flags_finish */
++};
++
++class pass_nds32_relax_opt : public rtl_opt_pass
++{
++public:
++  pass_nds32_relax_opt (gcc::context *ctxt)
++    : rtl_opt_pass (pass_data_nds32_relax_opt, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  bool gate (function *) { return TARGET_RELAX_HINT; }
++  unsigned int execute (function *) { return nds32_relax_opt (); }
++};
++
++rtl_opt_pass *
++make_pass_nds32_relax_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_relax_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-scalbn-transform.c
b/gcc/config/nds32/nds32-scalbn-transform.c +new file mode 100644 +index 0000000..fba7c6f +--- /dev/null ++++ b/gcc/config/nds32/nds32-scalbn-transform.c +@@ -0,0 +1,364 @@ ++/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler. ++ This pass transforms the multiplications whose multiplier is a ++ power of 2. ++ ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload (). 
*/
++#include "flags.h"
++#include "input.h"
++#include "function.h"
++#include "expr.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "dominance.h"
++#include "cfg.h"
++#include "cfgrtl.h"
++#include "cfganal.h"
++#include "lcm.h"
++#include "cfgbuild.h"
++#include "cfgcleanup.h"
++#include "predict.h"
++#include "basic-block.h"
++#include "bitmap.h"
++#include "df.h"
++#include "tm_p.h"
++#include "tm-constrs.h"
++#include "optabs.h"             /* For GEN_FCN.  */
++#include "target.h"
++#include "langhooks.h"          /* For add_builtin_function ().  */
++#include "ggc.h"
++#include "tree-pass.h"
++#include "tree-ssa-alias.h"
++#include "fold-const.h"
++#include "gimple-expr.h"
++#include "is-a.h"
++#include "gimple.h"
++#include "gimplify.h"
++#include "gimple-iterator.h"
++#include "gimplify-me.h"
++#include "gimple-ssa.h"
++#include "ipa-ref.h"
++#include "lto-streamer.h"
++#include "cgraph.h"
++#include "tree-cfg.h"
++#include "tree-phinodes.h"
++#include "stringpool.h"
++#include "tree-ssanames.h"
++#include "tree-pass.h"
++#include "gimple-pretty-print.h"
++#include "gimple-fold.h"
++
++
++/* Return true if the current function name is scalbn/scalbnf, or its alias
++   includes scalbn/scalbnf, otherwise return false.  */
++
++static bool
++nds32_is_scalbn_alias_func_p (void)
++{
++  int i;
++  struct ipa_ref *ref;
++  struct cgraph_node *cfun_node;
++  const char *fn_name = function_name (cfun);
++
++  if (!strcmp (fn_name, "scalbn")
++      || !strcmp (fn_name, "scalbnf"))
++    return true;
++
++  cfun_node = cgraph_node::get (current_function_decl);
++
++  if (!cfun_node)
++    return false;
++
++  for (i = 0; cfun_node->iterate_referring (i, ref); i++)
++    if (ref->use == IPA_REF_ALIAS)
++      {
++        /* Query the assembler name on the referring symbol directly.
++           Going through dyn_cast <cgraph_node *> would yield a null
++           pointer for a referrer that is not a function node, and that
++           pointer was dereferenced unchecked.  */
++        const char *alias_name = ref->referring->asm_name ();
++        if (!strcmp (alias_name, "scalbn")
++            || !strcmp (alias_name, "scalbnf"))
++          return true;
++      }
++
++  return false;
++}
++
++/* Return true if value of tree node RT is power of 2.
*/
++
++static bool
++nds32_real_ispow2_p (tree rt)
++{
++  if (TREE_CODE (rt) != REAL_CST)
++    return false;
++
++  if (TREE_REAL_CST_PTR (rt)->cl != rvc_normal)
++    return false;
++
++  /* Every significand word except the last must be zero, and the last
++     must equal SIG_MSB.  */
++  int i;
++  for (i = 0; i < SIGSZ-1; ++i)
++    if (TREE_REAL_CST_PTR (rt)->sig[i] != 0)
++      return false;
++  if (TREE_REAL_CST_PTR (rt)->sig[SIGSZ-1] != SIG_MSB)
++    return false;
++
++  return true;
++}
++
++/* Return the exponent of tree node RT in base 2.  */
++
++static int
++nds32_real_pow2exp (tree rt)
++{
++  return REAL_EXP (TREE_REAL_CST_PTR (rt)) - 1;
++}
++
++/* Return true if GS is the target of scalbn transform: a MULT_EXPR
++   assignment whose first operand has type double or float and whose
++   second operand is a power-of-2 real constant.
++
++   The type test mirrors the one in nds32_do_scalbn_transform, which
++   returns without changing anything for every other type.  Without it a
++   statement of another real type would be reported as a target, and thus
++   as transformed, although it is left untouched.  */
++
++static bool
++nds32_scalbn_transform_target_p (gimple *gs)
++{
++  if (!is_gimple_assign (gs))
++    return false;
++
++  if (gimple_assign_rhs_code (gs) != MULT_EXPR)
++    return false;
++
++  tree type = TREE_TYPE (gimple_assign_rhs1 (gs));
++  if (TREE_CODE (type) != REAL_TYPE)
++    return false;
++
++  if (TYPE_MAIN_VARIANT (type) != double_type_node
++      && TYPE_MAIN_VARIANT (type) != float_type_node)
++    return false;
++
++  return nds32_real_ispow2_p (gimple_assign_rhs2 (gs));
++}
++
++/* Do scalbn transform for a GIMPLE statement GS.
++
++   When the multiplier of GIMPLE statement GS is a positive number,
++   GS will be transform to one gimple_call statement and one
++   gimple_assign statement as follows:
++   A = B * 128.0 -> temp = BUILT_IN_SCALBN (B, 7)
++                    A = temp
++
++   When the multiplier is a negative number, the multiplier will be
++   conversed the sign first since BUILT_IN_SCALBN can't handle
++   negative multiplier.  The example is shown below:
++   A = B * -128.0 -> temp = BUILT_IN_SCALBN (B, 7)
++                     A = -temp
++*/
++
++static void
++nds32_do_scalbn_transform (gimple *gs)
++{
++  tree mult_cand = gimple_assign_rhs1 (gs); /* Multiplicand */
++  tree mult_er = gimple_assign_rhs2 (gs); /* Multiplier */
++  bool is_neg = false;
++
++  /* Choose the function by type of arg.  */
++  enum built_in_function fn_name;
++  tree type = TREE_TYPE (mult_cand);
++  if (TYPE_MAIN_VARIANT (type) == double_type_node)
++    fn_name = BUILT_IN_SCALBN;
++  else if (TYPE_MAIN_VARIANT (type) == float_type_node)
++    fn_name = BUILT_IN_SCALBNF;
++  /* Do not transform long double to scalbnl since some c library don't provide
++     it if target don't have real long double type
++  else if (TYPE_MAIN_VARIANT (type) == long_double_type_node)
++    fn_name = BUILT_IN_SCALBNL;
++  */
++  else
++    return;
++
++  /* Converse the sign of negative number.  */
++  if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (mult_er)))
++    {
++      is_neg = true;
++      mult_er = build_real (TREE_TYPE (mult_er),
++                            real_value_negate (&TREE_REAL_CST (mult_er)));
++    }
++
++  /* Set function name for building gimple_call.  */
++  tree fndecl = builtin_decl_explicit (fn_name);
++
++  /* Set last arg for building gimple_call.  */
++  tree exp = build_int_cst (integer_type_node,
++                            nds32_real_pow2exp (mult_er));
++
++  /* Build a new temp ssa.  */
++  tree temp_call_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (gs)), NULL);
++
++  /* Build gimple_call stmt to replace GS.  */
++  gimple *call_stmt = gimple_build_call (fndecl,
++                                         2,
++                                         mult_cand,
++                                         exp);
++  gimple_call_set_lhs (call_stmt, temp_call_ssa);
++
++  enum tree_code subcode = NOP_EXPR;
++  /* Handle negative value.  */
++  if (is_neg)
++    subcode = NEGATE_EXPR;
++
++  /* Build gimple_assign for return value or change the sign.  */
++  gimple *assign_stmt =
++    gimple_build_assign (gimple_assign_lhs (gs),
++                         subcode,
++                         gimple_call_lhs (call_stmt));
++
++  /* Replace gimple_assign GS by new gimple_call.  */
++  gimple_stmt_iterator gsi = gsi_for_stmt (gs);
++  update_stmt (call_stmt);
++  gsi_insert_before (&gsi, call_stmt, GSI_NEW_STMT);
++
++  /* Insert the gimple_assign after the scalbn call.  The iterator sits
++     on the new call, so stepping once reaches GS, which is replaced.  */
++  update_stmt (assign_stmt);
++  gsi_next (&gsi);
++  gsi_replace (&gsi, assign_stmt, false);
++}
++
++/* Do scalbn transform for each basic block BB.
*/
++
++static int
++nds32_scalbn_transform_basic_block (basic_block bb)
++{
++  int n_transformed = 0;
++
++  if (dump_file)
++    fprintf (dump_file,
++             "\n;; Transforming the multiplication for basic block %d\n",
++             bb->index);
++
++  gimple_stmt_iterator it = gsi_start_bb (bb);
++  while (!gsi_end_p (it))
++    {
++      gimple *cur = gsi_stmt (it);
++
++      if (nds32_scalbn_transform_target_p (cur))
++        {
++          if (dump_file)
++            {
++              fprintf (dump_file,
++                       "* The multiplier of stmt %d is transforming.\n",
++                       gimple_uid (cur));
++              print_gimple_stmt (dump_file, cur, 0, TDF_SLIM|TDF_RAW);
++            }
++          nds32_do_scalbn_transform (cur);
++          n_transformed++;
++        }
++
++      gsi_next (&it);
++    }
++
++  return n_transformed;
++}
++
++/* Entry point of the scalbn transform pass.  Returns 0 without touching
++   anything when the current function is scalbn/scalbnf or an alias of
++   them, and 1 after all basic blocks have been processed.  */
++
++static int
++nds32_scalbn_transform_opt (void)
++{
++  basic_block bb;
++  int n_total = 0;
++
++  /* Ignore current and builtin function name are the same.  */
++  if (nds32_is_scalbn_alias_func_p ())
++    {
++      if (dump_file)
++        fprintf (dump_file,
++                 "* Ignore function %s. "
++                 "Transform it will cause infinite loop.\n",
++                 function_name (cfun));
++      return 0;
++    }
++
++  FOR_EACH_BB_FN (bb, cfun)
++    n_total += nds32_scalbn_transform_basic_block (bb);
++
++  if (!dump_file)
++    return 1;
++
++  if (n_total > 0)
++    fprintf (dump_file,
++             "\n;; Transform %d multiplication stmt in function %s\n",
++             n_total,
++             current_function_name ());
++  else
++    fprintf (dump_file,
++             "\n;; No multiplication stmt is transformed in function %s\n",
++             current_function_name ());
++
++  return 1;
++}
++
++/* Gate: the pass runs only when flag_nds32_scalbn_transform is set,
++   TARGET_FPU_SINGLE is clear and builtins are not disabled.  */
++static bool
++gate_nds32_scalbn_transform (void)
++{
++  if (!flag_nds32_scalbn_transform)
++    return false;
++
++  if (TARGET_FPU_SINGLE)
++    return false;
++
++  return !flag_no_builtin;
++}
++
++const pass_data pass_data_nds32_scalbn_transform_opt =
++{
++  GIMPLE_PASS,                          /* type */
++  "scalbn_transform",                   /* name */
++  OPTGROUP_NONE,                        /* optinfo_flags */
++  TV_MACH_DEP,                          /* tv_id */
++  ( PROP_cfg | PROP_ssa ),              /* properties_required */
++  0,                                    /* properties_provided */
++  0,                                    /* properties_destroyed */
++  0,                                    /* todo_flags_start */
++  TODO_update_ssa,                      /* todo_flags_finish */
++};
++
++class pass_nds32_scalbn_transform_opt : public gimple_opt_pass
++{
++public:
++  pass_nds32_scalbn_transform_opt (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_nds32_scalbn_transform_opt, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  bool gate (function *) { return gate_nds32_scalbn_transform (); }
++  unsigned int execute (function *) { return nds32_scalbn_transform_opt (); }
++};
++
++gimple_opt_pass *
++make_pass_nds32_scalbn_transform_opt (gcc::context *ctxt)
++{
++  return new pass_nds32_scalbn_transform_opt (ctxt);
++}
+diff --git a/gcc/config/nds32/nds32-sign-conversion.c b/gcc/config/nds32/nds32-sign-conversion.c
+new file mode 100644
+index 0000000..74eefba
+--- /dev/null
++++ b/gcc/config/nds32/nds32-sign-conversion.c
+@@ -0,0 +1,218 @@
++/* A Gimple-level pass of Andes NDS32 cpu for GNU compiler that
++   converse the sign of constant operand when the FPU do not be
++   
accessed. ++ ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload (). */ ++#include "flags.h" ++#include "input.h" ++#include "function.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "bitmap.h" ++#include "df.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. 
*/ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function (). */ ++#include "ggc.h" ++#include "tree-pass.h" ++#include "tree-ssa-alias.h" ++#include "fold-const.h" ++#include "gimple-expr.h" ++#include "is-a.h" ++#include "gimple.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "gimplify-me.h" ++#include "gimple-ssa.h" ++#include "ipa-ref.h" ++#include "lto-streamer.h" ++#include "cgraph.h" ++#include "tree-cfg.h" ++#include "tree-phinodes.h" ++#include "stringpool.h" ++#include "tree-ssanames.h" ++#include "tree-pass.h" ++#include "gimple-pretty-print.h" ++#include "gimple-fold.h" ++ ++/* Return true if GS is the target of sign conversion. */ ++ ++static bool ++nds32_sign_conversion_target_p (gimple *gs) ++{ ++ if (is_gimple_assign (gs)) ++ if ((gimple_assign_rhs_code (gs) == MINUS_EXPR) ++ && (TREE_CODE (gimple_assign_rhs2 (gs)) == REAL_CST)) ++ return true; ++ return false; ++} ++ ++/* Do sign conversion for a GIMPLE statement GS. */ ++ ++static void ++nds32_do_sign_conversion (gimple *gs) ++{ ++ /* Rewrite the rhs operand. */ ++ enum tree_code op_code = gimple_assign_rhs_code (gs); ++ op_code = PLUS_EXPR; ++ gimple_assign_set_rhs_code (gs, op_code); ++ /* Rewrite the constant value. */ ++ tree rhs2 = gimple_assign_rhs2 (gs); ++ rhs2 = build_real (TREE_TYPE (rhs2), ++ real_value_negate (&TREE_REAL_CST (rhs2))); ++ gimple_assign_set_rhs2 (gs, rhs2); ++ /* When the statement is modified, please mark this statement is modified. */ ++ update_stmt (gs); ++} ++ ++/* Do sign conversion for each basic block BB. 
*/ ++ ++static int ++nds32_sign_conversion_basic_block (basic_block bb) ++{ ++ gimple_stmt_iterator gsi; ++ int converse_number = 0; ++ ++ if (dump_file) ++ fprintf (dump_file, ++ "\n;; Conversing the sign of gimple stmts for basic block %d\n", ++ bb->index); ++ ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ ++ if (nds32_sign_conversion_target_p (stmt)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "* The sign of stmt %d is conversing.\n", ++ gimple_uid (stmt)); ++ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM|TDF_RAW); ++ } ++ nds32_do_sign_conversion (stmt); ++ converse_number++; ++ } ++ } ++ ++ return converse_number; ++} ++ ++/* This function is the entry of sign conversion pass. */ ++ ++static int ++nds32_sign_conversion_opt (void) ++{ ++ basic_block bb; ++ int total_converse_number = 0; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ total_converse_number += nds32_sign_conversion_basic_block (bb); ++ } ++ ++ if (dump_file) ++ { ++ if (total_converse_number > 0) ++ fprintf (dump_file, "\n;; Converse %d stmts in function %s\n", ++ total_converse_number, ++ current_function_name ()); ++ else ++ fprintf (dump_file, ++ "\n;; No sign of stmt is conversed in function %s\n", ++ current_function_name ()); ++ } ++ ++ return 1; ++} ++ ++const pass_data pass_data_nds32_sign_conversion_opt = ++{ ++ GIMPLE_PASS, /* type */ ++ "sign_conversion", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ ( PROP_cfg | PROP_ssa ), /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_update_ssa, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_sign_conversion_opt : public gimple_opt_pass ++{ ++public: ++ pass_nds32_sign_conversion_opt (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_nds32_sign_conversion_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) ++ { ++ return flag_nds32_sign_conversion 
&& !TARGET_FPU_SINGLE; ++ } ++ unsigned int execute (function *) { return nds32_sign_conversion_opt (); } ++}; ++ ++gimple_opt_pass * ++make_pass_nds32_sign_conversion_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_sign_conversion_opt (ctxt); ++} +diff --git a/gcc/config/nds32/nds32-soft-fp-comm.c b/gcc/config/nds32/nds32-soft-fp-comm.c +new file mode 100644 +index 0000000..98ba3d5 +--- /dev/null ++++ b/gcc/config/nds32/nds32-soft-fp-comm.c +@@ -0,0 +1,205 @@ ++/* Operand commutative for soft floating point arithmetic pass ++ of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "tree.h" ++#include "rtl.h" ++#include "df.h" ++#include "alias.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). 
*/ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" ++#include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ ++#include "builtins.h" ++#include "cpplib.h" ++#include "params.h" ++#include "tree-pass.h" ++ ++#define SF_ARG0_REGNO 0 ++#define SF_ARG1_REGNO 1 ++ ++#define DF_ARG0_REGNO 0 ++#define DF_ARG1_REGNO 2 ++ ++static int ++nds32_soft_fp_arith_comm_opt (void) ++{ ++ basic_block bb; ++ rtx_insn *insn; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!CALL_P (insn)) ++ continue; ++ ++ rtx pat = PATTERN (insn); ++ rtx call_rtx = XVECEXP (pat, 0, 0); ++ ++ if (GET_CODE (call_rtx) == SET) ++ call_rtx = SET_SRC (call_rtx); ++ ++ rtx func_mem = XEXP (call_rtx, 0); ++ rtx symbol = XEXP (func_mem, 0); ++ ++ if (GET_CODE (symbol) != SYMBOL_REF) ++ continue; ++ ++ const char *func_name = XSTR (symbol, 0); ++ bool df_p; ++ if (((strcmp("__mulsf3", func_name) == 0) ++ || (strcmp("__addsf3", func_name) == 0))) ++ df_p = false; ++ else if (((strcmp("__muldf3", func_name) == 0) ++ || (strcmp("__adddf3", func_name) == 0))) ++ df_p = true; ++ else ++ continue; ++ ++ rtx_insn *prev_insn = insn; ++ rtx_insn *arg0_insn = NULL; ++ rtx_insn *arg1_insn = NULL; ++ unsigned arg0_regno = df_p ? DF_ARG0_REGNO : SF_ARG0_REGNO; ++ unsigned arg1_regno = df_p ? DF_ARG1_REGNO : SF_ARG1_REGNO; ++ enum machine_mode mode = df_p ? 
DFmode : SFmode; ++ while ((prev_insn = PREV_INSN (prev_insn)) && prev_insn) ++ { ++ if (arg0_insn != NULL && arg1_insn != NULL) ++ break; ++ ++ if (BLOCK_FOR_INSN (prev_insn) != BLOCK_FOR_INSN (insn)) ++ break; ++ ++ if (!NONJUMP_INSN_P (prev_insn)) ++ break; ++ ++ if (!INSN_P (prev_insn)) ++ continue; ++ ++ rtx set = PATTERN (prev_insn); ++ ++ if (GET_CODE (set) != SET) ++ continue; ++ ++ rtx dst_reg = SET_DEST (set); ++ ++ if (!REG_P (dst_reg)) ++ break; ++ ++ unsigned regno = REGNO (dst_reg); ++ ++ if (regno == arg0_regno) ++ { ++ arg0_insn = prev_insn; ++ continue; ++ } ++ else if (regno == arg1_regno) ++ { ++ arg1_insn = prev_insn; ++ continue; ++ } ++ break; ++ } ++ if (arg0_insn == NULL || arg1_insn == NULL) ++ continue; ++ ++ rtx arg0_src = SET_SRC (PATTERN (arg0_insn)); ++ rtx arg1_src = SET_SRC (PATTERN (arg1_insn)); ++ ++ if ((REG_P (arg0_src) ++ && GET_MODE (arg0_src) == mode ++ && REGNO (arg0_src) == arg1_regno) ++ || (REG_P (arg1_src) ++ && GET_MODE (arg1_src) == mode ++ && REGNO (arg1_src) == arg0_regno)) ++ { ++ /* Swap operand! 
*/ ++ rtx tmp = SET_DEST (PATTERN (arg0_insn)); ++ SET_DEST (PATTERN (arg0_insn)) = SET_DEST (PATTERN (arg1_insn)); ++ SET_DEST (PATTERN (arg1_insn)) = tmp; ++ } ++ } ++ } ++ return 1; ++} ++ ++const pass_data pass_data_nds32_soft_fp_arith_comm_opt = ++{ ++ RTL_PASS, /* type */ ++ "soft_fp_arith_comm", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_nds32_soft_fp_arith_comm_opt : public rtl_opt_pass ++{ ++public: ++ pass_nds32_soft_fp_arith_comm_opt (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_soft_fp_arith_comm_opt, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) { ++ return TARGET_SOFT_FP_ARITH_COMM && !TARGET_FPU_SINGLE; ++ } ++ unsigned int execute (function *) { return nds32_soft_fp_arith_comm_opt (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_soft_fp_arith_comm_opt (gcc::context *ctxt) ++{ ++ return new pass_nds32_soft_fp_arith_comm_opt (ctxt); ++} +diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c +new file mode 100644 +index 0000000..3b16738 +--- /dev/null ++++ b/gcc/config/nds32/nds32-utils.c +@@ -0,0 +1,923 @@ ++/* Auxiliary functions for pipeline descriptions pattern of Andes ++ NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "hash-set.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "wide-int.h" ++#include "inchash.h" ++#include "tree.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "calls.h" ++#include "rtl.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "insn-config.h" /* Required by recog.h. */ ++#include "conditions.h" ++#include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "input.h" ++#include "function.h" ++#include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "dominance.h" ++#include "cfg.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "predict.h" ++#include "basic-block.h" ++#include "nds32-protos.h" ++ ++namespace nds32 { ++ ++/* Get the rtx in the PATTERN field of an insn. If INSN is not an insn, ++ the funciton doesn't change anything and returns it directly. */ ++rtx ++extract_pattern_from_insn (rtx insn) ++{ ++ if (INSN_P (insn)) ++ return PATTERN (insn); ++ ++ return insn; ++} ++ ++/* Get the number of elements in a parallel rtx. */ ++size_t ++parallel_elements (rtx parallel_rtx) ++{ ++ parallel_rtx = extract_pattern_from_insn (parallel_rtx); ++ gcc_assert (GET_CODE (parallel_rtx) == PARALLEL); ++ ++ return XVECLEN (parallel_rtx, 0); ++} ++ ++/* Extract an rtx from a parallel rtx with index NTH. 
If NTH is a negative ++ value, the function returns the last NTH rtx. */ ++rtx ++parallel_element (rtx parallel_rtx, int nth) ++{ ++ parallel_rtx = extract_pattern_from_insn (parallel_rtx); ++ gcc_assert (GET_CODE (parallel_rtx) == PARALLEL); ++ ++ int len = parallel_elements (parallel_rtx); ++ ++ if (nth >= 0) ++ { ++ if (nth >= len) ++ return NULL_RTX; ++ ++ return XVECEXP (parallel_rtx, 0, nth); ++ } ++ else ++ { ++ if (len + nth < 0) ++ return NULL_RTX; ++ ++ return XVECEXP (parallel_rtx, 0, len + nth); ++ } ++} ++ ++/* Return true if an insn is a pseudo NOP that is not a real instruction ++ occupying a real cycle and space of the text section. */ ++bool ++insn_pseudo_nop_p (rtx_insn *insn) ++{ ++ if (INSN_CODE (insn) == CODE_FOR_nop_data_dep ++ || INSN_CODE (insn) == CODE_FOR_nop_res_dep) ++ return true; ++ ++ return false; ++} ++ ++/* Indicate whether an insn is a real insn which occupy at least one cycle ++ or not. The determination cannot be target-independent because some targets ++ use UNSPEC or UNSPEC_VOLATILE insns to represent real instructions. */ ++bool ++insn_executable_p (rtx_insn *insn) ++{ ++ if (!INSN_P (insn)) ++ return false; ++ ++ if (insn_pseudo_nop_p (insn)) ++ return true; ++ ++ if (get_attr_length (insn) == 0) ++ return false; ++ ++ switch (GET_CODE (PATTERN (insn))) ++ { ++ case CONST_INT: ++ case USE: ++ case CLOBBER: ++ case ADDR_VEC: ++ case ADDR_DIFF_VEC: ++ case UNSPEC: ++ case UNSPEC_VOLATILE: ++ return false; ++ ++ default: ++ return true; ++ } ++ ++ return true; ++} ++ ++/* Find the previous executable insn. */ ++rtx_insn * ++prev_executable_insn (rtx_insn *insn) ++{ ++ insn = PREV_INSN (insn); ++ while (insn && !insn_executable_p (insn)) ++ insn = PREV_INSN (insn); ++ ++ return insn; ++} ++ ++/* Find the next executable insn. 
*/ ++rtx_insn * ++next_executable_insn (rtx_insn *insn) ++{ ++ insn = NEXT_INSN (insn); ++ while (insn && !insn_executable_p (insn)) ++ insn = NEXT_INSN (insn); ++ ++ return insn; ++} ++ ++/* Find the previous executable insn in the current basic block. */ ++rtx_insn * ++prev_executable_insn_local (rtx_insn *insn) ++{ ++ insn = PREV_INSN (insn); ++ while (insn && !insn_executable_p (insn)) ++ { ++ if(LABEL_P (insn) || JUMP_P (insn) || CALL_P (insn)) ++ return NULL; ++ ++ insn = PREV_INSN (insn); ++ } ++ ++ return insn; ++} ++ ++/* Find the next executable insn in the current basic block. */ ++rtx_insn * ++next_executable_insn_local (rtx_insn *insn) ++{ ++ insn = NEXT_INSN (insn); ++ while (insn && !insn_executable_p (insn)) ++ { ++ if(LABEL_P (insn) || JUMP_P (insn) || CALL_P (insn)) ++ return NULL; ++ ++ insn = NEXT_INSN (insn); ++ } ++ ++ return insn; ++} ++ ++/* Return true if an insn is marked as deleted. */ ++bool ++insn_deleted_p (rtx_insn *insn) ++{ ++ if (insn->deleted ()) ++ return true; ++ ++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED) ++ return true; ++ ++ return false; ++} ++ ++/* Functions to determine whether INSN is single-word, double-word ++ or partial-word load/store insn. 
*/ ++ ++bool ++load_single_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) != TYPE_LOAD) ++ return false; ++ ++ if (INSN_CODE (insn) == CODE_FOR_move_di || ++ INSN_CODE (insn) == CODE_FOR_move_df) ++ return false; ++ ++ return true; ++} ++ ++bool ++store_single_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) != TYPE_STORE) ++ return false; ++ ++ if (INSN_CODE (insn) == CODE_FOR_move_di || ++ INSN_CODE (insn) == CODE_FOR_move_df) ++ return false; ++ ++ return true; ++} ++ ++bool ++load_double_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) != TYPE_LOAD) ++ return false; ++ ++ if (INSN_CODE (insn) != CODE_FOR_move_di && ++ INSN_CODE (insn) != CODE_FOR_move_df) ++ return false; ++ ++ return true; ++} ++ ++bool ++store_double_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) != TYPE_STORE) ++ return false; ++ ++ if (INSN_CODE (insn) != CODE_FOR_move_di && ++ INSN_CODE (insn) != CODE_FOR_move_df) ++ return false; ++ ++ return true; ++} ++ ++bool ++store_offset_reg_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) != TYPE_STORE) ++ return false; ++ ++ rtx offset_rtx = extract_offset_rtx (insn); ++ ++ if (offset_rtx == NULL_RTX) ++ return false; ++ ++ if (REG_P (offset_rtx)) ++ return true; ++ ++ return false; ++} ++ ++bool ++load_full_word_p (rtx_insn *insn) ++{ ++ if (!nds32::load_single_p (insn)) ++ return false; ++ ++ if (GET_MODE (SET_SRC (PATTERN (insn))) == SImode) ++ return true; ++ ++ return false; ++} ++ ++bool ++load_partial_word_p (rtx_insn *insn) ++{ ++ if (!nds32::load_single_p (insn)) ++ return false; ++ ++ if (GET_MODE (SET_SRC (PATTERN (insn))) == HImode ++ || GET_MODE (SET_SRC (PATTERN (insn))) == QImode) ++ return true; ++ ++ return false; ++} ++ ++/* Determine if INSN is a post update insn. */ ++bool ++post_update_insn_p (rtx_insn *insn) ++{ ++ if (find_post_update_rtx (insn) == -1) ++ return false; ++ else ++ return true; ++} ++ ++/* Check if the address of MEM_RTX consists of a base register and an ++ immediate offset. 
*/ ++bool ++immed_offset_p (rtx mem_rtx) ++{ ++ gcc_assert (MEM_P (mem_rtx)); ++ ++ rtx addr_rtx = XEXP (mem_rtx, 0); ++ ++ /* (mem (reg)) is equivalent to (mem (plus (reg) (const_int 0))) */ ++ if (REG_P (addr_rtx)) ++ return true; ++ ++ /* (mem (plus (reg) (const_int))) */ ++ if (GET_CODE (addr_rtx) == PLUS ++ && GET_CODE (XEXP (addr_rtx, 1)) == CONST_INT) ++ return true; ++ ++ return false; ++} ++ ++/* Find the post update rtx in INSN. If INSN is a load/store multiple insn, ++ the function returns the vector index of its parallel part. If INSN is a ++ single load/store insn, the function returns 0. If INSN is not a post- ++ update insn, the function returns -1. */ ++int ++find_post_update_rtx (rtx_insn *insn) ++{ ++ rtx mem_rtx; ++ int i, len; ++ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ /* Find a pattern in a parallel rtx: ++ (set (reg) (plus (reg) (const_int))) */ ++ len = parallel_elements (insn); ++ for (i = 0; i < len; ++i) ++ { ++ rtx curr_insn = parallel_element (insn, i); ++ ++ if (GET_CODE (curr_insn) == SET ++ && REG_P (SET_DEST (curr_insn)) ++ && GET_CODE (SET_SRC (curr_insn)) == PLUS) ++ return i; ++ } ++ return -1; ++ ++ case TYPE_LOAD: ++ case TYPE_FLOAD: ++ case TYPE_STORE: ++ case TYPE_FSTORE: ++ mem_rtx = extract_mem_rtx (insn); ++ /* (mem (post_inc (reg))) */ ++ switch (GET_CODE (XEXP (mem_rtx, 0))) ++ { ++ case POST_INC: ++ case POST_DEC: ++ case POST_MODIFY: ++ return 0; ++ ++ default: ++ return -1; ++ } ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Extract the MEM rtx from a load/store insn. 
*/ ++rtx ++extract_mem_rtx (rtx_insn *insn) ++{ ++ rtx body = PATTERN (insn); ++ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_LOAD: ++ case TYPE_FLOAD: ++ if (MEM_P (SET_SRC (body))) ++ return SET_SRC (body); ++ ++ /* unaligned address: (unspec [(mem)]) */ ++ if (GET_CODE (SET_SRC (body)) == UNSPEC) ++ { ++ gcc_assert (MEM_P (XVECEXP (SET_SRC (body), 0, 0))); ++ return XVECEXP (SET_SRC (body), 0, 0); ++ } ++ ++ /* (sign_extend (mem)) */ ++ gcc_assert (MEM_P (XEXP (SET_SRC (body), 0))); ++ return XEXP (SET_SRC (body), 0); ++ ++ case TYPE_STORE: ++ case TYPE_FSTORE: ++ if (MEM_P (SET_DEST (body))) ++ return SET_DEST (body); ++ ++ /* unaligned address: (unspec [(mem)]) */ ++ if (GET_CODE (SET_DEST (body)) == UNSPEC) ++ { ++ gcc_assert (MEM_P (XVECEXP (SET_DEST (body), 0, 0))); ++ return XVECEXP (SET_DEST (body), 0, 0); ++ } ++ ++ /* (sign_extend (mem)) */ ++ gcc_assert (MEM_P (XEXP (SET_DEST (body), 0))); ++ return XEXP (SET_DEST (body), 0); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Extract the base register from load/store insns. The function returns ++ NULL_RTX if the address is not consist of any registers. */ ++rtx ++extract_base_reg (rtx_insn *insn) ++{ ++ int post_update_rtx_index; ++ rtx mem_rtx; ++ rtx plus_rtx; ++ ++ /* Find the MEM rtx. If we can find an insn updating the base register, ++ the base register will be returned directly. 
*/ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_LOAD_MULTIPLE: ++ post_update_rtx_index = find_post_update_rtx (insn); ++ ++ if (post_update_rtx_index != -1) ++ return SET_DEST (parallel_element (insn, post_update_rtx_index)); ++ ++ mem_rtx = SET_SRC (parallel_element (insn, 0)); ++ break; ++ ++ case TYPE_STORE_MULTIPLE: ++ post_update_rtx_index = find_post_update_rtx (insn); ++ ++ if (post_update_rtx_index != -1) ++ return SET_DEST (parallel_element (insn, post_update_rtx_index)); ++ ++ mem_rtx = SET_DEST (parallel_element (insn, 0)); ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_FLOAD: ++ case TYPE_STORE: ++ case TYPE_FSTORE: ++ mem_rtx = extract_mem_rtx (insn); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (MEM_P (mem_rtx)); ++ ++ /* (mem (reg)) */ ++ if (REG_P (XEXP (mem_rtx, 0))) ++ return XEXP (mem_rtx, 0); ++ ++ /* (mem (lo_sum (reg) (symbol_ref)) */ ++ if (GET_CODE (XEXP (mem_rtx, 0)) == LO_SUM) ++ return XEXP (XEXP (mem_rtx, 0), 0); ++ ++ plus_rtx = XEXP (mem_rtx, 0); ++ ++ if (GET_CODE (plus_rtx) == SYMBOL_REF ++ || GET_CODE (plus_rtx) == CONST) ++ return NULL_RTX; ++ ++ /* (mem (plus (reg) (const_int))) or ++ (mem (plus (mult (reg) (const_int 4)) (reg))) or ++ (mem (post_inc (reg))) or ++ (mem (post_dec (reg))) or ++ (mem (post_modify (reg) (plus (reg) (reg)))) */ ++ gcc_assert (GET_CODE (plus_rtx) == PLUS ++ || GET_CODE (plus_rtx) == POST_INC ++ || GET_CODE (plus_rtx) == POST_DEC ++ || GET_CODE (plus_rtx) == POST_MODIFY); ++ ++ if (REG_P (XEXP (plus_rtx, 0))) ++ return XEXP (plus_rtx, 0); ++ ++ gcc_assert (REG_P (XEXP (plus_rtx, 1))); ++ return XEXP (plus_rtx, 1); ++} ++ ++/* Extract the offset rtx from load/store insns. The function returns ++ NULL_RTX if offset is absent. */ ++rtx ++extract_offset_rtx (rtx_insn *insn) ++{ ++ rtx mem_rtx; ++ rtx plus_rtx; ++ rtx offset_rtx; ++ ++ /* Find the MEM rtx. The multiple load/store insns doens't have ++ the offset field so we can return NULL_RTX here. 
*/ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ return NULL_RTX; ++ ++ case TYPE_LOAD: ++ case TYPE_FLOAD: ++ case TYPE_STORE: ++ case TYPE_FSTORE: ++ mem_rtx = extract_mem_rtx (insn); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (MEM_P (mem_rtx)); ++ ++ /* (mem (reg)) */ ++ if (REG_P (XEXP (mem_rtx, 0))) ++ return NULL_RTX; ++ ++ plus_rtx = XEXP (mem_rtx, 0); ++ ++ switch (GET_CODE (plus_rtx)) ++ { ++ case SYMBOL_REF: ++ case CONST: ++ case POST_INC: ++ case POST_DEC: ++ return NULL_RTX; ++ ++ case PLUS: ++ /* (mem (plus (reg) (const_int))) or ++ (mem (plus (mult (reg) (const_int 4)) (reg))) */ ++ if (REG_P (XEXP (plus_rtx, 0))) ++ offset_rtx = XEXP (plus_rtx, 1); ++ else ++ { ++ gcc_assert (REG_P (XEXP (plus_rtx, 1))); ++ offset_rtx = XEXP (plus_rtx, 0); ++ } ++ ++ if (ARITHMETIC_P (offset_rtx)) ++ { ++ gcc_assert (GET_CODE (offset_rtx) == MULT); ++ gcc_assert (REG_P (XEXP (offset_rtx, 0))); ++ offset_rtx = XEXP (offset_rtx, 0); ++ } ++ break; ++ ++ case LO_SUM: ++ /* (mem (lo_sum (reg) (symbol_ref)) */ ++ offset_rtx = XEXP (plus_rtx, 1); ++ break; ++ ++ case POST_MODIFY: ++ /* (mem (post_modify (reg) (plus (reg) (reg / const_int)))) */ ++ gcc_assert (REG_P (XEXP (plus_rtx, 0))); ++ plus_rtx = XEXP (plus_rtx, 1); ++ gcc_assert (GET_CODE (plus_rtx) == PLUS); ++ offset_rtx = XEXP (plus_rtx, 0); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return offset_rtx; ++} ++ ++/* Extract the register of the shift operand from an ALU_SHIFT rtx. */ ++rtx ++extract_shift_reg (rtx_insn *insn) ++{ ++ rtx alu_shift_rtx = extract_pattern_from_insn (insn); ++ ++ rtx alu_rtx = SET_SRC (alu_shift_rtx); ++ rtx shift_rtx; ++ ++ /* Various forms of ALU_SHIFT can be made by the combiner. ++ See the difference between add_slli and sub_slli in nds32.md. 
*/ ++ if (REG_P (XEXP (alu_rtx, 0))) ++ shift_rtx = XEXP (alu_rtx, 1); ++ else ++ shift_rtx = XEXP (alu_rtx, 0); ++ ++ return XEXP (shift_rtx, 0); ++} ++ ++/* Check if INSN is a movd44 insn. */ ++bool ++movd44_insn_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) == TYPE_ALU ++ && (INSN_CODE (insn) == CODE_FOR_move_di ++ || INSN_CODE (insn) == CODE_FOR_move_df)) ++ { ++ rtx body = PATTERN (insn); ++ gcc_assert (GET_CODE (body) == SET); ++ ++ rtx src = SET_SRC (body); ++ rtx dest = SET_DEST (body); ++ ++ if ((REG_P (src) || GET_CODE (src) == SUBREG) ++ && (REG_P (dest) || GET_CODE (dest) == SUBREG)) ++ return true; ++ ++ return false; ++ } ++ ++ return false; ++} ++ ++/* Extract the first result (even reg) of a movd44 insn. */ ++rtx ++extract_movd44_even_reg (rtx_insn *insn) ++{ ++ gcc_assert (movd44_insn_p (insn)); ++ ++ rtx def_reg = SET_DEST (PATTERN (insn)); ++ enum machine_mode mode; ++ ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ switch (GET_MODE (def_reg)) ++ { ++ case DImode: ++ mode = SImode; ++ break; ++ ++ case DFmode: ++ mode = SFmode; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return gen_lowpart (mode, def_reg); ++} ++ ++/* Extract the second result (odd reg) of a movd44 insn. */ ++rtx ++extract_movd44_odd_reg (rtx_insn *insn) ++{ ++ gcc_assert (movd44_insn_p (insn)); ++ ++ rtx def_reg = SET_DEST (PATTERN (insn)); ++ enum machine_mode mode; ++ ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ switch (GET_MODE (def_reg)) ++ { ++ case DImode: ++ mode = SImode; ++ break; ++ ++ case DFmode: ++ mode = SFmode; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return gen_highpart (mode, def_reg); ++} ++ ++/* Extract the rtx representing the accumulation operand of a MAC insn. */ ++rtx ++extract_mac_acc_rtx (rtx_insn *insn) ++{ ++ return SET_DEST (PATTERN (insn)); ++} ++ ++/* Extract the rtx representing non-accumulation operands of a MAC insn. 
*/ ++rtx ++extract_mac_non_acc_rtx (rtx_insn *insn) ++{ ++ rtx exp = SET_SRC (PATTERN (insn)); ++ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_MAC: ++ case TYPE_DMAC: ++ if (REG_P (XEXP (exp, 0))) ++ return XEXP (exp, 1); ++ else ++ return XEXP (exp, 0); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Check if the DIV insn needs two write ports. */ ++bool ++divmod_p (rtx_insn *insn) ++{ ++ gcc_assert (get_attr_type (insn) == TYPE_DIV); ++ ++ if (INSN_CODE (insn) == CODE_FOR_divmodsi4 ++ || INSN_CODE (insn) == CODE_FOR_udivmodsi4) ++ return true; ++ ++ return false; ++} ++ ++/* Extract the rtx representing the branch target to help recognize ++ data hazards. */ ++rtx ++extract_branch_target_rtx (rtx_insn *insn) ++{ ++ gcc_assert (CALL_P (insn) || JUMP_P (insn)); ++ ++ rtx body = PATTERN (insn); ++ ++ if (GET_CODE (body) == SET) ++ { ++ /* RTXs in IF_THEN_ELSE are branch conditions. */ ++ if (GET_CODE (SET_SRC (body)) == IF_THEN_ELSE) ++ return NULL_RTX; ++ ++ return SET_SRC (body); ++ } ++ ++ if (GET_CODE (body) == CALL) ++ return XEXP (body, 0); ++ ++ if (GET_CODE (body) == PARALLEL) ++ { ++ rtx first_rtx = parallel_element (body, 0); ++ ++ if (GET_CODE (first_rtx) == SET) ++ return SET_SRC (first_rtx); ++ ++ if (GET_CODE (first_rtx) == CALL) ++ return XEXP (first_rtx, 0); ++ } ++ ++ /* Handle special cases of bltzal, bgezal and jralnez. */ ++ if (GET_CODE (body) == COND_EXEC) ++ { ++ rtx addr_rtx = XEXP (body, 1); ++ ++ if (GET_CODE (addr_rtx) == SET) ++ return SET_SRC (addr_rtx); ++ ++ if (GET_CODE (addr_rtx) == PARALLEL) ++ { ++ rtx first_rtx = parallel_element (addr_rtx, 0); ++ ++ if (GET_CODE (first_rtx) == SET) ++ { ++ rtx call_rtx = SET_SRC (first_rtx); ++ gcc_assert (GET_CODE (call_rtx) == CALL); ++ ++ return XEXP (call_rtx, 0); ++ } ++ ++ if (GET_CODE (first_rtx) == CALL) ++ return XEXP (first_rtx, 0); ++ } ++ } ++ ++ gcc_unreachable (); ++} ++ ++/* Extract the rtx representing the branch condition to help recognize ++ data hazards. 
*/ ++rtx ++extract_branch_condition_rtx (rtx_insn *insn) ++{ ++ gcc_assert (CALL_P (insn) || JUMP_P (insn)); ++ ++ rtx body = PATTERN (insn); ++ ++ if (GET_CODE (body) == SET) ++ { ++ rtx if_then_else_rtx = SET_SRC (body); ++ ++ if (GET_CODE (if_then_else_rtx) == IF_THEN_ELSE) ++ return XEXP (if_then_else_rtx, 0); ++ ++ return NULL_RTX; ++ } ++ ++ if (GET_CODE (body) == COND_EXEC) ++ return XEXP (body, 0); ++ ++ return NULL_RTX; ++} ++ ++/* Building the CFG in later back end passes cannot call compute_bb_for_insn () ++ directly because calling to BLOCK_FOR_INSN (insn) when some insns have been ++ deleted can cause a segmentation fault. Use this function to rebuild the CFG ++ can avoid such issues. */ ++void ++compute_bb_for_insn_safe () ++{ ++ basic_block bb; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ rtx_insn *insn, *next_insn, *last_insn; ++ bool after_last_insn = false; ++ ++ /* Find the last non-deleted insn. */ ++ for (last_insn = BB_END (bb); ++ PREV_INSN (last_insn) && insn_deleted_p (last_insn); ++ last_insn = PREV_INSN (last_insn)); ++ ++ /* Bind each insn to its BB and adjust BB_END (bb). */ ++ for (insn = BB_HEAD (bb); insn; insn = NEXT_INSN (insn)) ++ { ++ BLOCK_FOR_INSN (insn) = bb; ++ ++ if (insn == last_insn) ++ after_last_insn = true; ++ ++ next_insn = NEXT_INSN (insn); ++ ++ if (after_last_insn ++ && (!next_insn ++ || LABEL_P (next_insn) ++ || NOTE_INSN_BASIC_BLOCK_P (next_insn))) ++ { ++ BB_END (bb) = insn; ++ break; ++ } ++ } ++ } ++} ++ ++/* Exchange insns positions. 
*/ ++void ++exchange_insns (rtx_insn *insn1, rtx_insn *insn2) ++{ ++ if (INSN_UID (insn1) == INSN_UID (insn2)) ++ return; ++ ++ rtx_insn *insn1_from = insn1; ++ rtx_insn *insn1_to = insn1; ++ rtx_insn *insn2_from = insn2; ++ rtx_insn *insn2_to = insn2; ++ ++ if (PREV_INSN (insn1) ++ && INSN_CODE (PREV_INSN (insn1)) == CODE_FOR_relax_group) ++ insn1_from = PREV_INSN (insn1); ++ ++ if (PREV_INSN (insn2) ++ && INSN_CODE (PREV_INSN (insn2)) == CODE_FOR_relax_group) ++ insn2_from = PREV_INSN (insn2); ++ ++ if (GET_MODE (insn1) == TImode && GET_MODE (insn2) == VOIDmode) ++ { ++ PUT_MODE (insn1, VOIDmode); ++ PUT_MODE (insn2, TImode); ++ } ++ else if (GET_MODE (insn1) == VOIDmode && GET_MODE (insn2) == TImode) ++ { ++ PUT_MODE (insn1, TImode); ++ PUT_MODE (insn2, VOIDmode); ++ } ++ ++ if (PREV_INSN (insn1_from)) ++ { ++ rtx_insn *insn1_prev = PREV_INSN (insn1_from); ++ ++ reorder_insns (insn1_from, insn1_to, insn2); ++ reorder_insns (insn2_from, insn2_to, insn1_prev); ++ ++ return; ++ } ++ ++ gcc_assert (PREV_INSN (insn2_from)); ++ ++ rtx_insn *insn2_prev = PREV_INSN (insn2_from); ++ ++ reorder_insns (insn2_from, insn2_to, insn1); ++ reorder_insns (insn1_from, insn1_to, insn2_prev); ++ ++ return; ++} ++ ++} // namespace nds32 +diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c +index c47c122..5f5e668 100644 +--- a/gcc/config/nds32/nds32.c ++++ b/gcc/config/nds32/nds32.c +@@ -24,48 +24,103 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" +-#include "target.h" +-#include "rtl.h" + #include "tree.h" ++#include "rtl.h" + #include "df.h" +-#include "tm_p.h" +-#include "optabs.h" /* For GEN_FCN. */ +-#include "regs.h" +-#include "emit-rtl.h" +-#include "recog.h" +-#include "diagnostic-core.h" ++#include "alias.h" ++#include "stringpool.h" + #include "stor-layout.h" + #include "varasm.h" + #include "calls.h" ++#include "regs.h" ++#include "insn-config.h" /* Required by recog.h. 
*/ ++#include "conditions.h" + #include "output.h" ++#include "insn-attr.h" /* For DFA state_t. */ ++#include "insn-codes.h" /* For CODE_FOR_xxx. */ ++#include "reload.h" /* For push_reload(). */ ++#include "flags.h" ++#include "insn-config.h" ++#include "expmed.h" ++#include "dojump.h" + #include "explow.h" ++#include "emit-rtl.h" ++#include "stmt.h" + #include "expr.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "cfgrtl.h" ++#include "cfganal.h" ++#include "lcm.h" ++#include "cfgbuild.h" ++#include "cfgcleanup.h" ++#include "tm_p.h" + #include "tm-constrs.h" ++#include "optabs.h" /* For GEN_FCN. */ ++#include "target.h" ++#include "langhooks.h" /* For add_builtin_function(). */ + #include "builtins.h" ++#include "cpplib.h" ++#include "params.h" ++#include "tree-pass.h" ++#include "cfgloop.h" ++#include "cfghooks.h" ++#include "hw-doloop.h" ++#include "context.h" ++#include "sched-int.h" + + /* This file should be included last. */ + #include "target-def.h" + + /* ------------------------------------------------------------------------ */ + +-/* This file is divided into five parts: ++/* This file is divided into six parts: + +- PART 1: Auxiliary static variable definitions and +- target hook static variable definitions. ++ PART 1: Auxiliary external function and variable declarations. + +- PART 2: Auxiliary static function definitions. ++ PART 2: Auxiliary static variable definitions and ++ target hook static variable definitions. + +- PART 3: Implement target hook stuff definitions. ++ PART 3: Auxiliary static function definitions. + +- PART 4: Implemet extern function definitions, +- the prototype is in nds32-protos.h. ++ PART 4: Implement target hook stuff definitions. + +- PART 5: Initialize target hook structure and definitions. */ ++ PART 5: Implemet extern function definitions, ++ the prototype is in nds32-protos.h. ++ ++ PART 6: Initialize target hook structure and definitions. 
*/ ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* PART 1: Auxiliary function and variable declarations. */ ++ ++namespace nds32 { ++namespace scheduling { ++ ++rtl_opt_pass *make_pass_nds32_print_stalls (gcc::context *); ++ ++} // namespace scheduling ++} // namespace nds32 ++ ++rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *); ++rtl_opt_pass *make_pass_nds32_load_store_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_soft_fp_arith_comm_opt(gcc::context *); ++rtl_opt_pass *make_pass_nds32_regrename_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_gcse_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_rename_lmwsmw_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_gen_lmwsmw_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_const_remater_opt (gcc::context *); ++rtl_opt_pass *make_pass_nds32_cprop_acc_opt (gcc::context *); ++ ++gimple_opt_pass *make_pass_nds32_sign_conversion_opt (gcc::context *); ++gimple_opt_pass *make_pass_nds32_scalbn_transform_opt (gcc::context *); ++gimple_opt_pass *make_pass_nds32_abi_compatible (gcc::context *); + + /* ------------------------------------------------------------------------ */ + +-/* PART 1: Auxiliary static variable definitions and +- target hook static variable definitions. */ ++/* PART 2: Auxiliary static variable definitions and ++ target hook static variable definitions. */ + + /* Define intrinsic register names. + Please refer to nds32_intrinsic.h file, the index is corresponding to +@@ -73,14 +128,217 @@ + NOTE that the base value starting from 1024. 
*/ + static const char * const nds32_intrinsic_register_names[] = + { +- "$PSW", "$IPSW", "$ITYPE", "$IPC" ++ "$CPU_VER", ++ "$ICM_CFG", ++ "$DCM_CFG", ++ "$MMU_CFG", ++ "$MSC_CFG", ++ "$MSC_CFG2", ++ "$CORE_ID", ++ "$FUCOP_EXIST", ++ ++ "$PSW", ++ "$IPSW", ++ "$P_IPSW", ++ "$IVB", ++ "$EVA", ++ "$P_EVA", ++ "$ITYPE", ++ "$P_ITYPE", ++ ++ "$MERR", ++ "$IPC", ++ "$P_IPC", ++ "$OIPC", ++ "$P_P0", ++ "$P_P1", ++ ++ "$INT_MASK", ++ "$INT_MASK2", ++ "$INT_MASK3", ++ "$INT_PEND", ++ "$INT_PEND2", ++ "$INT_PEND3", ++ "$SP_USR", ++ "$SP_PRIV", ++ "$INT_PRI", ++ "$INT_PRI2", ++ "$INT_PRI3", ++ "$INT_PRI4", ++ "$INT_CTRL", ++ "$INT_TRIGGER", ++ "$INT_TRIGGER2", ++ "$INT_GPR_PUSH_DIS", ++ ++ "$MMU_CTL", ++ "$L1_PPTB", ++ "$TLB_VPN", ++ "$TLB_DATA", ++ "$TLB_MISC", ++ "$VLPT_IDX", ++ "$ILMB", ++ "$DLMB", ++ ++ "$CACHE_CTL", ++ "$HSMP_SADDR", ++ "$HSMP_EADDR", ++ "$SDZ_CTL", ++ "$N12MISC_CTL", ++ "$MISC_CTL", ++ "$ECC_MISC", ++ ++ "$BPC0", ++ "$BPC1", ++ "$BPC2", ++ "$BPC3", ++ "$BPC4", ++ "$BPC5", ++ "$BPC6", ++ "$BPC7", ++ ++ "$BPA0", ++ "$BPA1", ++ "$BPA2", ++ "$BPA3", ++ "$BPA4", ++ "$BPA5", ++ "$BPA6", ++ "$BPA7", ++ ++ "$BPAM0", ++ "$BPAM1", ++ "$BPAM2", ++ "$BPAM3", ++ "$BPAM4", ++ "$BPAM5", ++ "$BPAM6", ++ "$BPAM7", ++ ++ "$BPV0", ++ "$BPV1", ++ "$BPV2", ++ "$BPV3", ++ "$BPV4", ++ "$BPV5", ++ "$BPV6", ++ "$BPV7", ++ ++ "$BPCID0", ++ "$BPCID1", ++ "$BPCID2", ++ "$BPCID3", ++ "$BPCID4", ++ "$BPCID5", ++ "$BPCID6", ++ "$BPCID7", ++ ++ "$EDM_CFG", ++ "$EDMSW", ++ "$EDM_CTL", ++ "$EDM_DTR", ++ "$BPMTC", ++ "$DIMBR", ++ ++ "$TECR0", ++ "$TECR1", ++ "$PFMC0", ++ "$PFMC1", ++ "$PFMC2", ++ "$PFM_CTL", ++ "$PFT_CTL", ++ "$HSP_CTL", ++ "$SP_BOUND", ++ "$SP_BOUND_PRIV", ++ "$SP_BASE", ++ "$SP_BASE_PRIV", ++ "$FUCOP_CTL", ++ "$PRUSR_ACC_CTL", ++ ++ "$DMA_CFG", ++ "$DMA_GCSW", ++ "$DMA_CHNSEL", ++ "$DMA_ACT", ++ "$DMA_SETUP", ++ "$DMA_ISADDR", ++ "$DMA_ESADDR", ++ "$DMA_TCNT", ++ "$DMA_STATUS", ++ "$DMA_2DSET", ++ "$DMA_2DSCTL", ++ "$DMA_RCNT", ++ "$DMA_HSTATUS", ++ ++ "$PC", ++ 
"$SP_USR1", ++ "$SP_USR2", ++ "$SP_USR3", ++ "$SP_PRIV1", ++ "$SP_PRIV2", ++ "$SP_PRIV3", ++ "$BG_REGION", ++ "$SFCR", ++ "$SIGN", ++ "$ISIGN", ++ "$P_ISIGN", ++ "$IFC_LP", ++ "$ITB" ++}; ++ ++/* Define instrinsic cctl names. */ ++static const char * const nds32_cctl_names[] = ++{ ++ "L1D_VA_FILLCK", ++ "L1D_VA_ULCK", ++ "L1I_VA_FILLCK", ++ "L1I_VA_ULCK", ++ ++ "L1D_IX_WBINVAL", ++ "L1D_IX_INVAL", ++ "L1D_IX_WB", ++ "L1I_IX_INVAL", ++ ++ "L1D_VA_INVAL", ++ "L1D_VA_WB", ++ "L1D_VA_WBINVAL", ++ "L1I_VA_INVAL", ++ ++ "L1D_IX_RTAG", ++ "L1D_IX_RWD", ++ "L1I_IX_RTAG", ++ "L1I_IX_RWD", ++ ++ "L1D_IX_WTAG", ++ "L1D_IX_WWD", ++ "L1I_IX_WTAG", ++ "L1I_IX_WWD" ++}; ++ ++static const char * const nds32_dpref_names[] = ++{ ++ "SRD", ++ "MRD", ++ "SWR", ++ "MWR", ++ "PTE", ++ "CLWR" ++}; ++ ++/* Defining register allocation order for performance. ++ We want to allocate callee-saved registers after others. ++ It may be used by nds32_adjust_reg_alloc_order(). */ ++static const int nds32_reg_alloc_order_for_speed[] = ++{ ++ 0, 1, 2, 3, 4, 5, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, ++ 26, 27, 6, 7, 8, 9, 10, 11, ++ 12, 13, 14, 15 + }; + + /* Defining target-specific uses of __attribute__. */ + static const struct attribute_spec nds32_attribute_table[] = + { + /* Syntax: { name, min_len, max_len, decl_required, type_required, +- function_type_required, handler, affects_type_identity } */ ++ function_type_required, handler, affects_type_identity } */ + + /* The interrupt vid: [0-63]+ (actual vector number starts from 9 to 72). */ + { "interrupt", 1, 64, false, false, false, NULL, false }, +@@ -93,6 +351,7 @@ static const struct attribute_spec nds32_attribute_table[] = + { "nested", 0, 0, false, false, false, NULL, false }, + { "not_nested", 0, 0, false, false, false, NULL, false }, + { "nested_ready", 0, 0, false, false, false, NULL, false }, ++ { "critical", 0, 0, false, false, false, NULL, false }, + + /* The attributes describing isr register save scheme. 
*/ + { "save_all", 0, 0, false, false, false, NULL, false }, +@@ -102,17 +361,32 @@ static const struct attribute_spec nds32_attribute_table[] = + { "nmi", 1, 1, false, false, false, NULL, false }, + { "warm", 1, 1, false, false, false, NULL, false }, + ++ /* The attributes describing isr security level. */ ++ { "secure", 1, 1, false, false, false, NULL, false }, ++ + /* The attribute telling no prologue/epilogue. */ + { "naked", 0, 0, false, false, false, NULL, false }, + ++ /* The attribute is used to set signature. */ ++ { "signature", 0, 0, false, false, false, NULL, false }, ++ ++ /* The attribute is used to tell this function to be ROM patch. */ ++ { "indirect_call",0, 0, false, false, false, NULL, false }, ++ ++ /* FOR BACKWARD COMPATIBILITY, ++ this attribute also tells no prologue/epilogue. */ ++ { "no_prologue", 0, 0, false, false, false, NULL, false }, ++ ++ /* The attribute turn off hwloop optimization. */ ++ { "no_ext_zol", 0, 0, false, false, false, NULL, false}, ++ + /* The last attribute spec is set to be NULL. */ + { NULL, 0, 0, false, false, false, NULL, false } + }; + +- + /* ------------------------------------------------------------------------ */ + +-/* PART 2: Auxiliary static function definitions. */ ++/* PART 3: Auxiliary static function definitions. */ + + /* Function to save and restore machine-specific function data. */ + static struct machine_function * +@@ -121,12 +395,24 @@ nds32_init_machine_status (void) + struct machine_function *machine; + machine = ggc_cleared_alloc<machine_function> (); + ++ /* Initially assume this function does not use __builtin_eh_return. */ ++ machine->use_eh_return_p = 0; ++ + /* Initially assume this function needs prologue/epilogue. */ + machine->naked_p = 0; + + /* Initially assume this function does NOT use fp_as_gp optimization. */ + machine->fp_as_gp_p = 0; + ++ /* Initially this function is not under strictly aligned situation. 
*/ ++ machine->strict_aligned_p = 0; ++ ++ /* Initially this function has no naked and no_prologue attributes. */ ++ machine->attr_naked_p = 0; ++ machine->attr_no_prologue_p = 0; ++ ++ /* Initially this function hwloop group ID number. */ ++ machine->hwloop_group_id = 0; + return machine; + } + +@@ -137,23 +423,63 @@ nds32_compute_stack_frame (void) + { + int r; + int block_size; ++ bool v3pushpop_p; + + /* Because nds32_compute_stack_frame() will be called from different place, + everytime we enter this function, we have to assume this function + needs prologue/epilogue. */ + cfun->machine->naked_p = 0; + ++ /* We need to mark whether this function has naked and no_prologue ++ attribute so that we can distinguish the difference if users applies ++ -mret-in-naked-func option. */ ++ cfun->machine->attr_naked_p ++ = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) ++ ? 1 : 0; ++ cfun->machine->attr_no_prologue_p ++ = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) ++ ? 1 : 0; ++ ++ /* If __builtin_eh_return is used, we better have frame pointer needed ++ so that we can easily locate the stack slot of return address. */ ++ if (crtl->calls_eh_return) ++ { ++ frame_pointer_needed = 1; ++ ++ /* We need to mark eh data registers that need to be saved ++ in the stack. */ ++ cfun->machine->eh_return_data_first_regno = EH_RETURN_DATA_REGNO (0); ++ for (r = 0; EH_RETURN_DATA_REGNO (r) != INVALID_REGNUM; r++) ++ cfun->machine->eh_return_data_last_regno = r; ++ ++ cfun->machine->eh_return_data_regs_size ++ = 4 * (cfun->machine->eh_return_data_last_regno ++ - cfun->machine->eh_return_data_first_regno ++ + 1); ++ cfun->machine->use_eh_return_p = 1; ++ } ++ else ++ { ++ /* Assigning SP_REGNUM to eh_first_regno and eh_last_regno means we ++ do not need to handle __builtin_eh_return case in this function. 
*/ ++ cfun->machine->eh_return_data_first_regno = SP_REGNUM; ++ cfun->machine->eh_return_data_last_regno = SP_REGNUM; ++ ++ cfun->machine->eh_return_data_regs_size = 0; ++ cfun->machine->use_eh_return_p = 0; ++ } ++ + /* Get variadic arguments size to prepare pretend arguments and + we will push them into stack at prologue by ourself. */ + cfun->machine->va_args_size = crtl->args.pretend_args_size; + if (cfun->machine->va_args_size != 0) + { + cfun->machine->va_args_first_regno +- = NDS32_GPR_ARG_FIRST_REGNUM +- + NDS32_MAX_GPR_REGS_FOR_ARGS +- - (crtl->args.pretend_args_size / UNITS_PER_WORD); ++ = NDS32_GPR_ARG_FIRST_REGNUM ++ + NDS32_MAX_GPR_REGS_FOR_ARGS ++ - (crtl->args.pretend_args_size / UNITS_PER_WORD); + cfun->machine->va_args_last_regno +- = NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1; ++ = NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1; + } + else + { +@@ -183,11 +509,13 @@ nds32_compute_stack_frame (void) + + /* If $gp value is required to be saved on stack, it needs 4 bytes space. + Check whether we are using PIC code genration. */ +- cfun->machine->gp_size = (flag_pic) ? 4 : 0; ++ cfun->machine->gp_size = ++ (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0; + + /* If $lp value is required to be saved on stack, it needs 4 bytes space. + Check whether $lp is ever live. */ +- cfun->machine->lp_size = (df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0; ++ cfun->machine->lp_size ++ = (flag_always_save_lp || df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0; + + /* Initially there is no padding bytes. 
*/ + cfun->machine->callee_saved_area_gpr_padding_bytes = 0; +@@ -196,6 +524,10 @@ nds32_compute_stack_frame (void) + cfun->machine->callee_saved_gpr_regs_size = 0; + cfun->machine->callee_saved_first_gpr_regno = SP_REGNUM; + cfun->machine->callee_saved_last_gpr_regno = SP_REGNUM; ++ cfun->machine->callee_saved_fpr_regs_size = 0; ++ cfun->machine->callee_saved_first_fpr_regno = SP_REGNUM; ++ cfun->machine->callee_saved_last_fpr_regno = SP_REGNUM; ++ + /* Currently, there is no need to check $r28~$r31 + because we will save them in another way. */ + for (r = 0; r < 28; r++) +@@ -213,43 +545,77 @@ nds32_compute_stack_frame (void) + } + } + ++ /* Recording fpu callee-saved register. */ ++ if (TARGET_HARD_FLOAT) ++ { ++ for (r = NDS32_FIRST_FPR_REGNUM; r < NDS32_LAST_FPR_REGNUM; r++) ++ { ++ if (NDS32_REQUIRED_CALLEE_SAVED_P (r)) ++ { ++ /* Mark the first required callee-saved register. */ ++ if (cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM) ++ { ++ /* Make first callee-saved number is even, ++ bacause we use doubleword access, and this way ++ promise 8-byte alignemt. */ ++ if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (r)) ++ cfun->machine->callee_saved_first_fpr_regno = r - 1; ++ else ++ cfun->machine->callee_saved_first_fpr_regno = r; ++ } ++ cfun->machine->callee_saved_last_fpr_regno = r; ++ } ++ } ++ ++ /* Make last callee-saved register number is odd, ++ we hope callee-saved register is even. */ ++ int last_fpr = cfun->machine->callee_saved_last_fpr_regno; ++ if (NDS32_FPR_REGNO_OK_FOR_DOUBLE (last_fpr)) ++ cfun->machine->callee_saved_last_fpr_regno++; ++ } ++ + /* Check if this function can omit prologue/epilogue code fragment. +- If there is 'naked' attribute in this function, ++ If there is 'no_prologue'/'naked'/'secure' attribute in this function, + we can set 'naked_p' flag to indicate that + we do not have to generate prologue/epilogue. 
+ Or, if all the following conditions succeed, + we can set this function 'naked_p' as well: + condition 1: first_regno == last_regno == SP_REGNUM, +- which means we do not have to save +- any callee-saved registers. ++ which means we do not have to save ++ any callee-saved registers. + condition 2: Both $lp and $fp are NOT live in this function, +- which means we do not need to save them and there +- is no outgoing size. ++ which means we do not need to save them and there ++ is no outgoing size. + condition 3: There is no local_size, which means +- we do not need to adjust $sp. */ +- if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) ++ we do not need to adjust $sp. */ ++ if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) ++ || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) ++ || lookup_attribute ("secure", DECL_ATTRIBUTES (current_function_decl)) + || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM + && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM ++ && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM ++ && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM + && !df_regs_ever_live_p (FP_REGNUM) + && !df_regs_ever_live_p (LP_REGNUM) +- && cfun->machine->local_size == 0)) ++ && cfun->machine->local_size == 0 ++ && !flag_pic)) + { + /* Set this function 'naked_p' and other functions can check this flag. +- Note that in nds32 port, the 'naked_p = 1' JUST means there is no +- callee-saved, local size, and outgoing size. +- The varargs space and ret instruction may still present in +- the prologue/epilogue expanding. */ ++ Note that in nds32 port, the 'naked_p = 1' JUST means there is no ++ callee-saved, local size, and outgoing size. ++ The varargs space and ret instruction may still present in ++ the prologue/epilogue expanding. */ + cfun->machine->naked_p = 1; + + /* No need to save $fp, $gp, and $lp. 
+- We should set these value to be zero +- so that nds32_initial_elimination_offset() can work properly. */ ++ We should set these value to be zero ++ so that nds32_initial_elimination_offset() can work properly. */ + cfun->machine->fp_size = 0; + cfun->machine->gp_size = 0; + cfun->machine->lp_size = 0; + + /* If stack usage computation is required, +- we need to provide the static stack size. */ ++ we need to provide the static stack size. */ + if (flag_stack_usage_info) + current_function_static_stack_size = 0; + +@@ -257,6 +623,8 @@ nds32_compute_stack_frame (void) + return; + } + ++ v3pushpop_p = NDS32_V3PUSH_AVAILABLE_P; ++ + /* Adjustment for v3push instructions: + If we are using v3push (push25/pop25) instructions, + we need to make sure Rb is $r6 and Re is +@@ -264,16 +632,14 @@ nds32_compute_stack_frame (void) + Some results above will be discarded and recomputed. + Note that it is only available under V3/V3M ISA and we + DO NOT setup following stuff for isr or variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) ++ if (v3pushpop_p) + { + /* Recompute: +- cfun->machine->fp_size +- cfun->machine->gp_size +- cfun->machine->lp_size +- cfun->machine->callee_saved_regs_first_regno +- cfun->machine->callee_saved_regs_last_regno */ ++ cfun->machine->fp_size ++ cfun->machine->gp_size ++ cfun->machine->lp_size ++ cfun->machine->callee_saved_first_gpr_regno ++ cfun->machine->callee_saved_last_gpr_regno */ + + /* For v3push instructions, $fp, $gp, and $lp are always saved. */ + cfun->machine->fp_size = 4; +@@ -316,11 +682,46 @@ nds32_compute_stack_frame (void) + } + } + +- /* We have correctly set callee_saved_regs_first_regno +- and callee_saved_regs_last_regno. +- Initially, the callee_saved_regs_size is supposed to be 0. +- As long as callee_saved_regs_last_regno is not SP_REGNUM, +- we can update callee_saved_regs_size with new size. 
*/ ++ int sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ if (!v3pushpop_p ++ && nds32_memory_model_option == MEMORY_MODEL_FAST ++ && sp_adjust == 0 ++ && !frame_pointer_needed) ++ { ++ block_size = cfun->machine->fp_size ++ + cfun->machine->gp_size ++ + cfun->machine->lp_size ++ + (4 * (cfun->machine->callee_saved_last_gpr_regno ++ - cfun->machine->callee_saved_first_gpr_regno ++ + 1)); ++ ++ if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size)) ++ { ++ /* $r14 is last callee save register. */ ++ if (cfun->machine->callee_saved_last_gpr_regno ++ < NDS32_LAST_CALLEE_SAVE_GPR_REGNUM) ++ { ++ cfun->machine->callee_saved_last_gpr_regno++; ++ } ++ else if (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM) ++ { ++ cfun->machine->callee_saved_first_gpr_regno ++ = NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM; ++ cfun->machine->callee_saved_last_gpr_regno ++ = NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM; ++ } ++ } ++ } ++ ++ /* We have correctly set callee_saved_first_gpr_regno ++ and callee_saved_last_gpr_regno. ++ Initially, the callee_saved_gpr_regs_size is supposed to be 0. ++ As long as callee_saved_last_gpr_regno is not SP_REGNUM, ++ we can update callee_saved_gpr_regs_size with new size. */ + if (cfun->machine->callee_saved_last_gpr_regno != SP_REGNUM) + { + /* Compute pushed size of callee-saved registers. */ +@@ -330,10 +731,22 @@ nds32_compute_stack_frame (void) + + 1); + } + ++ if (TARGET_HARD_FLOAT) ++ { ++ /* Compute size of callee svaed floating-point registers. */ ++ if (cfun->machine->callee_saved_last_fpr_regno != SP_REGNUM) ++ { ++ cfun->machine->callee_saved_fpr_regs_size ++ = 4 * (cfun->machine->callee_saved_last_fpr_regno ++ - cfun->machine->callee_saved_first_fpr_regno ++ + 1); ++ } ++ } ++ + /* Important: We need to make sure that +- (fp_size + gp_size + lp_size + callee_saved_regs_size) +- is 8-byte alignment. 
+- If it is not, calculate the padding bytes. */ ++ (fp_size + gp_size + lp_size + callee_saved_gpr_regs_size) ++ is 8-byte alignment. ++ If it is not, calculate the padding bytes. */ + block_size = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size +@@ -361,14 +774,15 @@ nds32_compute_stack_frame (void) + "push registers to memory", + "adjust stack pointer". */ + static void +-nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) ++nds32_emit_stack_push_multiple (unsigned Rb, unsigned Re, ++ bool save_fp_p, bool save_gp_p, bool save_lp_p, ++ bool vaarg_p) + { +- int regno; ++ unsigned regno; + int extra_count; + int num_use_regs; + int par_index; + int offset; +- int save_fp, save_gp, save_lp; + + rtx reg; + rtx mem; +@@ -381,39 +795,34 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) + necessary information for data analysis, + so we create a parallel rtx like this: + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) +- (reg:SI Rb)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) +- (reg:SI Rb+1)) +- ... +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) +- (reg:SI Re)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) +- (reg:SI FP_REGNUM)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) +- (reg:SI GP_REGNUM)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) +- (reg:SI LP_REGNUM)) +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int -32)))]) */ +- +- /* Determine whether we need to save $fp, $gp, or $lp. */ +- save_fp = INTVAL (En4) & 0x8; +- save_gp = INTVAL (En4) & 0x4; +- save_lp = INTVAL (En4) & 0x2; ++ (reg:SI Rb)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) ++ (reg:SI Rb+1)) ++ ... 
++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) ++ (reg:SI Re)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) ++ (reg:SI FP_REGNUM)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) ++ (reg:SI GP_REGNUM)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) ++ (reg:SI LP_REGNUM)) ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int -32)))]) */ + + /* Calculate the number of registers that will be pushed. */ + extra_count = 0; +- if (save_fp) ++ if (save_fp_p) + extra_count++; +- if (save_gp) ++ if (save_gp_p) + extra_count++; +- if (save_lp) ++ if (save_lp_p) + extra_count++; + /* Note that Rb and Re may be SP_REGNUM. DO NOT count it in. */ +- if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM) ++ if (Rb == SP_REGNUM && Re == SP_REGNUM) + num_use_regs = extra_count; + else +- num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count; ++ num_use_regs = Re - Rb + 1 + extra_count; + + /* In addition to used registers, + we need one more space for (set sp sp-x) rtx. */ +@@ -425,10 +834,10 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) + offset = -(num_use_regs * 4); + + /* Create (set mem regX) from Rb, Rb+1 up to Re. */ +- for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) ++ for (regno = Rb; regno <= Re; regno++) + { + /* Rb and Re may be SP_REGNUM. +- We need to break this loop immediately. */ ++ We need to break this loop immediately. */ + if (regno == SP_REGNUM) + break; + +@@ -444,7 +853,7 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) + } + + /* Create (set mem fp), (set mem gp), and (set mem lp) if necessary. 
*/ +- if (save_fp) ++ if (save_fp_p) + { + reg = gen_rtx_REG (SImode, FP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -456,7 +865,7 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) + offset = offset + 4; + par_index++; + } +- if (save_gp) ++ if (save_gp_p) + { + reg = gen_rtx_REG (SImode, GP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -468,7 +877,7 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) + offset = offset + 4; + par_index++; + } +- if (save_lp) ++ if (save_lp_p) + { + reg = gen_rtx_REG (SImode, LP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -514,14 +923,14 @@ nds32_emit_stack_push_multiple (rtx Rb, rtx Re, rtx En4, bool vaarg_p) + "pop registers from memory", + "adjust stack pointer". */ + static void +-nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) ++nds32_emit_stack_pop_multiple (unsigned Rb, unsigned Re, ++ bool save_fp_p, bool save_gp_p, bool save_lp_p) + { +- int regno; ++ unsigned regno; + int extra_count; + int num_use_regs; + int par_index; + int offset; +- int save_fp, save_gp, save_lp; + + rtx reg; + rtx mem; +@@ -534,39 +943,34 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) + necessary information for data analysis, + so we create a parallel rtx like this: + (parallel [(set (reg:SI Rb) +- (mem (reg:SI SP_REGNUM))) +- (set (reg:SI Rb+1) +- (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) +- ... +- (set (reg:SI Re) +- (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) +- (set (reg:SI FP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) +- (set (reg:SI GP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) +- (set (reg:SI LP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ +- +- /* Determine whether we need to restore $fp, $gp, or $lp. 
*/ +- save_fp = INTVAL (En4) & 0x8; +- save_gp = INTVAL (En4) & 0x4; +- save_lp = INTVAL (En4) & 0x2; ++ (mem (reg:SI SP_REGNUM))) ++ (set (reg:SI Rb+1) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) ++ ... ++ (set (reg:SI Re) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) ++ (set (reg:SI FP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) ++ (set (reg:SI GP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) ++ (set (reg:SI LP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ + + /* Calculate the number of registers that will be poped. */ + extra_count = 0; +- if (save_fp) ++ if (save_fp_p) + extra_count++; +- if (save_gp) ++ if (save_gp_p) + extra_count++; +- if (save_lp) ++ if (save_lp_p) + extra_count++; + /* Note that Rb and Re may be SP_REGNUM. DO NOT count it in. */ +- if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM) ++ if (Rb == SP_REGNUM && Re == SP_REGNUM) + num_use_regs = extra_count; + else +- num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count; ++ num_use_regs = Re - Rb + 1 + extra_count; + + /* In addition to used registers, + we need one more space for (set sp sp+x) rtx. */ +@@ -578,10 +982,10 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) + offset = 0; + + /* Create (set regX mem) from Rb, Rb+1 up to Re. */ +- for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) ++ for (regno = Rb; regno <= Re; regno++) + { + /* Rb and Re may be SP_REGNUM. +- We need to break this loop immediately. */ ++ We need to break this loop immediately. */ + if (regno == SP_REGNUM) + break; + +@@ -599,7 +1003,7 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) + } + + /* Create (set fp mem), (set gp mem), and (set lp mem) if necessary. 
*/ +- if (save_fp) ++ if (save_fp_p) + { + reg = gen_rtx_REG (SImode, FP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -613,7 +1017,7 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + } +- if (save_gp) ++ if (save_gp_p) + { + reg = gen_rtx_REG (SImode, GP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -627,7 +1031,7 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + } +- if (save_lp) ++ if (save_lp_p) + { + reg = gen_rtx_REG (SImode, LP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -670,12 +1074,11 @@ nds32_emit_stack_pop_multiple (rtx Rb, rtx Re, rtx En4) + "push registers to memory", + "adjust stack pointer". */ + static void +-nds32_emit_stack_v3push (rtx Rb, +- rtx Re, +- rtx En4 ATTRIBUTE_UNUSED, +- rtx imm8u) ++nds32_emit_stack_v3push (unsigned Rb, ++ unsigned Re, ++ unsigned imm8u) + { +- int regno; ++ unsigned regno; + int num_use_regs; + int par_index; + int offset; +@@ -690,27 +1093,27 @@ nds32_emit_stack_v3push (rtx Rb, + necessary information for data analysis, + so we create a parallel rtx like this: + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) +- (reg:SI Rb)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) +- (reg:SI Rb+1)) +- ... +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) +- (reg:SI Re)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) +- (reg:SI FP_REGNUM)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) +- (reg:SI GP_REGNUM)) +- (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) +- (reg:SI LP_REGNUM)) +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int -32-imm8u)))]) */ ++ (reg:SI Rb)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) ++ (reg:SI Rb+1)) ++ ... 
++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) ++ (reg:SI Re)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) ++ (reg:SI FP_REGNUM)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) ++ (reg:SI GP_REGNUM)) ++ (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) ++ (reg:SI LP_REGNUM)) ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int -32-imm8u)))]) */ + + /* Calculate the number of registers that will be pushed. + Since $fp, $gp, and $lp is always pushed with v3push instruction, + we need to count these three registers. + Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. + So there is no need to worry about Rb=Re=SP_REGNUM case. */ +- num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + 3; ++ num_use_regs = Re - Rb + 1 + 3; + + /* In addition to used registers, + we need one more space for (set sp sp-x-imm8u) rtx. */ +@@ -724,7 +1127,7 @@ nds32_emit_stack_v3push (rtx Rb, + /* Create (set mem regX) from Rb, Rb+1 up to Re. + Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. + So there is no need to worry about Rb=Re=SP_REGNUM case. */ +- for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) ++ for (regno = Rb; regno <= Re; regno++) + { + reg = gen_rtx_REG (SImode, regno); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -776,7 +1179,7 @@ nds32_emit_stack_v3push (rtx Rb, + = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, +- offset - INTVAL (imm8u))); ++ offset - imm8u)); + XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; + RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1; + +@@ -794,12 +1197,11 @@ nds32_emit_stack_v3push (rtx Rb, + "pop registers from memory", + "adjust stack pointer". 
*/ + static void +-nds32_emit_stack_v3pop (rtx Rb, +- rtx Re, +- rtx En4 ATTRIBUTE_UNUSED, +- rtx imm8u) ++nds32_emit_stack_v3pop (unsigned Rb, ++ unsigned Re, ++ unsigned imm8u) + { +- int regno; ++ unsigned regno; + int num_use_regs; + int par_index; + int offset; +@@ -815,27 +1217,27 @@ nds32_emit_stack_v3pop (rtx Rb, + necessary information for data analysis, + so we create a parallel rtx like this: + (parallel [(set (reg:SI Rb) +- (mem (reg:SI SP_REGNUM))) +- (set (reg:SI Rb+1) +- (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) +- ... +- (set (reg:SI Re) +- (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) +- (set (reg:SI FP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) +- (set (reg:SI GP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) +- (set (reg:SI LP_REGNUM) +- (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) +- (set (reg:SI SP_REGNUM) +- (plus (reg:SI SP_REGNUM) (const_int 32+imm8u)))]) */ ++ (mem (reg:SI SP_REGNUM))) ++ (set (reg:SI Rb+1) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) ++ ... ++ (set (reg:SI Re) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) ++ (set (reg:SI FP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) ++ (set (reg:SI GP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) ++ (set (reg:SI LP_REGNUM) ++ (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) ++ (set (reg:SI SP_REGNUM) ++ (plus (reg:SI SP_REGNUM) (const_int 32+imm8u)))]) */ + + /* Calculate the number of registers that will be poped. + Since $fp, $gp, and $lp is always poped with v3pop instruction, + we need to count these three registers. + Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. + So there is no need to worry about Rb=Re=SP_REGNUM case. */ +- num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + 3; ++ num_use_regs = Re - Rb + 1 + 3; + + /* In addition to used registers, + we need one more space for (set sp sp+x+imm8u) rtx. 
*/ +@@ -849,7 +1251,7 @@ nds32_emit_stack_v3pop (rtx Rb, + /* Create (set regX mem) from Rb, Rb+1 up to Re. + Under v3pop, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. + So there is no need to worry about Rb=Re=SP_REGNUM case. */ +- for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) ++ for (regno = Rb; regno <= Re; regno++) + { + reg = gen_rtx_REG (SImode, regno); + mem = gen_frame_mem (SImode, plus_constant (Pmode, +@@ -907,11 +1309,24 @@ nds32_emit_stack_v3pop (rtx Rb, + = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, +- offset + INTVAL (imm8u))); ++ offset + imm8u)); + XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; + +- /* Tell gcc we adjust SP in this insn. */ +- dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), dwarf); ++ if (frame_pointer_needed) ++ { ++ /* (expr_list:REG_CFA_DEF_CFA (plus:SI (reg/f:SI $sp) ++ (const_int 0)) ++ mean reset frame pointer to $sp and reset to offset 0. */ ++ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ const0_rtx); ++ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); ++ } ++ else ++ { ++ /* Tell gcc we adjust SP in this insn. */ ++ dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, ++ copy_rtx (adjust_sp_rtx), dwarf); ++ } + + parallel_insn = emit_insn (parallel_insn); + +@@ -924,6 +1339,32 @@ nds32_emit_stack_v3pop (rtx Rb, + REG_NOTES (parallel_insn) = dwarf; + } + ++static void ++nds32_emit_load_gp (void) ++{ ++ rtx got_symbol, pat; ++ ++ /* Initial GLOBAL OFFSET TABLE don't do the scheduling. 
*/ ++ emit_insn (gen_blockage ()); ++ ++ got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ /* sethi $gp, _GLOBAL_OFFSET_TABLE_ -8 */ ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); ++ pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-8))); ++ emit_insn (gen_sethi (pic_offset_table_rtx,pat)); ++ ++ /* ori $gp, $gp, _GLOBAL_OFFSET_TABLE_ -4 */ ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); ++ pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-4))); ++ emit_insn (gen_lo_sum (pic_offset_table_rtx, pic_offset_table_rtx, pat)); ++ ++ /* add5.pc $gp */ ++ emit_insn (gen_add_pc (pic_offset_table_rtx, pic_offset_table_rtx)); ++ ++ /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ ++ emit_insn (gen_blockage ()); ++} ++ + /* Function that may creates more instructions + for large value on adjusting stack pointer. + +@@ -933,79 +1374,70 @@ nds32_emit_stack_v3pop (rtx Rb, + the adjustment value is not able to be fit in the 'addi' instruction. + One solution is to move value into a register + and then use 'add' instruction. +- In practice, we use TA_REGNUM ($r15) to accomplish this purpose. +- Also, we need to return zero for sp adjustment so that +- proglogue/epilogue knows there is no need to create 'addi' instruction. */ +-static int +-nds32_force_addi_stack_int (int full_value) ++ In practice, we use TA_REGNUM ($r15) to accomplish this purpose. */ ++static void ++nds32_emit_adjust_frame (rtx to_reg, rtx from_reg, int adjust_value) + { +- int adjust_value; +- + rtx tmp_reg; +- rtx sp_adjust_insn; ++ rtx frame_adjust_insn; ++ rtx adjust_value_rtx = GEN_INT (adjust_value); + +- if (!satisfies_constraint_Is15 (GEN_INT (full_value))) ++ if (adjust_value == 0) ++ return; ++ ++ if (!satisfies_constraint_Is15 (adjust_value_rtx)) + { + /* The value is not able to fit in single addi instruction. 
+- Create more instructions of moving value into a register +- and then add stack pointer with it. */ ++ Create more instructions of moving value into a register ++ and then add stack pointer with it. */ + + /* $r15 is going to be temporary register to hold the value. */ + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + + /* Create one more instruction to move value +- into the temporary register. */ +- emit_move_insn (tmp_reg, GEN_INT (full_value)); ++ into the temporary register. */ ++ emit_move_insn (tmp_reg, adjust_value_rtx); + + /* Create new 'add' rtx. */ +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- tmp_reg); ++ frame_adjust_insn = gen_addsi3 (to_reg, ++ from_reg, ++ tmp_reg); + /* Emit rtx into insn list and receive its transformed insn rtx. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); +- +- /* At prologue, we need to tell GCC that this is frame related insn, +- so that we can consider this instruction to output debug information. +- If full_value is NEGATIVE, it means this function +- is invoked by expand_prologue. */ +- if (full_value < 0) +- { +- /* Because (tmp_reg <- full_value) may be split into two +- rtl patterns, we can not set its RTX_FRAME_RELATED_P. +- We need to construct another (sp <- sp + full_value) +- and then insert it into sp_adjust_insn's reg note to +- represent a frame related expression. +- GCC knows how to refer it and output debug information. */ +- +- rtx plus_rtx; +- rtx set_rtx; ++ frame_adjust_insn = emit_insn (frame_adjust_insn); + +- plus_rtx = plus_constant (Pmode, stack_pointer_rtx, full_value); +- set_rtx = gen_rtx_SET (stack_pointer_rtx, plus_rtx); +- add_reg_note (sp_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx); ++ /* Because (tmp_reg <- full_value) may be split into two ++ rtl patterns, we can not set its RTX_FRAME_RELATED_P. ++ We need to construct another (sp <- sp + full_value) ++ and then insert it into sp_adjust_insn's reg note to ++ represent a frame related expression. 
++ GCC knows how to refer it and output debug information. */ + +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; +- } ++ rtx plus_rtx; ++ rtx set_rtx; + +- /* We have used alternative way to adjust stack pointer value. +- Return zero so that prologue/epilogue +- will not generate other instructions. */ +- return 0; ++ plus_rtx = plus_constant (Pmode, from_reg, adjust_value); ++ set_rtx = gen_rtx_SET (to_reg, plus_rtx); ++ add_reg_note (frame_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx); + } + else + { +- /* The value is able to fit in addi instruction. +- However, remember to make it to be positive value +- because we want to return 'adjustment' result. */ +- adjust_value = (full_value < 0) ? (-full_value) : (full_value); +- +- return adjust_value; ++ /* Generate sp adjustment instruction if and only if sp_adjust != 0. */ ++ frame_adjust_insn = gen_addsi3 (to_reg, ++ from_reg, ++ adjust_value_rtx); ++ /* Emit rtx into instructions list and receive INSN rtx form. */ ++ frame_adjust_insn = emit_insn (frame_adjust_insn); + } ++ ++ /* The insn rtx 'sp_adjust_insn' will change frame layout. ++ We need to use RTX_FRAME_RELATED_P so that GCC is able to ++ generate CFI (Call Frame Information) stuff. */ ++ RTX_FRAME_RELATED_P (frame_adjust_insn) = 1; + } + + /* Return true if MODE/TYPE need double word alignment. */ + static bool +-nds32_needs_double_word_align (machine_mode mode, const_tree type) ++nds32_needs_double_word_align (enum machine_mode mode, const_tree type) + { + unsigned int align; + +@@ -1015,18 +1447,25 @@ nds32_needs_double_word_align (machine_mode mode, const_tree type) + return (align > PARM_BOUNDARY); + } + +-/* Return true if FUNC is a naked function. */ +-static bool ++bool + nds32_naked_function_p (tree func) + { +- tree t; ++ /* FOR BACKWARD COMPATIBILITY, ++ we need to support 'no_prologue' attribute as well. 
*/ ++ tree t_naked; ++ tree t_no_prologue; + + if (TREE_CODE (func) != FUNCTION_DECL) + abort (); + +- t = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); ++ /* We have to use lookup_attribute() to check attributes. ++ Because attr_naked_p and attr_no_prologue_p are set in ++ nds32_compute_stack_frame() and the function has not been ++ invoked yet. */ ++ t_naked = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); ++ t_no_prologue = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (func)); + +- return (t != NULL_TREE); ++ return ((t_naked != NULL_TREE) || (t_no_prologue != NULL_TREE)); + } + + /* Function that check if 'X' is a valid address register. +@@ -1035,7 +1474,7 @@ nds32_naked_function_p (tree func) + + STRICT : true + => We are in reload pass or after reload pass. +- The register number should be strictly limited in general registers. ++ The register number should be strictly limited in general registers. + + STRICT : false + => Before reload pass, we are free to use any register number. */ +@@ -1058,10 +1497,10 @@ nds32_address_register_rtx_p (rtx x, bool strict) + /* Function that check if 'INDEX' is valid to be a index rtx for address. + + OUTER_MODE : Machine mode of outer address rtx. +- INDEX : Check if this rtx is valid to be a index for address. ++ INDEX : Check if this rtx is valid to be a index for address. + STRICT : If it is true, we are in reload pass or after reload pass. */ + static bool +-nds32_legitimate_index_p (machine_mode outer_mode, ++nds32_legitimate_index_p (enum machine_mode outer_mode, + rtx index, + bool strict) + { +@@ -1074,7 +1513,7 @@ nds32_legitimate_index_p (machine_mode outer_mode, + case REG: + regno = REGNO (index); + /* If we are in reload pass or after reload pass, +- we need to limit it to general register. */ ++ we need to limit it to general register. 
*/ + if (strict) + return REGNO_OK_FOR_INDEX_P (regno); + else +@@ -1082,45 +1521,73 @@ nds32_legitimate_index_p (machine_mode outer_mode, + + case CONST_INT: + /* The alignment of the integer value is determined by 'outer_mode'. */ +- if (GET_MODE_SIZE (outer_mode) == 1) ++ switch (GET_MODE_SIZE (outer_mode)) + { ++ case 1: + /* Further check if the value is legal for the 'outer_mode'. */ +- if (!satisfies_constraint_Is15 (index)) +- return false; ++ if (satisfies_constraint_Is15 (index)) ++ return true; ++ break; + +- /* Pass all test, the value is valid, return true. */ +- return true; +- } +- if (GET_MODE_SIZE (outer_mode) == 2 +- && NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) +- { ++ case 2: + /* Further check if the value is legal for the 'outer_mode'. */ +- if (!satisfies_constraint_Is16 (index)) +- return false; ++ if (satisfies_constraint_Is16 (index)) ++ { ++ /* If it is not under strictly aligned situation, ++ we can return true without checking alignment. */ ++ if (!cfun->machine->strict_aligned_p) ++ return true; ++ /* Make sure address is half word alignment. */ ++ else if (NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) ++ return true; ++ } ++ break; + +- /* Pass all test, the value is valid, return true. */ +- return true; +- } +- if (GET_MODE_SIZE (outer_mode) == 4 +- && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) +- { ++ case 4: + /* Further check if the value is legal for the 'outer_mode'. */ +- if (!satisfies_constraint_Is17 (index)) +- return false; ++ if (satisfies_constraint_Is17 (index)) ++ { ++ if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) ++ { ++ if (!satisfies_constraint_Is14 (index)) ++ return false; ++ } ++ ++ /* If it is not under strictly aligned situation, ++ we can return true without checking alignment. */ ++ if (!cfun->machine->strict_aligned_p) ++ return true; ++ /* Make sure address is word alignment. 
*/ ++ else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) ++ return true; ++ } ++ break; + +- /* Pass all test, the value is valid, return true. */ +- return true; +- } +- if (GET_MODE_SIZE (outer_mode) == 8 +- && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) +- { +- /* Further check if the value is legal for the 'outer_mode'. */ +- if (!satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, +- SImode))) +- return false; ++ case 8: ++ if (satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, ++ SImode))) ++ { ++ if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) ++ { ++ if (!satisfies_constraint_Is14 (index)) ++ return false; ++ } ++ ++ /* If it is not under strictly aligned situation, ++ we can return true without checking alignment. */ ++ if (!cfun->machine->strict_aligned_p) ++ return true; ++ /* Make sure address is word alignment. ++ Currently we do not have 64-bit load/store yet, ++ so we will use two 32-bit load/store instructions to do ++ memory access and they are single word alignment. */ ++ else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) ++ return true; ++ } ++ break; + +- /* Pass all test, the value is valid, return true. */ +- return true; ++ default: ++ return false; + } + + return false; +@@ -1134,9 +1601,10 @@ nds32_legitimate_index_p (machine_mode outer_mode, + int multiplier; + multiplier = INTVAL (op1); + +- /* We only allow (mult reg const_int_1) +- or (mult reg const_int_2) or (mult reg const_int_4). */ +- if (multiplier != 1 && multiplier != 2 && multiplier != 4) ++ /* We only allow (mult reg const_int_1), (mult reg const_int_2), ++ (mult reg const_int_4) or (mult reg const_int_8). */ ++ if (multiplier != 1 && multiplier != 2 ++ && multiplier != 4 && multiplier != 8) + return false; + + regno = REGNO (op0); +@@ -1161,8 +1629,9 @@ nds32_legitimate_index_p (machine_mode outer_mode, + sv = INTVAL (op1); + + /* We only allow (ashift reg const_int_0) +- or (ashift reg const_int_1) or (ashift reg const_int_2). 
*/ +- if (sv != 0 && sv != 1 && sv !=2) ++ or (ashift reg const_int_1) or (ashift reg const_int_2) or ++ (ashift reg const_int_3). */ ++ if (sv != 0 && sv != 1 && sv !=2 && sv != 3) + return false; + + regno = REGNO (op0); +@@ -1181,18 +1650,302 @@ nds32_legitimate_index_p (machine_mode outer_mode, + } + } + ++static void ++nds32_insert_innermost_loop (void) ++{ ++ struct loop *loop; ++ basic_block *bbs, bb; ++ ++ compute_bb_for_insn (); ++ /* initial loop structure */ ++ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); ++ ++ /* Scan all inner most loops. */ ++ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) ++ { ++ bbs = get_loop_body (loop); ++ bb = *bbs; ++ free (bbs); ++ ++ emit_insn_before (gen_innermost_loop_begin (), ++ BB_HEAD (bb)); ++ ++ /* Find the final basic block in the loop. */ ++ while (bb) ++ { ++ if (bb->next_bb == NULL) ++ break; ++ ++ if (bb->next_bb->loop_father != loop) ++ break; ++ ++ bb = bb->next_bb; ++ } ++ ++ emit_insn_before (gen_innermost_loop_end (), ++ BB_END (bb)); ++ } ++ ++ /* release loop structre */ ++ loop_optimizer_finalize (); ++} ++ ++/* Insert isps for function with signature attribute. 
*/ ++static void ++nds32_insert_isps (void) ++{ ++ rtx_insn *insn; ++ unsigned first = 0; ++ ++ if (!lookup_attribute ("signature", DECL_ATTRIBUTES (current_function_decl))) ++ return; ++ ++ insn = get_insns (); ++ while (insn) ++ { ++ /* In order to ensure protect whole function, emit the first ++ isps here rather than in prologue.*/ ++ if (!first && INSN_P (insn)) ++ { ++ emit_insn_before (gen_unspec_signature_begin (), insn); ++ first = 1; ++ } ++ ++ if (LABEL_P (insn) || CALL_P (insn) || any_condjump_p (insn) ++ || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE ++ && (XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_SYSCALL ++ || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TRAP ++ || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TEQZ ++ || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TNEZ))) ++ { ++ emit_insn_after (gen_unspec_signature_begin (), insn); ++ } ++ insn = NEXT_INSN (insn); ++ } ++} ++ ++static void ++nds32_register_pass ( ++ rtl_opt_pass *(*make_pass_func) (gcc::context *), ++ enum pass_positioning_ops pass_pos, ++ const char *ref_pass_name) ++{ ++ opt_pass *new_opt_pass = make_pass_func (g); ++ ++ struct register_pass_info insert_pass = ++ { ++ new_opt_pass, /* pass */ ++ ref_pass_name, /* reference_pass_name */ ++ 1, /* ref_pass_instance_number */ ++ pass_pos /* po_op */ ++ }; ++ ++ register_pass (&insert_pass); ++} ++ ++static void ++nds32_register_pass ( ++ gimple_opt_pass *(*make_pass_func) (gcc::context *), ++ enum pass_positioning_ops pass_pos, ++ const char *ref_pass_name) ++{ ++ opt_pass *new_opt_pass = make_pass_func (g); ++ ++ struct register_pass_info insert_pass = ++ { ++ new_opt_pass, /* pass */ ++ ref_pass_name, /* reference_pass_name */ ++ 1, /* ref_pass_instance_number */ ++ pass_pos /* po_op */ ++ }; ++ ++ register_pass (&insert_pass); ++} ++ ++/* This function is called from nds32_option_override (). ++ All new passes should be registered here. 
*/ ++static void ++nds32_register_passes (void) ++{ ++ nds32_register_pass ( ++ make_pass_nds32_fp_as_gp, ++ PASS_POS_INSERT_BEFORE, ++ "ira"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_relax_opt, ++ PASS_POS_INSERT_AFTER, ++ "mach"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_load_store_opt, ++ PASS_POS_INSERT_AFTER, ++ "mach"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_soft_fp_arith_comm_opt, ++ PASS_POS_INSERT_BEFORE, ++ "mach"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_regrename_opt, ++ PASS_POS_INSERT_AFTER, ++ "mach"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_gcse_opt, ++ PASS_POS_INSERT_BEFORE, ++ "cprop_hardreg"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_cprop_acc_opt, ++ PASS_POS_INSERT_AFTER, ++ "cprop_hardreg"); ++ ++ nds32_register_pass ( ++ make_pass_cprop_hardreg, ++ PASS_POS_INSERT_AFTER, ++ "mach"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_rename_lmwsmw_opt, ++ PASS_POS_INSERT_AFTER, ++ "jump2"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_gen_lmwsmw_opt, ++ PASS_POS_INSERT_BEFORE, ++ "peephole2"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_const_remater_opt, ++ PASS_POS_INSERT_BEFORE, ++ "ira"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_scalbn_transform_opt, ++ PASS_POS_INSERT_AFTER, ++ "optimized"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_sign_conversion_opt, ++ PASS_POS_INSERT_BEFORE, ++ "optimized"); ++ ++ nds32_register_pass ( ++ make_pass_nds32_abi_compatible, ++ PASS_POS_INSERT_BEFORE, ++ "optimized"); ++ ++ nds32_register_pass ( ++ nds32::scheduling::make_pass_nds32_print_stalls, ++ PASS_POS_INSERT_BEFORE, ++ "final"); ++} ++ + /* ------------------------------------------------------------------------ */ + +-/* PART 3: Implement target hook stuff definitions. */ ++/* PART 4: Implement target hook stuff definitions. */ ++ ++ ++/* Computing the Length of an Insn. ++ Modifies the length assigned to instruction INSN. ++ LEN is the initially computed length of the insn. 
*/ ++int ++nds32_adjust_insn_length (rtx_insn *insn, int length) ++{ ++ int adjust_value = 0; ++ switch (recog_memoized (insn)) ++ { ++ case CODE_FOR_call_internal: ++ case CODE_FOR_call_value_internal: ++ { ++ if (NDS32_ALIGN_P ()) ++ { ++ rtx_insn *next_insn = next_active_insn (insn); ++ if (next_insn && get_attr_length (next_insn) != 2) ++ adjust_value += 2; ++ } ++ /* We need insert a nop after a noretun function call ++ to prevent software breakpoint corrupt the next function. */ ++ if (find_reg_note (insn, REG_NORETURN, NULL_RTX)) ++ { ++ if (TARGET_16_BIT) ++ adjust_value += 2; ++ else ++ adjust_value += 4; ++ } ++ } ++ return length + adjust_value; ++ ++ default: ++ return length; ++ } ++} ++ ++/* Storage Layout. */ ++ ++/* This function will be called just before expansion into rtl. */ ++static void ++nds32_expand_to_rtl_hook (void) ++{ ++ /* We need to set strictly aligned situation. ++ After that, the memory address checking in nds32_legitimate_address_p() ++ will take alignment offset into consideration so that it will not create ++ unaligned [base + offset] access during the rtl optimization. */ ++ cfun->machine->strict_aligned_p = 1; ++} ++ ++ ++/* Register Usage. */ ++ ++static void ++nds32_conditional_register_usage (void) ++{ ++ int regno; ++ ++ if (TARGET_LINUX_ABI) ++ fixed_regs[TP_REGNUM] = 1; ++ ++ if (TARGET_HARD_FLOAT) ++ { ++ for (regno = NDS32_FIRST_FPR_REGNUM; ++ regno <= NDS32_LAST_FPR_REGNUM; regno++) ++ { ++ fixed_regs[regno] = 0; ++ if (regno < NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS) ++ call_used_regs[regno] = 1; ++ else if (regno >= NDS32_FIRST_FPR_REGNUM + 22 ++ && regno < NDS32_FIRST_FPR_REGNUM + 48) ++ call_used_regs[regno] = 1; ++ else ++ call_used_regs[regno] = 0; ++ } ++ } ++ else if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ { ++ for (regno = NDS32_FIRST_FPR_REGNUM; ++ regno <= NDS32_LAST_FPR_REGNUM; ++ regno++) ++ fixed_regs[regno] = 0; ++ } ++} ++ + + /* Register Classes. 
*/ + ++static reg_class_t ++nds32_preferred_rename_class (reg_class_t rclass) ++{ ++ return nds32_preferred_rename_class_impl (rclass); ++} ++ + static unsigned char + nds32_class_max_nregs (reg_class_t rclass ATTRIBUTE_UNUSED, +- machine_mode mode) ++ enum machine_mode mode) + { + /* Return the maximum number of consecutive registers +- needed to represent "mode" in a register of "rclass". */ ++ needed to represent MODE in a register of RCLASS. */ + return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); + } + +@@ -1200,9 +1953,24 @@ static int + nds32_register_priority (int hard_regno) + { + /* Encourage to use r0-r7 for LRA when optimize for size. */ +- if (optimize_size && hard_regno < 8) +- return 4; +- return 3; ++ if (optimize_size) ++ { ++ if (hard_regno < 8) ++ return 4; ++ else if (hard_regno < 16) ++ return 3; ++ else if (hard_regno < 28) ++ return 2; ++ else ++ return 1; ++ } ++ else ++ { ++ if (hard_regno > 27) ++ return 1; ++ else ++ return 4; ++ } + } + + +@@ -1222,8 +1990,8 @@ nds32_register_priority (int hard_regno) + 2. return address + 3. callee-saved registers + 4. <padding bytes> (we will calculte in nds32_compute_stack_frame() +- and save it at +- cfun->machine->callee_saved_area_padding_bytes) ++ and save it at ++ cfun->machine->callee_saved_area_padding_bytes) + + [Block B] + 1. 
local variables +@@ -1241,29 +2009,29 @@ nds32_register_priority (int hard_regno) + By applying the basic frame/stack/argument pointers concept, + the layout of a stack frame shoule be like this: + +- | | ++ | | + old stack pointer -> ---- +- | | \ +- | | saved arguments for +- | | vararg functions +- | | / ++ | | \ ++ | | saved arguments for ++ | | vararg functions ++ | | / + hard frame pointer -> -- + & argument pointer | | \ +- | | previous hardware frame pointer +- | | return address +- | | callee-saved registers +- | | / +- frame pointer -> -- +- | | \ +- | | local variables +- | | and incoming arguments +- | | / +- -- +- | | \ +- | | outgoing +- | | arguments +- | | / +- stack pointer -> ---- ++ | | previous hardware frame pointer ++ | | return address ++ | | callee-saved registers ++ | | / ++ frame pointer -> -- ++ | | \ ++ | | local variables ++ | | and incoming arguments ++ | | / ++ -- ++ | | \ ++ | | outgoing ++ | | arguments ++ | | / ++ stack pointer -> ---- + + $SFP and $AP are used to represent frame pointer and arguments pointer, + which will be both eliminated as hard frame pointer. */ +@@ -1291,7 +2059,7 @@ nds32_can_eliminate (const int from_reg, const int to_reg) + /* -- Passing Arguments in Registers. */ + + static rtx +-nds32_function_arg (cumulative_args_t ca, machine_mode mode, ++nds32_function_arg (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) + { + unsigned int regno; +@@ -1306,7 +2074,7 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode, + if (!named) + { + /* If we are under hard float abi, we have arguments passed on the +- stack and all situation can be handled by GCC itself. */ ++ stack and all situation can be handled by GCC itself. */ + if (TARGET_HARD_FLOAT) + return NULL_RTX; + +@@ -1320,7 +2088,7 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode, + } + + /* No register available, return NULL_RTX. +- The compiler will use stack to pass argument instead. 
*/ ++ The compiler will use stack to pass argument instead. */ + return NULL_RTX; + } + +@@ -1329,14 +2097,34 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode, + are different. */ + if (TARGET_HARD_FLOAT) + { +- /* Currently we have not implemented hard float yet. */ +- gcc_unreachable (); ++ /* For TARGET_HARD_FLOAT calling convention, we use GPR and FPR ++ to pass argument. We have to further check TYPE and MODE so ++ that we can determine which kind of register we shall use. */ ++ ++ /* Note that we need to pass argument entirely in registers under ++ hard float abi. */ ++ if (GET_MODE_CLASS (mode) == MODE_FLOAT ++ && NDS32_ARG_ENTIRE_IN_FPR_REG_P (cum->fpr_offset, mode, type)) ++ { ++ /* Pick up the next available FPR register number. */ ++ regno ++ = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type); ++ return gen_rtx_REG (mode, regno); ++ } ++ else if (GET_MODE_CLASS (mode) != MODE_FLOAT ++ && NDS32_ARG_ENTIRE_IN_GPR_REG_P (cum->gpr_offset, mode, type)) ++ { ++ /* Pick up the next available GPR register number. */ ++ regno ++ = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type); ++ return gen_rtx_REG (mode, regno); ++ } + } + else + { + /* For !TARGET_HARD_FLOAT calling convention, we always use GPR to pass +- argument. Since we allow to pass argument partially in registers, +- we can just return it if there are still registers available. */ ++ argument. Since we allow to pass argument partially in registers, ++ we can just return it if there are still registers available. */ + if (NDS32_ARG_PARTIAL_IN_GPR_REG_P (cum->gpr_offset, mode, type)) + { + /* Pick up the next available register number. */ +@@ -1353,7 +2141,7 @@ nds32_function_arg (cumulative_args_t ca, machine_mode mode, + } + + static bool +-nds32_must_pass_in_stack (machine_mode mode, const_tree type) ++nds32_must_pass_in_stack (enum machine_mode mode, const_tree type) + { + /* Return true if a type must be passed in memory. 
+ If it is NOT using hard float abi, small aggregates can be +@@ -1366,7 +2154,7 @@ nds32_must_pass_in_stack (machine_mode mode, const_tree type) + } + + static int +-nds32_arg_partial_bytes (cumulative_args_t ca, machine_mode mode, ++nds32_arg_partial_bytes (cumulative_args_t ca, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) + { + /* Returns the number of bytes at the beginning of an argument that +@@ -1400,7 +2188,7 @@ nds32_arg_partial_bytes (cumulative_args_t ca, machine_mode mode, + remaining_reg_count + = NDS32_MAX_GPR_REGS_FOR_ARGS + - (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type) +- - NDS32_GPR_ARG_FIRST_REGNUM); ++ - NDS32_GPR_ARG_FIRST_REGNUM); + + /* Note that we have to return the nubmer of bytes, not registers count. */ + if (needed_reg_count > remaining_reg_count) +@@ -1410,26 +2198,23 @@ nds32_arg_partial_bytes (cumulative_args_t ca, machine_mode mode, + } + + static void +-nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode, ++nds32_function_arg_advance (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) + { +- machine_mode sub_mode; + CUMULATIVE_ARGS *cum = get_cumulative_args (ca); + + if (named) + { + /* We need to further check TYPE and MODE so that we can determine +- which kind of register we shall advance. */ +- if (type && TREE_CODE (type) == COMPLEX_TYPE) +- sub_mode = TYPE_MODE (TREE_TYPE (type)); +- else +- sub_mode = mode; ++ which kind of register we shall advance. */ + + /* Under hard float abi, we may advance FPR registers. */ +- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (sub_mode) == MODE_FLOAT) ++ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) + { +- /* Currently we have not implemented hard float yet. 
*/ +- gcc_unreachable (); ++ cum->fpr_offset ++ = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type) ++ - NDS32_FPR_ARG_FIRST_REGNUM ++ + NDS32_NEED_N_REGS_FOR_ARG (mode, type); + } + else + { +@@ -1442,9 +2227,9 @@ nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode, + else + { + /* If this nameless argument is NOT under TARGET_HARD_FLOAT, +- we can advance next register as well so that caller is +- able to pass arguments in registers and callee must be +- in charge of pushing all of them into stack. */ ++ we can advance next register as well so that caller is ++ able to pass arguments in registers and callee must be ++ in charge of pushing all of them into stack. */ + if (!TARGET_HARD_FLOAT) + { + cum->gpr_offset +@@ -1456,13 +2241,23 @@ nds32_function_arg_advance (cumulative_args_t ca, machine_mode mode, + } + + static unsigned int +-nds32_function_arg_boundary (machine_mode mode, const_tree type) ++nds32_function_arg_boundary (enum machine_mode mode, const_tree type) + { + return (nds32_needs_double_word_align (mode, type) + ? NDS32_DOUBLE_WORD_ALIGNMENT + : PARM_BOUNDARY); + } + ++bool ++nds32_vector_mode_supported_p (enum machine_mode mode) ++{ ++ if (mode == V4QImode ++ || mode == V2HImode) ++ return NDS32_EXT_DSP_P (); ++ ++ return false; ++} ++ + /* -- How Scalar Function Values Are Returned. 
*/ + + static rtx +@@ -1470,28 +2265,68 @@ nds32_function_value (const_tree ret_type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) + { +- machine_mode mode; ++ enum machine_mode mode; + int unsignedp; + + mode = TYPE_MODE (ret_type); + unsignedp = TYPE_UNSIGNED (ret_type); + +- mode = promote_mode (ret_type, mode, &unsignedp); ++ if (INTEGRAL_TYPE_P (ret_type)) ++ mode = promote_mode (ret_type, mode, &unsignedp); + +- return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); ++ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM); ++ else ++ return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); + } + + static rtx +-nds32_libcall_value (machine_mode mode, ++nds32_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) + { ++ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM); ++ + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); + } + + static bool + nds32_function_value_regno_p (const unsigned int regno) + { +- return (regno == NDS32_GPR_RET_FIRST_REGNUM); ++ if (regno == NDS32_GPR_RET_FIRST_REGNUM ++ || (TARGET_HARD_FLOAT ++ && regno == NDS32_FPR_RET_FIRST_REGNUM)) ++ return true; ++ ++ return false; ++} ++ ++/* -- How Large Values Are Returned. */ ++ ++static bool ++nds32_return_in_memory (const_tree type, ++ const_tree fntype ATTRIBUTE_UNUSED) ++{ ++ /* Note that int_size_in_bytes can return -1 if the size can vary ++ or is larger than an integer. */ ++ HOST_WIDE_INT size = int_size_in_bytes (type); ++ ++ /* For COMPLEX_TYPE, if the total size cannot be hold within two registers, ++ the return value is supposed to be in memory. We need to be aware of ++ that the size may be -1. 
*/ ++ if (TREE_CODE (type) == COMPLEX_TYPE) ++ if (size < 0 || size > 2 * UNITS_PER_WORD) ++ return true; ++ ++ /* If it is BLKmode and the total size cannot be hold within two registers, ++ the return value is supposed to be in memory. We need to be aware of ++ that the size may be -1. */ ++ if (TYPE_MODE (type) == BLKmode) ++ if (size < 0 || size > 2 * UNITS_PER_WORD) ++ return true; ++ ++ /* For other cases, having result in memory is unnecessary. */ ++ return false; + } + + /* -- Function Entry and Exit. */ +@@ -1522,7 +2357,7 @@ nds32_asm_function_prologue (FILE *file, + /* Use df_regs_ever_live_p() to detect if the register + is ever used in the current function. */ + fprintf (file, "\t! registers ever_live: "); +- for (r = 0; r < 32; r++) ++ for (r = 0; r < 65; r++) + { + if (df_regs_ever_live_p (r)) + fprintf (file, "%s, ", reg_names[r]); +@@ -1554,6 +2389,10 @@ nds32_asm_function_prologue (FILE *file, + attrs = TREE_CHAIN (attrs); + } + fputc ('\n', file); ++ ++ /* If there is any critical isr in this file, disable linker ifc. */ ++ if (nds32_isr_function_critical_p (current_function_decl)) ++ fprintf (file, "\t.no_relax ifc\n"); + } + + /* After rtl prologue has been expanded, this function is used. */ +@@ -1561,56 +2400,12 @@ static void + nds32_asm_function_end_prologue (FILE *file) + { + fprintf (file, "\t! END PROLOGUE\n"); +- +- /* If frame pointer is NOT needed and -mfp-as-gp is issued, +- we can generate special directive: ".omit_fp_begin" +- to guide linker doing fp-as-gp optimization. +- However, for a naked function, which means +- it should not have prologue/epilogue, +- using fp-as-gp still requires saving $fp by push/pop behavior and +- there is no benefit to use fp-as-gp on such small function. +- So we need to make sure this function is NOT naked as well. */ +- if (!frame_pointer_needed +- && !cfun->machine->naked_p +- && cfun->machine->fp_as_gp_p) +- { +- fprintf (file, "\t! 
----------------------------------------\n"); +- fprintf (file, "\t! Guide linker to do " +- "link time optimization: fp-as-gp\n"); +- fprintf (file, "\t! We add one more instruction to " +- "initialize $fp near to $gp location.\n"); +- fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n"); +- fprintf (file, "\t! this extra instruction should be " +- "eliminated at link stage.\n"); +- fprintf (file, "\t.omit_fp_begin\n"); +- fprintf (file, "\tla\t$fp,_FP_BASE_\n"); +- fprintf (file, "\t! ----------------------------------------\n"); +- } + } + + /* Before rtl epilogue has been expanded, this function is used. */ + static void + nds32_asm_function_begin_epilogue (FILE *file) + { +- /* If frame pointer is NOT needed and -mfp-as-gp is issued, +- we can generate special directive: ".omit_fp_end" +- to claim fp-as-gp optimization range. +- However, for a naked function, +- which means it should not have prologue/epilogue, +- using fp-as-gp still requires saving $fp by push/pop behavior and +- there is no benefit to use fp-as-gp on such small function. +- So we need to make sure this function is NOT naked as well. */ +- if (!frame_pointer_needed +- && !cfun->machine->naked_p +- && cfun->machine->fp_as_gp_p) +- { +- fprintf (file, "\t! ----------------------------------------\n"); +- fprintf (file, "\t! Claim the range of fp-as-gp " +- "link time optimization\n"); +- fprintf (file, "\t.omit_fp_end\n"); +- fprintf (file, "\t! ----------------------------------------\n"); +- } +- + fprintf (file, "\t! BEGIN EPILOGUE\n"); + } + +@@ -1638,41 +2433,104 @@ nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + ? 
1 + : 0); + ++ if (flag_pic) ++ { ++ fprintf (file, "\tsmw.adm\t$r31, [$r31], $r31, 4\n"); ++ fprintf (file, "\tsethi\t%s, hi20(_GLOBAL_OFFSET_TABLE_-8)\n", ++ reg_names [PIC_OFFSET_TABLE_REGNUM]); ++ fprintf (file, "\tori\t%s, %s, lo12(_GLOBAL_OFFSET_TABLE_-4)\n", ++ reg_names [PIC_OFFSET_TABLE_REGNUM], ++ reg_names [PIC_OFFSET_TABLE_REGNUM]); ++ ++ if (TARGET_ISA_V3) ++ fprintf (file, "\tadd5.pc\t$gp\n"); ++ else ++ { ++ fprintf (file, "\tmfusr\t$ta, $pc\n"); ++ fprintf (file, "\tadd\t%s, $ta, %s\n", ++ reg_names [PIC_OFFSET_TABLE_REGNUM], ++ reg_names [PIC_OFFSET_TABLE_REGNUM]); ++ } ++ } ++ + if (delta != 0) + { + if (satisfies_constraint_Is15 (GEN_INT (delta))) + { +- fprintf (file, "\taddi\t$r%d, $r%d, %ld\n", ++ fprintf (file, "\taddi\t$r%d, $r%d, " HOST_WIDE_INT_PRINT_DEC "\n", + this_regno, this_regno, delta); + } + else if (satisfies_constraint_Is20 (GEN_INT (delta))) + { +- fprintf (file, "\tmovi\t$ta, %ld\n", delta); ++ fprintf (file, "\tmovi\t$ta, " HOST_WIDE_INT_PRINT_DEC "\n", delta); + fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno); + } + else + { +- fprintf (file, "\tsethi\t$ta, hi20(%ld)\n", delta); +- fprintf (file, "\tori\t$ta, $ta, lo12(%ld)\n", delta); ++ fprintf (file, ++ "\tsethi\t$ta, hi20(" HOST_WIDE_INT_PRINT_DEC ")\n", ++ delta); ++ fprintf (file, ++ "\tori\t$ta, $ta, lo12(" HOST_WIDE_INT_PRINT_DEC ")\n", ++ delta); + fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno); + } + } + +- fprintf (file, "\tb\t"); +- assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); +- fprintf (file, "\n"); ++ if (flag_pic) ++ { ++ fprintf (file, "\tla\t$ta, "); ++ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); ++ fprintf (file, "@PLT\n"); ++ fprintf (file, "\t! 
epilogue\n"); ++ fprintf (file, "\tlwi.bi\t%s, [%s], 4\n", ++ reg_names[PIC_OFFSET_TABLE_REGNUM], ++ reg_names[STACK_POINTER_REGNUM]); ++ fprintf (file, "\tbr\t$ta\n"); ++ } ++ else ++ { ++ fprintf (file, "\tb\t"); ++ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); ++ fprintf (file, "\n"); ++ } + + final_end_function (); + } + + /* -- Permitting tail calls. */ + ++/* Return true if it is ok to do sibling call optimization. */ ++static bool ++nds32_function_ok_for_sibcall (tree decl, ++ tree exp ATTRIBUTE_UNUSED) ++{ ++ /* The DECL is NULL if it is an indirect call. */ ++ ++ /* 1. Do not apply sibling call if -mv3push is enabled, ++ because pop25 instruction also represents return behavior. ++ 2. If this function is a isr function, do not apply sibling call ++ because it may perform the behavior that user does not expect. ++ 3. If this function is a variadic function, do not apply sibling call ++ because the stack layout may be a mess. ++ 4. We don't want to apply sibling call optimization for indirect ++ sibcall because the pop behavior in epilogue may pollute the ++ content of caller-saved regsiter when the register is used for ++ indirect sibcall. ++ 5. In pic mode, it may use some registers for PLT call. */ ++ return (!TARGET_V3PUSH ++ && !nds32_isr_function_p (current_function_decl) ++ && (cfun->machine->va_args_size == 0) ++ && decl ++ && !flag_pic); ++} ++ + /* Determine whether we need to enable warning for function return check. */ + static bool + nds32_warn_func_return (tree decl) + { +-/* Naked functions are implemented entirely in assembly, including the +- return sequence, so suppress warnings about this. */ ++ /* Naked functions are implemented entirely in assembly, including the ++ return sequence, so suppress warnings about this. 
*/ + return !nds32_naked_function_p (decl); + } + +@@ -1681,7 +2539,7 @@ nds32_warn_func_return (tree decl) + + static void + nds32_setup_incoming_varargs (cumulative_args_t ca, +- machine_mode mode, ++ enum machine_mode mode, + tree type, + int *pretend_args_size, + int second_time ATTRIBUTE_UNUSED) +@@ -1795,7 +2653,7 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + sorry ("a nested function is not supported for reduced registers"); + + /* STEP 1: Copy trampoline code template into stack, +- fill up essential data into stack. */ ++ fill up essential data into stack. */ + + /* Extract nested function address rtx. */ + fnaddr = XEXP (DECL_RTL (fndecl), 0); +@@ -1831,8 +2689,8 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + && (tramp_align_in_bytes % nds32_cache_block_size) == 0) + { + /* Under this condition, the starting address of trampoline +- must be aligned to the starting address of each cache block +- and we do not have to worry about cross-boundary issue. */ ++ must be aligned to the starting address of each cache block ++ and we do not have to worry about cross-boundary issue. */ + for (i = 0; + i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1) + / nds32_cache_block_size; +@@ -1847,10 +2705,10 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + else if (TRAMPOLINE_SIZE > nds32_cache_block_size) + { + /* The starting address of trampoline code +- may not be aligned to the cache block, +- so the trampoline code may be across two cache block. +- We need to sync the last element, which is 4-byte size, +- of trampoline template. */ ++ may not be aligned to the cache block, ++ so the trampoline code may be across two cache block. ++ We need to sync the last element, which is 4-byte size, ++ of trampoline template. 
*/ + for (i = 0; + i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1) + / nds32_cache_block_size; +@@ -1871,16 +2729,16 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + else + { + /* This is the simplest case. +- Because TRAMPOLINE_SIZE is less than or +- equal to nds32_cache_block_size, +- we can just sync start address and +- the last element of trampoline code. */ ++ Because TRAMPOLINE_SIZE is less than or ++ equal to nds32_cache_block_size, ++ we can just sync start address and ++ the last element of trampoline code. */ + + /* Sync starting address of tampoline code. */ + emit_move_insn (tmp_reg, sync_cache_addr); + emit_insn (isync_insn); + /* Sync the last element, which is 4-byte size, +- of trampoline template. */ ++ of trampoline template. */ + emit_move_insn (tmp_reg, + plus_constant (Pmode, sync_cache_addr, + TRAMPOLINE_SIZE - 4)); +@@ -1896,11 +2754,52 @@ nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + /* Addressing Modes. */ + + static bool +-nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) ++nds32_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) + { ++ if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ { ++ /* When using floating-point instructions, ++ we don't allow 'addr' to be [symbol_ref], [CONST] pattern. */ ++ if ((mode == DFmode || mode == SFmode) ++ && (GET_CODE (x) == SYMBOL_REF ++ || GET_CODE(x) == CONST)) ++ return false; ++ ++ /* Allow [post_modify] addressing mode, when using FPU instructions. */ ++ if (GET_CODE (x) == POST_MODIFY ++ && mode == DFmode) ++ { ++ if (GET_CODE (XEXP (x, 0)) == REG ++ && GET_CODE (XEXP (x, 1)) == PLUS) ++ { ++ rtx plus_op = XEXP (x, 1); ++ rtx op0 = XEXP (plus_op, 0); ++ rtx op1 = XEXP (plus_op, 1); ++ ++ if (nds32_address_register_rtx_p (op0, strict) ++ && CONST_INT_P (op1)) ++ { ++ if (satisfies_constraint_Is14 (op1)) ++ { ++ /* If it is not under strictly aligned situation, ++ we can return true without checking alignment. 
*/ ++ if (!cfun->machine->strict_aligned_p) ++ return true; ++ /* Make sure address is word alignment. ++ Currently we do not have 64-bit load/store yet, ++ so we will use two 32-bit load/store instructions to do ++ memory access and they are single word alignment. */ ++ else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (op1))) ++ return true; ++ } ++ } ++ } ++ } ++ } ++ + /* For (mem:DI addr) or (mem:DF addr) case, + we only allow 'addr' to be [reg], [symbol_ref], +- [const], or [reg + const_int] pattern. */ ++ [const], or [reg + const_int] pattern. */ + if (mode == DImode || mode == DFmode) + { + /* Allow [Reg + const_int] addressing mode. */ +@@ -1910,13 +2809,19 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + && nds32_legitimate_index_p (mode, XEXP (x, 1), strict) + && CONST_INT_P (XEXP (x, 1))) + return true; +- + else if (nds32_address_register_rtx_p (XEXP (x, 1), strict) + && nds32_legitimate_index_p (mode, XEXP (x, 0), strict) + && CONST_INT_P (XEXP (x, 0))) + return true; + } + ++ /* Allow [post_inc] and [post_dec] addressing mode. */ ++ if (GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC) ++ { ++ if (nds32_address_register_rtx_p (XEXP (x, 0), strict)) ++ return true; ++ } ++ + /* Now check [reg], [symbol_ref], and [const]. */ + if (GET_CODE (x) != REG + && GET_CODE (x) != SYMBOL_REF +@@ -1933,18 +2838,26 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + + case SYMBOL_REF: + /* (mem (symbol_ref A)) => [symbol_ref] */ ++ ++ if (flag_pic || SYMBOL_REF_TLS_MODEL (x)) ++ return false; ++ ++ if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x)) ++ return false; ++ + /* If -mcmodel=large, the 'symbol_ref' is not a valid address +- during or after LRA/reload phase. */ ++ during or after LRA/reload phase. 
*/ + if (TARGET_CMODEL_LARGE + && (reload_completed + || reload_in_progress + || lra_in_progress)) + return false; + /* If -mcmodel=medium and the symbol references to rodata section, +- the 'symbol_ref' is not a valid address during or after +- LRA/reload phase. */ ++ the 'symbol_ref' is not a valid address during or after ++ LRA/reload phase. */ + if (TARGET_CMODEL_MEDIUM +- && NDS32_SYMBOL_REF_RODATA_P (x) ++ && (NDS32_SYMBOL_REF_RODATA_P (x) ++ || CONSTANT_POOL_ADDRESS_P (x)) + && (reload_completed + || reload_in_progress + || lra_in_progress)) +@@ -1954,7 +2867,7 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + + case CONST: + /* (mem (const (...))) +- => [ + const_addr ], where const_addr = symbol_ref + const_int */ ++ => [ + const_addr ], where const_addr = symbol_ref + const_int */ + if (GET_CODE (XEXP (x, 0)) == PLUS) + { + rtx plus_op = XEXP (x, 0); +@@ -1965,17 +2878,21 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + if (GET_CODE (op0) == SYMBOL_REF && CONST_INT_P (op1)) + { + /* Now we see the [ + const_addr ] pattern, but we need +- some further checking. */ ++ some further checking. */ ++ ++ if (flag_pic) ++ return false; ++ + /* If -mcmodel=large, the 'const_addr' is not a valid address +- during or after LRA/reload phase. */ ++ during or after LRA/reload phase. */ + if (TARGET_CMODEL_LARGE + && (reload_completed + || reload_in_progress + || lra_in_progress)) + return false; + /* If -mcmodel=medium and the symbol references to rodata section, +- the 'const_addr' is not a valid address during or after +- LRA/reload phase. */ ++ the 'const_addr' is not a valid address during or after ++ LRA/reload phase. 
*/ + if (TARGET_CMODEL_MEDIUM + && NDS32_SYMBOL_REF_RODATA_P (op0) + && (reload_completed +@@ -1993,9 +2910,9 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + + case POST_MODIFY: + /* (mem (post_modify (reg) (plus (reg) (reg)))) +- => [Ra], Rb */ ++ => [Ra], Rb */ + /* (mem (post_modify (reg) (plus (reg) (const_int)))) +- => [Ra], const_int */ ++ => [Ra], const_int */ + if (GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == PLUS) + { +@@ -2018,7 +2935,7 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + /* (mem (post_inc reg)) => [Ra], 1/2/4 */ + /* (mem (post_dec reg)) => [Ra], -1/-2/-4 */ + /* The 1/2/4 or -1/-2/-4 have been displayed in nds32.md. +- We only need to deal with register Ra. */ ++ We only need to deal with register Ra. */ + if (nds32_address_register_rtx_p (XEXP (x, 0), strict)) + return true; + else +@@ -2026,11 +2943,11 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + + case PLUS: + /* (mem (plus reg const_int)) +- => [Ra + imm] */ ++ => [Ra + imm] */ + /* (mem (plus reg reg)) +- => [Ra + Rb] */ ++ => [Ra + Rb] */ + /* (mem (plus (mult reg const_int) reg)) +- => [Ra + Rb << sv] */ ++ => [Ra + Rb << sv] */ + if (nds32_address_register_rtx_p (XEXP (x, 0), strict) + && nds32_legitimate_index_p (mode, XEXP (x, 1), strict)) + return true; +@@ -2042,39 +2959,292 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) + + case LO_SUM: + /* (mem (lo_sum (reg) (symbol_ref))) */ +- /* (mem (lo_sum (reg) (const))) */ +- gcc_assert (REG_P (XEXP (x, 0))); +- if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF +- || GET_CODE (XEXP (x, 1)) == CONST) +- return nds32_legitimate_address_p (mode, XEXP (x, 1), strict); +- else ++ /* (mem (lo_sum (reg) (const (plus (symbol_ref) (reg)))) */ ++ /* TLS case: (mem (lo_sum (reg) (const (unspec symbol_ref X)))) */ ++ /* The LO_SUM is a valid address if and only if we would like to ++ generate 32-bit full address memory access with any of 
following ++ circumstance: ++ 1. -mcmodel=large. ++ 2. -mcmodel=medium and the symbol_ref references to rodata. */ ++ { ++ rtx sym = NULL_RTX; ++ ++ if (flag_pic) ++ return false; ++ ++ if (!REG_P (XEXP (x, 0))) ++ return false; ++ ++ if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF) ++ sym = XEXP (x, 1); ++ else if (GET_CODE (XEXP (x, 1)) == CONST) ++ { ++ rtx plus = XEXP(XEXP (x, 1), 0); ++ if (GET_CODE (plus) == PLUS) ++ sym = XEXP (plus, 0); ++ else if (GET_CODE (plus) == UNSPEC) ++ sym = XVECEXP (plus, 0, 0); ++ } ++ else ++ return false; ++ ++ gcc_assert (GET_CODE (sym) == SYMBOL_REF); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ return true; ++ ++ if (TARGET_CMODEL_LARGE) ++ return true; ++ else if (TARGET_CMODEL_MEDIUM ++ && NDS32_SYMBOL_REF_RODATA_P (sym)) ++ return true; ++ else ++ return false; ++ } ++ ++ default: ++ return false; ++ } ++} ++ ++static rtx ++nds32_legitimize_address (rtx x, ++ rtx oldx ATTRIBUTE_UNUSED, ++ enum machine_mode mode ATTRIBUTE_UNUSED) ++{ ++ if (nds32_tls_referenced_p (x)) ++ x = nds32_legitimize_tls_address (x); ++ else if (flag_pic && SYMBOLIC_CONST_P (x)) ++ x = nds32_legitimize_pic_address (x); ++ else if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x)) ++ x = nds32_legitimize_ict_address (x); ++ ++ return x; ++} ++ ++static bool ++nds32_legitimate_constant_p (enum machine_mode mode, rtx x) ++{ ++ switch (GET_CODE (x)) ++ { ++ case CONST_DOUBLE: ++ if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ && (mode == DFmode || mode == SFmode)) ++ return false; ++ break; ++ case CONST: ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == PLUS) ++ { ++ if (!CONST_INT_P (XEXP (x, 1))) ++ return false; ++ x = XEXP (x, 0); ++ } ++ ++ if (GET_CODE (x) == UNSPEC) ++ { ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLT: ++ case UNSPEC_TLSGD: ++ case UNSPEC_TLSLD: ++ case UNSPEC_TLSIE: ++ case UNSPEC_TLSLE: ++ case UNSPEC_ICT: ++ return false; ++ default: ++ return 
true; ++ } ++ } ++ break; ++ case SYMBOL_REF: ++ /* TLS symbols need a call to resolve in ++ precompute_register_parameters. */ ++ if (SYMBOL_REF_TLS_MODEL (x)) + return false; ++ break; ++ default: ++ return true; ++ } ++ ++ return true; ++} ++ ++/* Reorgnize the UNSPEC CONST and return its direct symbol. */ ++static rtx ++nds32_delegitimize_address (rtx x) ++{ ++ x = delegitimize_mem_from_attrs (x); ++ ++ if (GET_CODE(x) == CONST) ++ { ++ rtx inner = XEXP (x, 0); ++ ++ /* Handle for GOTOFF. */ ++ if (GET_CODE (inner) == PLUS) ++ inner = XEXP (inner, 0); ++ ++ if (GET_CODE (inner) == UNSPEC) ++ { ++ switch (XINT (inner, 1)) ++ { ++ case UNSPEC_GOTINIT: ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLT: ++ case UNSPEC_TLSGD: ++ case UNSPEC_TLSLD: ++ case UNSPEC_TLSIE: ++ case UNSPEC_TLSLE: ++ case UNSPEC_ICT: ++ x = XVECEXP (inner, 0, 0); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ return x; ++} ++ ++static enum machine_mode ++nds32_vectorize_preferred_simd_mode (enum machine_mode mode) ++{ ++ if (!NDS32_EXT_DSP_P ()) ++ return word_mode; ++ ++ switch (mode) ++ { ++ case QImode: ++ return V4QImode; ++ case HImode: ++ return V2HImode; ++ default: ++ return word_mode; ++ } ++} + ++static bool ++nds32_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ switch (GET_CODE (x)) ++ { ++ case CONST: ++ return !nds32_legitimate_constant_p (mode, x); ++ case SYMBOL_REF: ++ /* All symbols have to be accessed through gp-relative in PIC mode. */ ++ /* We don't want to force symbol as constant pool in .text section, ++ because we use the gp-relatived instruction to load in small ++ or medium model. */ ++ if (flag_pic ++ || SYMBOL_REF_TLS_MODEL (x) ++ || TARGET_CMODEL_SMALL ++ || TARGET_CMODEL_MEDIUM) ++ return true; ++ break; ++ case CONST_INT: ++ case CONST_DOUBLE: ++ if (flag_pic && (lra_in_progress || reload_completed)) ++ return true; ++ break; + default: + return false; + } ++ return false; ++} ++ ++ ++/* Condition Code Status. 
*/ ++ ++/* -- Representation of condition codes using registers. */ ++ ++static void ++nds32_canonicalize_comparison (int *code, ++ rtx *op0 ATTRIBUTE_UNUSED, ++ rtx *op1, ++ bool op0_preserve_value ATTRIBUTE_UNUSED) ++{ ++ /* When the instruction combination pass tries to combine a comparison insn ++ with its previous insns, it also transforms the operator in order to ++ minimize its constant field. For example, it tries to transform a ++ comparison insn from ++ (set (reg:SI 54) ++ (ltu:SI (reg:SI 52) ++ (const_int 10 [0xa]))) ++ to ++ (set (reg:SI 54) ++ (leu:SI (reg:SI 52) ++ (const_int 9 [0x9]))) ++ ++ However, the nds32 target only provides instructions supporting the LTU ++ operation directly, and the implementation of the pattern "cbranchsi4" ++ only expands the LTU form. In order to handle the non-LTU operations ++ generated from passes other than the RTL expansion pass, we have to ++ implement this hook to revert those changes. Since we only expand the LTU ++ operator in the RTL expansion pass, we might only need to handle the LEU ++ case, unless we find other optimization passes perform more aggressive ++ transformations. */ ++ ++ if (*code == LEU && CONST_INT_P (*op1)) ++ { ++ *op1 = gen_int_mode (INTVAL (*op1) + 1, SImode); ++ *code = LTU; ++ } + } + + + /* Describing Relative Costs of Operations. */ + + static int +-nds32_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, ++nds32_register_move_cost (enum machine_mode mode, + reg_class_t from, + reg_class_t to) + { +- if (from == HIGH_REGS || to == HIGH_REGS) +- return 6; ++ /* In garywolf cpu, FPR to GPR is chaper than other cpu. */ ++ if (TARGET_PIPELINE_GRAYWOLF) ++ { ++ if (GET_MODE_SIZE (mode) == 8) ++ { ++ /* DPR to GPR. */ ++ if (from == FP_REGS && to != FP_REGS) ++ return 3; ++ /* GPR to DPR. 
*/ ++ if (from != FP_REGS && to == FP_REGS) ++ return 2; ++ } ++ else ++ { ++ if ((from == FP_REGS && to != FP_REGS) ++ || (from != FP_REGS && to == FP_REGS)) ++ return 2; ++ } ++ } + +- return 2; ++ if ((from == FP_REGS && to != FP_REGS) ++ || (from != FP_REGS && to == FP_REGS)) ++ return 3; ++ else if (from == HIGH_REGS || to == HIGH_REGS) ++ return optimize_size ? 6 : 2; ++ else ++ return 2; + } + + static int +-nds32_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, ++nds32_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) + { +- return 8; ++ /* Memory access is only need 1 cycle in our low-end processor, ++ however memory access is most 4-byte instruction, ++ so let it 8 for optimize_size, otherwise be 2. */ ++ if (nds32_memory_model_option == MEMORY_MODEL_FAST) ++ return optimize_size ? 8 : 4; ++ else ++ return 8; + } + + /* This target hook describes the relative costs of RTL expressions. +@@ -2094,7 +3264,7 @@ nds32_rtx_costs (rtx x, + + static int + nds32_address_cost (rtx address, +- machine_mode mode, ++ enum machine_mode mode, + addr_space_t as, + bool speed) + { +@@ -2102,6 +3272,55 @@ nds32_address_cost (rtx address, + } + + ++/* Adjusting the Instruction Scheduler. 
*/ ++ ++static int ++nds32_sched_issue_rate (void) ++{ ++ switch (nds32_cpu_option) ++ { ++ case CPU_GRAYWOLF: ++ case CPU_PANTHER: ++ return 2; ++ ++ default: ++ return 1; ++ } ++} ++ ++static int ++nds32_sched_adjust_cost (rtx_insn *insn ATTRIBUTE_UNUSED, rtx link, rtx_insn *dep ATTRIBUTE_UNUSED, int cost) ++{ ++ if (REG_NOTE_KIND (link) == REG_DEP_ANTI ++ || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) ++ { ++ if (nds32_sched_issue_rate () > 1) ++ return 1; ++ ++ return 0; ++ } ++ ++ return cost; ++} ++ ++static void ++nds32_set_sched_flags (spec_info_t spec_info ATTRIBUTE_UNUSED) ++{ ++ if (!flag_reorg_out_of_order ++ || nds32_sched_issue_rate () < 2) ++ return; ++ ++ unsigned int *flags = &(current_sched_info->flags); ++ ++ // Disallow the sheculder to find inc/mem pairs and break dependencies by ++ // duplication address computations. Otherwise, after doing so, the ++ // scheduler will treat that the two insns can be issued at the same cycle ++ // so that the later insn isn't marked as TImode. It will result in a wrong ++ // behavior for out-of-order reorganization. ++ *flags |= DONT_BREAK_DEPENDENCIES; ++} ++ ++ + /* Dividing the Output into Sections (Texts, Data, . . . ). */ + + /* If references to a symbol or a constant must be treated differently +@@ -2150,17 +3369,56 @@ nds32_asm_file_start (void) + { + default_file_start (); + ++ if (flag_pic) ++ fprintf (asm_out_file, "\t.pic\n"); ++ + /* Tell assembler which ABI we are using. */ + fprintf (asm_out_file, "\t! ABI version\n"); +- fprintf (asm_out_file, "\t.abi_2\n"); ++ if (TARGET_HARD_FLOAT) ++ fprintf (asm_out_file, "\t.abi_2fp_plus\n"); ++ else ++ fprintf (asm_out_file, "\t.abi_2\n"); + + /* Tell assembler that this asm code is generated by compiler. */ + fprintf (asm_out_file, "\t! This asm file is generated by compiler\n"); + fprintf (asm_out_file, "\t.flag\tverbatim\n"); +- /* Give assembler the size of each vector for interrupt handler. */ +- fprintf (asm_out_file, "\t! 
This vector size directive is required " +- "for checking inconsistency on interrupt handler\n"); +- fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); ++ ++ /* We need to provide the size of each vector for interrupt handler ++ under elf toolchain. */ ++ if (!TARGET_LINUX_ABI) ++ { ++ fprintf (asm_out_file, "\t! This vector size directive is required " ++ "for checking inconsistency on interrupt handler\n"); ++ fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); ++ } ++ ++ /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os, ++ the compiler may produce 'la $fp,_FP_BASE_' instruction ++ at prologue for fp-as-gp optimization. ++ We should emit weak reference of _FP_BASE_ to avoid undefined reference ++ in case user does not pass '--relax' option to linker. */ ++ if (!TARGET_LINUX_ABI && (TARGET_FORCE_FP_AS_GP || optimize_size)) ++ { ++ fprintf (asm_out_file, "\t! This weak reference is required to do " ++ "fp-as-gp link time optimization\n"); ++ fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n"); ++ } ++ /* If user enables '-mifc', we should emit relaxation directive ++ to tell linker that this file is allowed to do ifc optimization. */ ++ if (TARGET_IFC) ++ { ++ fprintf (asm_out_file, "\t! This relaxation directive is required " ++ "to do ifc link time optimization\n"); ++ fprintf (asm_out_file, "\t.relax\tifc\n"); ++ } ++ /* If user enables '-mex9', we should emit relaxation directive ++ to tell linker that this file is allowed to do ex9 optimization. */ ++ if (TARGET_EX9) ++ { ++ fprintf (asm_out_file, "\t! This relaxation directive is required " ++ "to do ex9 link time optimization\n"); ++ fprintf (asm_out_file, "\t.relax\tex9\n"); ++ } + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + +@@ -2171,6 +3429,53 @@ nds32_asm_file_start (void) + if (TARGET_ISA_V3M) + fprintf (asm_out_file, "\t! 
ISA family\t\t: %s\n", "V3M"); + ++ switch (nds32_cpu_option) ++ { ++ case CPU_N6: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N6"); ++ break; ++ ++ case CPU_N7: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N7"); ++ break; ++ ++ case CPU_N8: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N8"); ++ break; ++ ++ case CPU_E8: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "E8"); ++ break; ++ ++ case CPU_N9: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N9"); ++ break; ++ ++ case CPU_N10: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N10"); ++ break; ++ ++ case CPU_GRAYWOLF: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Graywolf"); ++ break; ++ ++ case CPU_N12: ++ case CPU_N13: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N13"); ++ break; ++ ++ case CPU_PANTHER: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Panther"); ++ break; ++ ++ case CPU_SIMPLE: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "SIMPLE"); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ + if (TARGET_CMODEL_SMALL) + fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "SMALL"); + if (TARGET_CMODEL_MEDIUM) +@@ -2181,6 +3486,15 @@ nds32_asm_file_start (void) + fprintf (asm_out_file, "\t! Endian setting\t: %s\n", + ((TARGET_BIG_ENDIAN) ? "big-endian" + : "little-endian")); ++ fprintf (asm_out_file, "\t! Use SP floating-point instruction\t: %s\n", ++ ((TARGET_FPU_SINGLE) ? "Yes" ++ : "No")); ++ fprintf (asm_out_file, "\t! Use DP floating-point instruction\t: %s\n", ++ ((TARGET_FPU_DOUBLE) ? "Yes" ++ : "No")); ++ fprintf (asm_out_file, "\t! ABI version\t\t: %s\n", ++ ((TARGET_HARD_FLOAT) ? "ABI2FP+" ++ : "ABI2")); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + +@@ -2188,8 +3502,14 @@ nds32_asm_file_start (void) + ((TARGET_CMOV) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! Use performance extension\t: %s\n", +- ((TARGET_PERF_EXT) ? "Yes" ++ ((TARGET_EXT_PERF) ? 
"Yes" + : "No")); ++ fprintf (asm_out_file, "\t! Use performance extension 2\t: %s\n", ++ ((TARGET_EXT_PERF2) ? "Yes" ++ : "No")); ++ fprintf (asm_out_file, "\t! Use string extension\t\t: %s\n", ++ ((TARGET_EXT_STRING) ? "Yes" ++ : "No")); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + +@@ -2203,10 +3523,18 @@ nds32_asm_file_start (void) + ((TARGET_REDUCED_REGS) ? "Yes" + : "No")); + ++ fprintf (asm_out_file, "\t! Support unaligned access\t\t: %s\n", ++ (flag_unaligned_access ? "Yes" ++ : "No")); ++ + fprintf (asm_out_file, "\t! ------------------------------------\n"); + + if (optimize_size) + fprintf (asm_out_file, "\t! Optimization level\t: -Os\n"); ++ else if (optimize_fast) ++ fprintf (asm_out_file, "\t! Optimization level\t: -Ofast\n"); ++ else if (optimize_debug) ++ fprintf (asm_out_file, "\t! Optimization level\t: -Og\n"); + else + fprintf (asm_out_file, "\t! Optimization level\t: -O%d\n", optimize); + +@@ -2225,9 +3553,65 @@ nds32_asm_file_end (void) + { + nds32_asm_file_end_for_isr (); + ++ /* The NDS32 Linux stack is mapped non-executable by default, so add a ++ .note.GNU-stack section. */ ++ if (TARGET_LINUX_ABI) ++ file_end_indicate_exec_stack (); ++ + fprintf (asm_out_file, "\t! 
------------------------------------\n"); + } + ++static bool ++nds32_asm_output_addr_const_extra (FILE *file, rtx x) ++{ ++ if (GET_CODE (x) == UNSPEC) ++ { ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOTINIT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ break; ++ case UNSPEC_GOTOFF: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@GOTOFF", file); ++ break; ++ case UNSPEC_GOT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@GOT", file); ++ break; ++ case UNSPEC_PLT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@PLT", file); ++ break; ++ case UNSPEC_TLSGD: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@TLSDESC", file); ++ break; ++ case UNSPEC_TLSLD: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@TLSDESC", file); ++ break; ++ case UNSPEC_TLSIE: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@GOTTPOFF", file); ++ break; ++ case UNSPEC_TLSLE: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@TPOFF", file); ++ break; ++ case UNSPEC_ICT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@ICT", file); ++ break; ++ default: ++ return false; ++ } ++ return true; ++ } ++ else ++ return false; ++} ++ + /* -- Output and Generation of Labels. */ + + static void +@@ -2243,7 +3627,15 @@ nds32_asm_globalize_label (FILE *stream, const char *name) + static void + nds32_print_operand (FILE *stream, rtx x, int code) + { +- int op_value; ++ HOST_WIDE_INT op_value = 0; ++ HOST_WIDE_INT one_position; ++ HOST_WIDE_INT zero_position; ++ bool pick_lsb_p = false; ++ bool pick_msb_p = false; ++ int regno; ++ ++ if (CONST_INT_P (x)) ++ op_value = INTVAL (x); + + switch (code) + { +@@ -2251,29 +3643,82 @@ nds32_print_operand (FILE *stream, rtx x, int code) + /* Do nothing special. */ + break; + +- case 'V': +- /* 'x' is supposed to be CONST_INT, get the value. */ ++ case 'b': ++ /* Use exact_log2() to search the 0-bit position. 
*/ + gcc_assert (CONST_INT_P (x)); +- op_value = INTVAL (x); ++ zero_position = exact_log2 (~UINTVAL (x) & GET_MODE_MASK (SImode)); ++ gcc_assert (zero_position != -1); ++ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, zero_position); + +- /* According to the Andes architecture, +- the system/user register index range is 0 ~ 1023. +- In order to avoid conflict between user-specified-integer value +- and enum-specified-register value, +- the 'enum nds32_intrinsic_registers' value +- in nds32_intrinsic.h starts from 1024. */ +- if (op_value < 1024 && op_value >= 0) +- { +- /* If user gives integer value directly (0~1023), +- we just print out the value. */ +- fprintf (stream, "%d", op_value); +- } +- else if (op_value < 0 +- || op_value >= ((int) ARRAY_SIZE (nds32_intrinsic_register_names) +- + 1024)) +- { +- /* The enum index value for array size is out of range. */ +- error ("intrinsic register index is out of range"); ++ /* No need to handle following process, so return immediately. */ ++ return; ++ ++ case 'e': ++ gcc_assert (MEM_P (x) ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT); ++ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (XEXP (XEXP (x, 0), 1))); ++ ++ /* No need to handle following process, so return immediately. */ ++ return; ++ ++ case 'v': ++ gcc_assert (CONST_INT_P (x) ++ && (INTVAL (x) == 0 ++ || INTVAL (x) == 8 ++ || INTVAL (x) == 16 ++ || INTVAL (x) == 24)); ++ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); ++ ++ /* No need to handle following process, so return immediately. */ ++ return; ++ ++ case 'B': ++ /* Use exact_log2() to search the 1-bit position. */ ++ gcc_assert (CONST_INT_P (x)); ++ one_position = exact_log2 (UINTVAL (x) & GET_MODE_MASK (SImode)); ++ gcc_assert (one_position != -1); ++ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, one_position); ++ ++ /* No need to handle following process, so return immediately. */ ++ return; ++ ++ case 'L': ++ /* X is supposed to be REG rtx. 
*/ ++ gcc_assert (REG_P (x)); ++ /* Claim that we are going to pick LSB part of X. */ ++ pick_lsb_p = true; ++ break; ++ ++ case 'H': ++ /* X is supposed to be REG rtx. */ ++ gcc_assert (REG_P (x)); ++ /* Claim that we are going to pick MSB part of X. */ ++ pick_msb_p = true; ++ break; ++ ++ case 'V': ++ /* X is supposed to be CONST_INT, get the value. */ ++ gcc_assert (CONST_INT_P (x)); ++ ++ /* According to the Andes architecture, ++ the system/user register index range is 0 ~ 1023. ++ In order to avoid conflict between user-specified-integer value ++ and enum-specified-register value, ++ the 'enum nds32_intrinsic_registers' value ++ in nds32_intrinsic.h starts from 1024. */ ++ if (op_value < 1024 && op_value >= 0) ++ { ++ /* If user gives integer value directly (0~1023), ++ we just print out the value. */ ++ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, op_value); ++ } ++ else if (op_value < 0 ++ || op_value >= ((int) ARRAY_SIZE (nds32_intrinsic_register_names) ++ + 1024)) ++ { ++ /* The enum index value for array size is out of range. */ ++ error ("intrinsic register index is out of range"); + } + else + { +@@ -2286,6 +3731,45 @@ nds32_print_operand (FILE *stream, rtx x, int code) + /* No need to handle following process, so return immediately. */ + return; + ++ case 'R': /* cctl valck */ ++ /* Note the cctl divide to 5 group and share the same name table. */ ++ if (op_value < 0 || op_value > 4) ++ error ("CCTL intrinsic function subtype out of range!"); ++ fprintf (stream, "%s", nds32_cctl_names[op_value]); ++ return; ++ ++ case 'T': /* cctl idxwbinv */ ++ /* Note the cctl divide to 5 group and share the same name table. */ ++ if (op_value < 0 || op_value > 4) ++ error ("CCTL intrinsic function subtype out of range!"); ++ fprintf (stream, "%s", nds32_cctl_names[op_value + 4]); ++ return; ++ ++ case 'U': /* cctl vawbinv */ ++ /* Note the cctl divide to 5 group and share the same name table. 
*/ ++ if (op_value < 0 || op_value > 4) ++ error ("CCTL intrinsic function subtype out of range!"); ++ fprintf (stream, "%s", nds32_cctl_names[op_value + 8]); ++ return; ++ ++ case 'X': /* cctl idxread */ ++ /* Note the cctl divide to 5 group and share the same name table. */ ++ if (op_value < 0 || op_value > 4) ++ error ("CCTL intrinsic function subtype out of range!"); ++ fprintf (stream, "%s", nds32_cctl_names[op_value + 12]); ++ return; ++ ++ case 'W': /* cctl idxwitre */ ++ /* Note the cctl divide to 5 group and share the same name table. */ ++ if (op_value < 0 || op_value > 4) ++ error ("CCTL intrinsic function subtype out of range!"); ++ fprintf (stream, "%s", nds32_cctl_names[op_value + 16]); ++ return; ++ ++ case 'Z': /* dpref */ ++ fprintf (stream, "%s", nds32_dpref_names[op_value]); ++ return; ++ + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); +@@ -2295,35 +3779,113 @@ nds32_print_operand (FILE *stream, rtx x, int code) + switch (GET_CODE (x)) + { + case LABEL_REF: ++ output_addr_const (stream, x); ++ break; ++ + case SYMBOL_REF: + output_addr_const (stream, x); ++ ++ if (!TARGET_LINUX_ABI && nds32_indirect_call_referenced_p (x)) ++ fprintf (stream, "@ICT"); ++ + break; + + case REG: ++ /* Print a Double-precision register name. */ ++ if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode) ++ && NDS32_IS_FPR_REGNUM (REGNO (x))) ++ { ++ regno = REGNO (x); ++ if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno)) ++ { ++ output_operand_lossage ("invalid operand for code '%c'", code); ++ break; ++ } ++ fprintf (stream, "$fd%d", (regno - NDS32_FIRST_FPR_REGNUM) >> 1); ++ break; ++ } ++ ++ /* Print LSB or MSB part of register pair if the ++ constraint modifier 'L' or 'H' is specified. 
*/ ++ if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode) ++ && NDS32_IS_GPR_REGNUM (REGNO (x))) ++ { ++ if ((pick_lsb_p && WORDS_BIG_ENDIAN) ++ || (pick_msb_p && !WORDS_BIG_ENDIAN)) ++ { ++ /* If we would like to print out LSB register under big-endian, ++ or print out MSB register under little-endian, we need to ++ increase register number. */ ++ regno = REGNO (x); ++ regno++; ++ fputs (reg_names[regno], stream); ++ break; ++ } ++ } ++ + /* Forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REGNO (x) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + /* Normal cases, print out register name. */ +- fputs (reg_names[REGNO (x)], stream); ++ regno = REGNO (x); ++ fputs (reg_names[regno], stream); + break; + + case MEM: + output_address (GET_MODE (x), XEXP (x, 0)); + break; + ++ case HIGH: ++ if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE) ++ { ++ const REAL_VALUE_TYPE *rv; ++ long val; ++ gcc_assert (GET_MODE (x) == SFmode); ++ ++ rv = CONST_DOUBLE_REAL_VALUE (XEXP (x, 0)); ++ REAL_VALUE_TO_TARGET_SINGLE (*rv, val); ++ ++ fprintf (stream, "hi20(0x%lx)", val); ++ } ++ else ++ gcc_unreachable (); ++ break; ++ ++ case CONST_DOUBLE: ++ const REAL_VALUE_TYPE *rv; ++ long val; ++ gcc_assert (GET_MODE (x) == SFmode); ++ ++ rv = CONST_DOUBLE_REAL_VALUE (x); ++ REAL_VALUE_TO_TARGET_SINGLE (*rv, val); ++ ++ fprintf (stream, "0x%lx", val); ++ break; ++ + case CODE_LABEL: + case CONST_INT: + case CONST: + output_addr_const (stream, x); + break; + ++ case CONST_VECTOR: ++ fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x)); ++ break; ++ ++ case LO_SUM: ++ /* This is a special case for inline assembly using memory address 'p'. ++ The inline assembly code is expected to use pesudo instruction ++ for the operand. 
EX: la */ ++ output_addr_const (stream, XEXP(x, 1)); ++ break; ++ + default: + /* Generally, output_addr_const () is able to handle most cases. +- We want to see what CODE could appear, +- so we use gcc_unreachable() to stop it. */ ++ We want to see what CODE could appear, ++ so we use gcc_unreachable() to stop it. */ + debug_rtx (x); + gcc_unreachable (); + break; +@@ -2331,7 +3893,9 @@ nds32_print_operand (FILE *stream, rtx x, int code) + } + + static void +-nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) ++nds32_print_operand_address (FILE *stream, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ rtx x) + { + rtx op0, op1; + +@@ -2346,15 +3910,25 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + fputs ("]", stream); + break; + ++ case LO_SUM: ++ /* This is a special case for inline assembly using memory operand 'm'. ++ The inline assembly code is expected to use pesudo instruction ++ for the operand. EX: [ls].[bhw] */ ++ fputs ("[ + ", stream); ++ op1 = XEXP (x, 1); ++ output_addr_const (stream, op1); ++ fputs ("]", stream); ++ break; ++ + case REG: + /* Forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REGNO (x) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + /* [Ra] */ +- fprintf (stream, "[%s]", reg_names[REGNO (x)]); ++ fprintf (stream, "[%s + 0]", reg_names[REGNO (x)]); + break; + + case PLUS: +@@ -2362,13 +3936,13 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + op1 = XEXP (x, 1); + + /* Checking op0, forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. 
*/ + if (TARGET_REDUCED_REGS + && REG_P (op0) + && REGNO (op0) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + /* Checking op1, forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REG_P (op1) + && REGNO (op1) == STATIC_CHAIN_REGNUM) +@@ -2377,8 +3951,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + if (REG_P (op0) && CONST_INT_P (op1)) + { + /* [Ra + imm] */ +- fprintf (stream, "[%s + (%d)]", +- reg_names[REGNO (op0)], (int)INTVAL (op1)); ++ fprintf (stream, "[%s + (" HOST_WIDE_INT_PRINT_DEC ")]", ++ reg_names[REGNO (op0)], INTVAL (op1)); + } + else if (REG_P (op0) && REG_P (op1)) + { +@@ -2391,8 +3965,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + /* [Ra + Rb << sv] + From observation, the pattern looks like: + (plus:SI (mult:SI (reg:SI 58) +- (const_int 4 [0x4])) +- (reg/f:SI 57)) */ ++ (const_int 4 [0x4])) ++ (reg/f:SI 57)) */ + int sv; + + /* We need to set sv to output shift value. */ +@@ -2402,6 +3976,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + sv = 1; + else if (INTVAL (XEXP (op0, 1)) == 4) + sv = 2; ++ else if (INTVAL (XEXP (op0, 1)) == 8) ++ sv = 3; + else + gcc_unreachable (); + +@@ -2410,6 +3986,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + reg_names[REGNO (XEXP (op0, 0))], + sv); + } ++ else if (GET_CODE (op0) == ASHIFT && REG_P (op1)) ++ { ++ /* [Ra + Rb << sv] ++ In normal, ASHIFT can be converted to MULT like above case. ++ But when the address rtx does not go through canonicalize_address ++ defined in fwprop, we'll need this case. 
*/ ++ int sv = INTVAL (XEXP (op0, 1)); ++ gcc_assert (sv <= 3 && sv >=0); ++ ++ fprintf (stream, "[%s + %s << %d]", ++ reg_names[REGNO (op1)], ++ reg_names[REGNO (XEXP (op0, 0))], ++ sv); ++ } + else + { + /* The control flow is not supposed to be here. */ +@@ -2421,20 +4011,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + + case POST_MODIFY: + /* (post_modify (regA) (plus (regA) (regB))) +- (post_modify (regA) (plus (regA) (const_int))) +- We would like to extract +- regA and regB (or const_int) from plus rtx. */ ++ (post_modify (regA) (plus (regA) (const_int))) ++ We would like to extract ++ regA and regB (or const_int) from plus rtx. */ + op0 = XEXP (XEXP (x, 1), 0); + op1 = XEXP (XEXP (x, 1), 1); + + /* Checking op0, forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REG_P (op0) + && REGNO (op0) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + /* Checking op1, forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REG_P (op1) + && REGNO (op1) == STATIC_CHAIN_REGNUM) +@@ -2449,8 +4039,8 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + else if (REG_P (op0) && CONST_INT_P (op1)) + { + /* [Ra], imm */ +- fprintf (stream, "[%s], %d", +- reg_names[REGNO (op0)], (int)INTVAL (op1)); ++ fprintf (stream, "[%s], " HOST_WIDE_INT_PRINT_DEC, ++ reg_names[REGNO (op0)], INTVAL (op1)); + } + else + { +@@ -2466,7 +4056,7 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + op0 = XEXP (x, 0); + + /* Checking op0, forbid using static chain register ($r16) +- on reduced-set registers configuration. */ ++ on reduced-set registers configuration. 
*/ + if (TARGET_REDUCED_REGS + && REG_P (op0) + && REGNO (op0) == STATIC_CHAIN_REGNUM) +@@ -2490,14 +4080,92 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) + + default : + /* Generally, output_addr_const () is able to handle most cases. +- We want to see what CODE could appear, +- so we use gcc_unreachable() to stop it. */ ++ We want to see what CODE could appear, ++ so we use gcc_unreachable() to stop it. */ + debug_rtx (x); + gcc_unreachable (); + break; + } + } + ++/* -- Assembler Commands for Exception Regions. */ ++ ++static rtx ++nds32_dwarf_register_span (rtx reg) ++{ ++ rtx dwarf_high, dwarf_low; ++ rtx dwarf_single; ++ enum machine_mode mode; ++ int regno; ++ ++ mode = GET_MODE (reg); ++ regno = REGNO (reg); ++ ++ /* We need to adjust dwarf register information for floating-point registers ++ rather than using default register number mapping. */ ++ if (regno >= NDS32_FIRST_FPR_REGNUM ++ && regno <= NDS32_LAST_FPR_REGNUM) ++ { ++ if (mode == DFmode || mode == SCmode) ++ { ++ /* By default, GCC maps increasing register numbers to increasing ++ memory locations, but paired FPRs in NDS32 target are always ++ big-endian, i.e.: ++ ++ fd0 : fs0 fs1 ++ (MSB) (LSB) ++ ++ We must return parallel rtx to represent such layout. */ ++ dwarf_high = gen_rtx_REG (word_mode, regno); ++ dwarf_low = gen_rtx_REG (word_mode, regno + 1); ++ return gen_rtx_PARALLEL (VOIDmode, ++ gen_rtvec (2, dwarf_low, dwarf_high)); ++ } ++ else if (mode == DCmode) ++ { ++ rtx dwarf_high_re = gen_rtx_REG (word_mode, regno); ++ rtx dwarf_low_re = gen_rtx_REG (word_mode, regno + 1); ++ rtx dwarf_high_im = gen_rtx_REG (word_mode, regno); ++ rtx dwarf_low_im = gen_rtx_REG (word_mode, regno + 1); ++ return gen_rtx_PARALLEL (VOIDmode, ++ gen_rtvec (4, dwarf_low_re, dwarf_high_re, ++ dwarf_high_im, dwarf_low_im)); ++ } ++ else if (mode == SFmode || mode == SImode) ++ { ++ /* Create new dwarf information with adjusted register number. 
*/ ++ dwarf_single = gen_rtx_REG (word_mode, regno); ++ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, dwarf_single)); ++ } ++ else ++ { ++ /* We should not be here. */ ++ gcc_unreachable (); ++ } ++ } ++ ++ return NULL_RTX; ++} ++ ++/* Map internal gcc register numbers to DWARF2 register numbers. */ ++ ++unsigned int ++nds32_dbx_register_number (unsigned int regno) ++{ ++ /* The nds32 port in GDB maintains a mapping between dwarf register ++ number and displayed register name. For backward compatibility to ++ previous toolchain, currently our gdb still has four registers ++ (d0.l, d0.h, d1.l, and d1.h) between GPR and FPR while compiler ++ does not count those four registers in its register number table. ++ So we have to add 4 on its register number and then create new ++ dwarf information. Hopefully we can discard such workaround ++ in the future. */ ++ if (NDS32_IS_FPR_REGNUM (regno)) ++ return regno + 4; ++ ++ return regno; ++} ++ + + /* Defining target-specific uses of __attribute__. */ + +@@ -2526,6 +4194,27 @@ nds32_merge_decl_attributes (tree olddecl, tree newdecl) + static void + nds32_insert_attributes (tree decl, tree *attributes) + { ++ /* A "indirect_call" function attribute implies "noinline" and "noclone" ++ for elf toolchain to support ROM patch mechanism. 
*/ ++ if (TREE_CODE (decl) == FUNCTION_DECL ++ && lookup_attribute ("indirect_call", *attributes) != NULL) ++ { ++ tree new_attrs = *attributes; ++ ++ if (TARGET_LINUX_ABI) ++ error("cannot use indirect_call attribute under linux toolchain"); ++ ++ if (lookup_attribute ("noinline", new_attrs) == NULL) ++ new_attrs = tree_cons (get_identifier ("noinline"), NULL, new_attrs); ++ if (lookup_attribute ("noclone", new_attrs) == NULL) ++ new_attrs = tree_cons (get_identifier ("noclone"), NULL, new_attrs); ++ ++ if (!TREE_PUBLIC (decl)) ++ error("indirect_call attribute can't apply for static function"); ++ ++ *attributes = new_attrs; ++ } ++ + /* For function declaration, we need to check isr-specific attributes: + 1. Call nds32_check_isr_attrs_conflict() to check any conflict. + 2. Check valid integer value for interrupt/exception. +@@ -2543,14 +4232,46 @@ nds32_insert_attributes (tree decl, tree *attributes) + nds32_check_isr_attrs_conflict (decl, func_attrs); + + /* Now we are starting to check valid id value +- for interrupt/exception/reset. +- Note that we ONLY check its validity here. +- To construct isr vector information, it is still performed +- by nds32_construct_isr_vectors_information(). */ ++ for interrupt/exception/reset. ++ Note that we ONLY check its validity here. ++ To construct isr vector information, it is still performed ++ by nds32_construct_isr_vectors_information(). */ + intr = lookup_attribute ("interrupt", func_attrs); + excp = lookup_attribute ("exception", func_attrs); + reset = lookup_attribute ("reset", func_attrs); + ++ /* The following code may use attribute arguments. If there is no ++ argument from source code, it will cause segmentation fault. ++ Therefore, return dircetly and report error message later. 
*/ ++ if ((intr && TREE_VALUE (intr) == NULL) ++ || (excp && TREE_VALUE (excp) == NULL) ++ || (reset && TREE_VALUE (reset) == NULL)) ++ return; ++ ++ /* ------------------------------------------------------------- */ ++ /* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to support following patterns: ++ ++ __attribute__((interrupt("XXX;YYY;id=ZZZ"))) ++ __attribute__((exception("XXX;YYY;id=ZZZ"))) ++ __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) ++ ++ If interrupt/exception/reset appears and its argument is a ++ STRING_CST, we will use other functions to parse string in the ++ nds32_construct_isr_vectors_information() and then set necessary ++ isr information in the nds32_isr_vectors[] array. Here we can ++ just return immediately to avoid new-syntax checking. */ ++ if (intr != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) ++ return; ++ if (excp != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) ++ return; ++ if (reset != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) ++ return; ++ /* ------------------------------------------------------------- */ ++ + if (intr || excp) + { + /* Deal with interrupt/exception. */ +@@ -2576,8 +4297,8 @@ nds32_insert_attributes (tree decl, tree *attributes) + id = TREE_VALUE (id_list); + /* Issue error if it is not a valid integer value. */ + if (TREE_CODE (id) != INTEGER_CST +- || wi::ltu_p (id, lower_bound) +- || wi::gtu_p (id, upper_bound)) ++ || TREE_INT_CST_LOW (id) < lower_bound ++ || TREE_INT_CST_LOW (id) > upper_bound) + error ("invalid id value for interrupt/exception attribute"); + + /* Advance to next id. */ +@@ -2604,8 +4325,8 @@ nds32_insert_attributes (tree decl, tree *attributes) + + /* 3. Check valid integer value for reset. 
*/ + if (TREE_CODE (id) != INTEGER_CST +- || wi::ltu_p (id, lower_bound) +- || wi::gtu_p (id, upper_bound)) ++ || TREE_INT_CST_LOW (id) < lower_bound ++ || TREE_INT_CST_LOW (id) > upper_bound) + error ("invalid id value for reset attribute"); + + /* 4. Check valid function for nmi/warm. */ +@@ -2667,17 +4388,40 @@ nds32_option_override (void) + { + /* Under V2 ISA, we need to strictly disable TARGET_V3PUSH. */ + target_flags &= ~MASK_V3PUSH; ++ /* Under V2 ISA, we need to strictly disable TARGET_IFC. */ ++ target_flags &= ~MASK_IFC; ++ /* Under V2 ISA, we need to strictly disable TARGET_EX9. */ ++ target_flags &= ~MASK_EX9; ++ /* If this is ARCH_V2J, we need to enable TARGET_REDUCED_REGS. */ ++ if (nds32_arch_option == ARCH_V2J) ++ target_flags |= MASK_REDUCED_REGS; + } + if (TARGET_ISA_V3) + { +- /* Under V3 ISA, currently nothing should be strictly set. */ ++ /* If this is ARCH_V3J, we need to enable TARGET_REDUCED_REGS. */ ++ if (nds32_arch_option == ARCH_V3J) ++ target_flags |= MASK_REDUCED_REGS; + } + if (TARGET_ISA_V3M) + { + /* Under V3M ISA, we need to strictly enable TARGET_REDUCED_REGS. */ + target_flags |= MASK_REDUCED_REGS; +- /* Under V3M ISA, we need to strictly disable TARGET_PERF_EXT. */ +- target_flags &= ~MASK_PERF_EXT; ++ if (nds32_arch_option != ARCH_V3M_PLUS) ++ { ++ /* Under V3M ISA, we need to strictly disable TARGET_IFC. */ ++ target_flags &= ~MASK_IFC; ++ /* Under V3M ISA, we need to strictly disable TARGET_EX9. */ ++ target_flags &= ~MASK_EX9; ++ } ++ /* Under V3M ISA, we need to strictly disable TARGET_EXT_PERF. */ ++ target_flags &= ~MASK_EXT_PERF; ++ /* Under V3M ISA, we need to strictly disable TARGET_EXT_PERF2. */ ++ target_flags &= ~MASK_EXT_PERF2; ++ /* Under V3M ISA, we need to strictly disable TARGET_EXT_STRING. 
*/ ++ target_flags &= ~MASK_EXT_STRING; ++ ++ if (flag_pic) ++ error ("not support -fpic option for v3m toolchain"); + } + + /* See if we are using reduced-set registers: +@@ -2688,48 +4432,568 @@ nds32_option_override (void) + int r; + + /* Prevent register allocator from +- choosing it as doing register allocation. */ ++ choosing it as doing register allocation. */ + for (r = 11; r <= 14; r++) + fixed_regs[r] = call_used_regs[r] = 1; + for (r = 16; r <= 27; r++) + fixed_regs[r] = call_used_regs[r] = 1; + } + ++ /* See if user explicitly would like to use fp-as-gp optimization. ++ If so, we must prevent $fp from being allocated ++ during register allocation. */ ++ if (TARGET_FORCE_FP_AS_GP) ++ fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1; ++ + if (!TARGET_16_BIT) + { + /* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH. */ + target_flags &= ~MASK_V3PUSH; + } + +- /* Currently, we don't support PIC code generation yet. */ +- if (flag_pic) +- sorry ("not support -fpic"); ++ if (TARGET_HARD_FLOAT && !(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) ++ { ++ if (nds32_arch_option == ARCH_V3S || nds32_arch_option == ARCH_V3F) ++ error ("Disable FPU ISA, " ++ "the ABI option must be enable '-mfloat-abi=soft'"); ++ else ++ error ("'-mabi=2fp+' option only support when FPU available, " ++ "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'"); ++ } ++ ++ nds32_register_passes (); ++ ++ nds32_init_rtx_costs (); + } + + + /* Miscellaneous Parameters. */ + ++static rtx_insn * ++nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED, ++ vec<rtx> &inputs ATTRIBUTE_UNUSED, ++ vec<const char *> &constraints ATTRIBUTE_UNUSED, ++ vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) ++{ ++ clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM)); ++ SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM); ++ return NULL; ++} ++/* Insert end_label and check loop body whether is empty. 
*/ ++static bool ++nds32_hwloop_insert_end_label (rtx loop_id, rtx end_label) ++{ ++ rtx_insn *insn = NULL; ++ basic_block bb; ++ rtx cfg_id; ++ rtx_insn *last_insn; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (NOTE_P (insn)) ++ continue; ++ ++ if (recog_memoized (insn) == CODE_FOR_hwloop_cfg ++ && INSN_P (insn)) ++ { ++ cfg_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 5), 0, 0); ++ if (cfg_id == loop_id) ++ { ++ for (last_insn = PREV_INSN (insn); last_insn != BB_HEAD (bb); ++ last_insn = PREV_INSN (last_insn)) ++ { ++ if (NONDEBUG_INSN_P (last_insn)) ++ { ++ emit_label_before (end_label, last_insn); ++ if (TARGET_IFC) ++ { ++ /* The last_insn don't do ifcall. */ ++ emit_insn_before (gen_no_ifc_begin (), last_insn); ++ emit_insn_after (gen_no_ifc_end (), last_insn); ++ } ++ if (TARGET_EX9) ++ { ++ /* The last_insn don't do ex9. */ ++ emit_insn_before (gen_no_ex9_begin (), last_insn); ++ emit_insn_after (gen_no_ex9_end (), last_insn); ++ } ++ /* Record last instruction for identify in relax pass. */ ++ emit_insn_after (gen_hwloop_last_insn (), last_insn); ++ return true; ++ } ++ } ++ ++ if (NOTE_INSN_BASIC_BLOCK_P (last_insn)) ++ { ++ rtx_insn *nop = emit_insn_before (gen_unspec_nop (), ++ last_insn); ++ emit_label_before (end_label, nop); ++ if (TARGET_IFC) ++ { ++ /* The last_insn don't do ifcall. */ ++ emit_insn_before (gen_no_ifc_begin (), last_insn); ++ emit_insn_after (gen_no_ifc_end (), last_insn); ++ } ++ if (TARGET_EX9) ++ { ++ /* The last_insn don't do ex9. 
*/ ++ emit_insn_before (gen_no_ex9_begin (), last_insn); ++ emit_insn_after (gen_no_ex9_end (), last_insn); ++ } ++ return true; ++ } ++ } ++ } ++ } ++ } ++ ++ if (insn != NULL) ++ delete_insn (insn); ++ return false; ++} ++ ++static void ++nds32_hwloop_remove (rtx loop_id) ++{ ++ rtx_insn *insn; ++ rtx le_id; ++ basic_block bb; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (NOTE_P (insn)) ++ continue; ++ ++ if (recog_memoized (insn) == CODE_FOR_init_lc ++ && INSN_P (insn)) ++ { ++ le_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0); ++ if (loop_id == le_id) ++ { ++ delete_insn (insn); ++ return; ++ } ++ } ++ } ++ } ++} ++ ++/* Insert isb instruction for hwloop. */ ++static void ++nds32_hwloop_insert_isb (rtx loop_id) ++{ ++ rtx_insn *insn; ++ rtx le_id; ++ basic_block bb; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (NOTE_P (insn)) ++ continue; ++ ++ if (recog_memoized (insn) == CODE_FOR_init_lc ++ && INSN_P (insn)) ++ { ++ le_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0); ++ if (loop_id == le_id) ++ { ++ emit_insn_after (gen_unspec_volatile_isb (), insn); ++ return; ++ } ++ } ++ } ++ } ++} ++/* Insert mtlei instruction for hwloop. 
*/ ++static void ++nds32_hwloop_insert_init_end () ++{ ++ rtx_insn *insn; ++ basic_block bb; ++ rtx loop_id, end_label; ++ bool hwloop_p; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (NOTE_P (insn)) ++ continue; ++ ++ if (recog_memoized (insn) == CODE_FOR_mtlbi_hint ++ && INSN_P (insn)) ++ { ++ end_label = gen_label_rtx (); ++ loop_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0); ++ hwloop_p = nds32_hwloop_insert_end_label (loop_id, end_label); ++ ++ if (!hwloop_p) ++ { ++ delete_insn (insn); ++ nds32_hwloop_remove (loop_id); ++ } ++ else ++ { ++ emit_insn_after (gen_mtlei (gen_rtx_LABEL_REF (Pmode, end_label)), insn); ++ nds32_hwloop_insert_isb (loop_id); ++ } ++ } ++ } ++ } ++} ++ ++/* Reorganize insns issued at the same cycle in out of order. */ ++static void ++nds32_reorg_out_of_order () ++{ ++ using namespace nds32; ++ ++ // The function is controoled by -mreorg-out-of-order and the issue rate. ++ if (!flag_reorg_out_of_order ++ || nds32_sched_issue_rate () < 2) ++ return; ++ ++ // We only move load insns up at this moment. ++ rtx_insn *insn; ++ ++ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) ++ { ++ if (!insn_executable_p (insn) ++ || GET_MODE (insn) != TImode ++ || get_attr_type (insn) == TYPE_STORE_MULTIPLE ++ || get_attr_type (insn) == TYPE_LOAD_MULTIPLE ++ || get_attr_type (insn) == TYPE_LOAD ++ || get_attr_type (insn) == TYPE_FLOAD ++ || get_attr_type (insn) == TYPE_STORE ++ || get_attr_type (insn) == TYPE_FSTORE) ++ continue; ++ ++ rtx_insn *load_insn = insn; ++ ++ while ((load_insn = next_executable_insn_local (load_insn))) ++ { ++ if (GET_MODE (load_insn) == TImode) ++ { ++ load_insn = NULL; ++ break; ++ } ++ ++ if ((get_attr_type (load_insn) == TYPE_LOAD ++ || get_attr_type (load_insn) == TYPE_FLOAD) ++ && get_attr_length (load_insn) < 4) ++ break; ++ } ++ ++ if (load_insn == NULL_RTX) ++ continue; ++ ++ exchange_insns (insn, load_insn); ++ } ++} ++ ++/* Perform machine-dependent processing. 
*/ ++static void ++nds32_machine_dependent_reorg (void) ++{ ++ /* We are freeing block_for_insn in the toplev to keep compatibility ++ with old MDEP_REORGS that are not CFG based. Recompute it ++ now. */ ++ compute_bb_for_insn (); ++ ++ nds32_reorg_out_of_order (); ++ ++ if (TARGET_HWLOOP) ++ nds32_hwloop_insert_init_end (); ++ ++ if (flag_var_tracking) ++ { ++ df_analyze (); ++ timevar_push (TV_VAR_TRACKING); ++ variable_tracking_main (); ++ timevar_pop (TV_VAR_TRACKING); ++ df_finish_pass (false); ++ } ++ ++ /* Use -minnermost-loop to enable, ++ need more testing to verify result. */ ++ if (TARGET_INNERMOST_LOOP) ++ nds32_insert_innermost_loop (); ++ ++ nds32_insert_isps (); ++} ++ + static void + nds32_init_builtins (void) + { + nds32_init_builtins_impl (); + } + ++static tree ++nds32_builtin_decl (unsigned code, bool initialize_p) ++{ ++ /* Implement in nds32-intrinsic.c. */ ++ return nds32_builtin_decl_impl (code, initialize_p); ++} ++ + static rtx + nds32_expand_builtin (tree exp, + rtx target, + rtx subtarget, +- machine_mode mode, ++ enum machine_mode mode, + int ignore) + { ++ /* Implement in nds32-intrinsic.c. */ + return nds32_expand_builtin_impl (exp, target, subtarget, mode, ignore); + } + ++static bool ++nds32_have_conditional_execution (void) ++{ ++ /* Lie to gcc that we have conditional execution for change optimization flow ++ in if-conversion, LRA and scheduling phase. ++ In our experiment result show that cand reduce about 2% code size with very ++ minor performance degradation in average. */ ++ return optimize_size; ++} ++ ++/* Implement TARGET_INIT_LIBFUNCS. */ ++static void ++nds32_init_libfuncs (void) ++{ ++ if (TARGET_LINUX_ABI) ++ init_sync_libfuncs (UNITS_PER_WORD); ++} ++ ++/* Implement TARGET_CAN_USE_DOLOOP_P. */ ++static bool ++nds32_can_use_doloop_p (const widest_int &, const widest_int &iterations_max, ++ unsigned int, bool entered_at_top) ++{ ++ /* Using hwloop must be entered from the top. 
*/ ++ if (!entered_at_top) ++ return false; ++ ++ if (lookup_attribute ("no_ext_zol", DECL_ATTRIBUTES (current_function_decl))) ++ return false; ++ ++ /* Initial hardware loops too costly, so we must avoid to ++ generate a hardware loops when loop count less then 8. */ ++ if (!NDS32_HW_LOOP_P () ++ || iterations_max.ulow() < 8) ++ return false; ++ return true; ++} ++ ++/* NULL if INSN insn is valid within a low-overhead loop. ++ Otherwise return why doloop cannot be applied. */ ++static const char * ++nds32_invalid_within_doloop (const rtx_insn *insn) ++{ ++ if (CALL_P (insn)) ++ return "Function call in the loop."; ++ else if (INSN_CODE (insn) == CODE_FOR_pop25return ++ || INSN_CODE (insn) == CODE_FOR_return_internal) ++ return "Simple return in the loop."; ++ else if (INSN_CODE (insn) == CODE_FOR_unspec_no_hwloop) ++ return "no_hwloop hint in the loop"; ++ ++ return NULL; ++} + + /* ------------------------------------------------------------------------ */ + +-/* PART 4: Implemet extern function definitions, +- the prototype is in nds32-protos.h. */ ++/* PART 5: Implemet extern function definitions, ++ the prototype is in nds32-protos.h. */ ++ ++/* Run-time Target Specification. */ ++ ++void ++nds32_cpu_cpp_builtins(struct cpp_reader *pfile) ++{ ++#define builtin_define(TXT) cpp_define (pfile, TXT) ++#define builtin_assert(TXT) cpp_assert (pfile, TXT) ++ builtin_define ("__nds32__"); ++ builtin_define ("__NDS32__"); ++ ++ /* We need to provide builtin macro to describe the size of ++ each vector for interrupt handler under elf toolchain. 
*/ ++ if (!TARGET_LINUX_ABI) ++ { ++ if (TARGET_ISR_VECTOR_SIZE_4_BYTE) ++ builtin_define ("__NDS32_ISR_VECTOR_SIZE_4__"); ++ else ++ builtin_define ("__NDS32_ISR_VECTOR_SIZE_16__"); ++ } ++ ++ if (TARGET_HARD_FLOAT) ++ builtin_define ("__NDS32_ABI_2FP_PLUS__"); ++ else ++ builtin_define ("__NDS32_ABI_2__"); ++ ++ if (TARGET_ISA_V2) ++ builtin_define ("__NDS32_ISA_V2__"); ++ if (TARGET_ISA_V3) ++ builtin_define ("__NDS32_ISA_V3__"); ++ if (TARGET_ISA_V3M) ++ builtin_define ("__NDS32_ISA_V3M__"); ++ ++ if (TARGET_FPU_SINGLE) ++ builtin_define ("__NDS32_EXT_FPU_SP__"); ++ if (TARGET_FPU_DOUBLE) ++ builtin_define ("__NDS32_EXT_FPU_DP__"); ++ ++ if (TARGET_EXT_FPU_FMA) ++ builtin_define ("__NDS32_EXT_FPU_FMA__"); ++ if (NDS32_EXT_FPU_DOT_E) ++ builtin_define ("__NDS32_EXT_FPU_DOT_E__"); ++ if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ { ++ switch (nds32_fp_regnum) ++ { ++ case 0: ++ case 4: ++ builtin_define ("__NDS32_EXT_FPU_CONFIG_0__"); ++ break; ++ case 1: ++ case 5: ++ builtin_define ("__NDS32_EXT_FPU_CONFIG_1__"); ++ break; ++ case 2: ++ case 6: ++ builtin_define ("__NDS32_EXT_FPU_CONFIG_2__"); ++ break; ++ case 3: ++ case 7: ++ builtin_define ("__NDS32_EXT_FPU_CONFIG_3__"); ++ break; ++ default: ++ abort (); ++ } ++ } ++ ++ if (TARGET_BIG_ENDIAN) ++ builtin_define ("__NDS32_EB__"); ++ else ++ builtin_define ("__NDS32_EL__"); ++ ++ if (TARGET_REDUCED_REGS) ++ builtin_define ("__NDS32_REDUCED_REGS__"); ++ if (TARGET_CMOV) ++ builtin_define ("__NDS32_CMOV__"); ++ if (TARGET_EXT_PERF) ++ builtin_define ("__NDS32_EXT_PERF__"); ++ if (TARGET_EXT_PERF2) ++ builtin_define ("__NDS32_EXT_PERF2__"); ++ if (TARGET_EXT_STRING) ++ builtin_define ("__NDS32_EXT_STRING__"); ++ if (TARGET_16_BIT) ++ builtin_define ("__NDS32_16_BIT__"); ++ if (TARGET_GP_DIRECT) ++ builtin_define ("__NDS32_GP_DIRECT__"); ++ if (TARGET_VH) ++ builtin_define ("__NDS32_VH__"); ++ if (NDS32_EXT_DSP_P ()) ++ builtin_define ("__NDS32_EXT_DSP__"); ++ if (NDS32_HW_LOOP_P ()) ++ builtin_define 
("__NDS32_EXT_ZOL__"); ++ ++ /* Extra builtin macros. */ ++ if (TARGET_ISA_V3 || TARGET_ISA_V3M_PLUS) ++ builtin_define ("__NDS32_EXT_IFC__"); ++ if (TARGET_ISA_V3 || TARGET_ISA_V3M_PLUS) ++ builtin_define ("__NDS32_EXT_EX9__"); ++ if (TARGET_BIG_ENDIAN) ++ builtin_define ("__big_endian__"); ++ ++ builtin_assert ("cpu=nds32"); ++ builtin_assert ("machine=nds32"); ++ ++ /* FOR BACKWARD COMPATIBILITY. */ ++ if (TARGET_ISA_V2) ++ builtin_define ("__NDS32_BASELINE_V2__"); ++ if (TARGET_ISA_V3) ++ builtin_define ("__NDS32_BASELINE_V3__"); ++ if (TARGET_ISA_V3M) ++ builtin_define ("__NDS32_BASELINE_V3M__"); ++ if (TARGET_REDUCED_REGS) ++ builtin_define ("__NDS32_REDUCE_REGS__"); ++ ++ if (TARGET_ISA_V2) ++ builtin_define ("NDS32_BASELINE_V2"); ++ if (TARGET_ISA_V3) ++ builtin_define ("NDS32_BASELINE_V3"); ++ if (TARGET_ISA_V3M) ++ builtin_define ("NDS32_BASELINE_V3M"); ++ if (TARGET_REDUCED_REGS) ++ builtin_define ("NDS32_REDUCE_REGS"); ++ if (TARGET_FPU_SINGLE) ++ builtin_define ("NDS32_EXT_FPU_SP"); ++ if (TARGET_FPU_DOUBLE) ++ builtin_define ("NDS32_EXT_FPU_DP"); ++ if (TARGET_EXT_PERF) ++ builtin_define ("NDS32_EXT_PERF"); ++ if (TARGET_EXT_PERF2) ++ builtin_define ("NDS32_EXT_PERF2"); ++ if (TARGET_EXT_STRING) ++ builtin_define ("NDS32_EXT_STRING"); ++ if (TARGET_ISA_V3) ++ builtin_define ("NDS32_EXT_IFC"); ++ if (TARGET_ISA_V3) ++ builtin_define ("NDS32_EXT_EX9"); ++ ++ if (TARGET_HARD_FLOAT) ++ builtin_define ("NDS32_ABI_2FP_PLUS"); ++ else ++ builtin_define ("NDS32_ABI_2"); ++ ++ if (TARGET_BIG_ENDIAN) ++ builtin_define ("NDS32_EB"); ++ else ++ builtin_define ("NDS32_EL"); ++ ++ if (TARGET_ISA_V2) ++ builtin_define ("__NDS32_BASELINE_V2"); ++ if (TARGET_ISA_V3) ++ builtin_define ("__NDS32_BASELINE_V3"); ++ if (TARGET_ISA_V3M) ++ builtin_define ("__NDS32_BASELINE_V3M"); ++ if (TARGET_REDUCED_REGS) ++ builtin_define ("__NDS32_REDUCE_REGS"); ++ if (TARGET_FPU_SINGLE) ++ builtin_define ("__NDS32_EXT_FPU_SP"); ++ if (TARGET_FPU_DOUBLE) ++ builtin_define 
("__NDS32_EXT_FPU_DP"); ++ if (TARGET_EXT_PERF) ++ builtin_define ("__NDS32_EXT_PERF"); ++ if (TARGET_EXT_PERF2) ++ builtin_define ("__NDS32_EXT_PERF2"); ++ if (TARGET_EXT_STRING) ++ builtin_define ("__NDS32_EXT_STRING"); ++ if (TARGET_ISA_V3) ++ builtin_define ("__NDS32_EXT_IFC"); ++ ++ if (TARGET_ISA_V3) ++ builtin_define ("__NDS32_EXT_EX9"); ++ ++ if (TARGET_HARD_FLOAT) ++ builtin_define ("__NDS32_ABI_2FP_PLUS"); ++ else ++ builtin_define ("__NDS32_ABI_2"); ++ ++ if (TARGET_BIG_ENDIAN) ++ builtin_define ("__NDS32_EB"); ++ else ++ builtin_define ("__NDS32_EL"); ++#undef builtin_define ++#undef builtin_assert ++} ++ + + /* Defining Data Structures for Per-function Information. */ + +@@ -2743,26 +5007,80 @@ nds32_init_expanders (void) + + /* Register Usage. */ + ++/* -- Order of Allocation of Registers. */ ++ ++void ++nds32_adjust_reg_alloc_order (void) ++{ ++ const int nds32_reg_alloc_order[] = REG_ALLOC_ORDER; ++ ++ /* Copy the default register allocation order, which is designed ++ to optimize for code size. */ ++ memcpy(reg_alloc_order, nds32_reg_alloc_order, sizeof (reg_alloc_order)); ++ ++ /* Adjust few register allocation order when optimizing for speed. */ ++ if (!optimize_size) ++ { ++ memcpy (reg_alloc_order, nds32_reg_alloc_order_for_speed, ++ sizeof (nds32_reg_alloc_order_for_speed)); ++ } ++} ++ + /* -- How Values Fit in Registers. 
*/ + + int + nds32_hard_regno_nregs (int regno ATTRIBUTE_UNUSED, +- machine_mode mode) ++ enum machine_mode mode) + { + return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); + } + + int +-nds32_hard_regno_mode_ok (int regno, machine_mode mode) ++nds32_hard_regno_mode_ok (int regno, enum machine_mode mode) + { ++ if (regno > FIRST_PSEUDO_REGISTER) ++ return true; ++ ++ if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) && NDS32_IS_FPR_REGNUM (regno)) ++ { ++ if (NDS32_IS_EXT_FPR_REGNUM(regno)) ++ return (NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) && (mode == DFmode)); ++ else if (mode == SFmode || mode == SImode) ++ return NDS32_FPR_REGNO_OK_FOR_SINGLE (regno); ++ else if (mode == DFmode) ++ return NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno); ++ ++ return false; ++ } ++ + /* Restrict double-word quantities to even register pairs. */ +- if (HARD_REGNO_NREGS (regno, mode) == 1 +- || !((regno) & 1)) +- return 1; ++ if (regno <= NDS32_LAST_GPR_REGNUM) ++ return (HARD_REGNO_NREGS (regno, mode) == 1 ++ || !((regno) & 1)); + +- return 0; ++ return false; + } + ++int ++nds32_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) ++{ ++ if ((GET_MODE_CLASS (mode1) == MODE_INT ++ && GET_MODE_CLASS (mode2) == MODE_INT) ++ && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD ++ && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD) ++ return true; ++ ++ if (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)) ++ { ++ if ((TARGET_FPU_SINGLE && !TARGET_FPU_DOUBLE) ++ && (mode1 == DFmode || mode2 == DFmode)) ++ return false; ++ else ++ return true; ++ } ++ ++ return false; ++} + + /* Register Classes. */ + +@@ -2784,7 +5102,16 @@ nds32_regno_reg_class (int regno) + else if (regno >= 20 && regno <= 31) + return HIGH_REGS; + else if (regno == 32 || regno == 33) +- return FRAME_REGS; ++ { ++ /* $SFP and $AP is FRAME_REGS in fact, However prevent IRA don't ++ know how to allocate register for $SFP and $AP, just tell IRA they ++ are GENERAL_REGS, and ARM do this hack too. 
*/ ++ return GENERAL_REGS; ++ } ++ else if (regno >= 34 && regno <= 97) ++ return FP_REGS; ++ else if (regno >= 98 && regno <= 100) ++ return LOOP_REGS; + else + return NO_REGS; + } +@@ -2795,14 +5122,39 @@ nds32_regno_reg_class (int regno) + /* -- Basic Stack Layout. */ + + rtx ++nds32_dynamic_chain_address (rtx frameaddr) ++{ ++ if (TARGET_V3PUSH) ++ { ++ /* If -mv3push is specified, we push $fp, $gp, and $lp into stack. ++ We can access dynamic chain address from stack by [$fp - 12]. */ ++ return plus_constant (Pmode, frameaddr, -12); ++ } ++ else ++ { ++ /* For general case we push $fp and $lp into stack at prologue. ++ We can access dynamic chain address from stack by [$fp - 8]. */ ++ return plus_constant (Pmode, frameaddr, -8); ++ } ++} ++ ++rtx + nds32_return_addr_rtx (int count, +- rtx frameaddr ATTRIBUTE_UNUSED) ++ rtx frameaddr) + { +- /* There is no way to determine the return address +- if frameaddr is the frame that has 'count' steps +- up from current frame. */ ++ int offset; ++ rtx addr; ++ + if (count != 0) +- return NULL_RTX; ++ { ++ /* In nds32 ABI design, we can expect that $lp is always available ++ from stack by [$fp - 4] location. */ ++ offset = -4; ++ addr = plus_constant (Pmode, frameaddr, offset); ++ addr = memory_address (Pmode, addr); ++ ++ return gen_rtx_MEM (Pmode, addr); ++ } + + /* If count == 0, it means we are at current frame, + the return address is $r30 ($lp). */ +@@ -2821,15 +5173,18 @@ nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg) + nds32_compute_stack_frame (); + + /* Remember to consider +- cfun->machine->callee_saved_area_padding_bytes ++ cfun->machine->callee_saved_area_gpr_padding_bytes and ++ cfun->machine->eh_return_data_regs_size + when calculating offset. 
*/ + if (from_reg == ARG_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM) + { + offset = (cfun->machine->fp_size +- + cfun->machine->gp_size ++ + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size ++ + cfun->machine->eh_return_data_regs_size + + cfun->machine->local_size + + cfun->machine->out_args_size); + } +@@ -2850,7 +5205,9 @@ nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg) + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes); ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size ++ + cfun->machine->eh_return_data_regs_size); + } + else + { +@@ -2869,10 +5226,11 @@ nds32_init_cumulative_args (CUMULATIVE_ARGS *cum, + tree fndecl ATTRIBUTE_UNUSED, + int n_named_args ATTRIBUTE_UNUSED) + { +- /* Initial available registers +- (in offset, corresponding to NDS32_GPR_ARG_FIRST_REGNUM) ++ /* Initial available registers. The values are offset against ++ NDS32_GPR_ARG_FIRST_REGNUM and NDS32_FPR_ARG_FIRST_REGNUM + for passing arguments. */ + cum->gpr_offset = 0; ++ cum->fpr_offset = 0; + } + + /* -- Function Entry and Exit. */ +@@ -2883,125 +5241,178 @@ nds32_expand_prologue (void) + { + int fp_adjust; + int sp_adjust; +- int en4_const; +- +- rtx Rb, Re; +- rtx fp_adjust_insn, sp_adjust_insn; ++ unsigned Rb, Re; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + ++ /* Check frame_pointer_needed again to prevent fp is need after reload. */ ++ if (frame_pointer_needed) ++ cfun->machine->fp_as_gp_p = false; ++ + /* If this is a variadic function, first we need to push argument + registers that hold the unnamed argument value. 
*/ + if (cfun->machine->va_args_size != 0) + { +- Rb = gen_rtx_REG (SImode, cfun->machine->va_args_first_regno); +- Re = gen_rtx_REG (SImode, cfun->machine->va_args_last_regno); +- /* No need to push $fp, $gp, or $lp, so use GEN_INT(0). */ +- nds32_emit_stack_push_multiple (Rb, Re, GEN_INT (0), true); ++ Rb = cfun->machine->va_args_first_regno; ++ Re = cfun->machine->va_args_last_regno; ++ /* No need to push $fp, $gp, or $lp. */ ++ nds32_emit_stack_push_multiple (Rb, Re, false, false, false, true); + + /* We may also need to adjust stack pointer for padding bytes +- because varargs may cause $sp not 8-byte aligned. */ ++ because varargs may cause $sp not 8-byte aligned. */ + if (cfun->machine->va_args_area_padding_bytes) + { + /* Generate sp adjustment instruction. */ + sp_adjust = cfun->machine->va_args_area_padding_bytes; +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-1 * sp_adjust)); + +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); +- +- /* The insn rtx 'sp_adjust_insn' will change frame layout. +- We need to use RTX_FRAME_RELATED_P so that GCC is able to +- generate CFI (Call Frame Information) stuff. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ -1 * sp_adjust); + } + } + + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ +- if (cfun->machine->naked_p) ++ if (cfun->machine->naked_p && !flag_pic) + return; + + /* Get callee_first_regno and callee_last_regno. */ +- Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno); +- Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno); +- +- /* nds32_emit_stack_push_multiple(first_regno, last_regno), +- the pattern 'stack_push_multiple' is implemented in nds32.md. +- For En4 field, we have to calculate its constant value. +- Refer to Andes ISA for more information. 
*/ +- en4_const = 0; +- if (cfun->machine->fp_size) +- en4_const += 8; +- if (cfun->machine->gp_size) +- en4_const += 4; +- if (cfun->machine->lp_size) +- en4_const += 2; ++ Rb = cfun->machine->callee_saved_first_gpr_regno; ++ Re = cfun->machine->callee_saved_last_gpr_regno; + + /* If $fp, $gp, $lp, and all callee-save registers are NOT required + to be saved, we don't have to create multiple push instruction. + Otherwise, a multiple push instruction is needed. */ +- if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0)) ++ if (!(Rb == SP_REGNUM && Re == SP_REGNUM ++ && cfun->machine->fp_size == 0 ++ && cfun->machine->gp_size == 0 ++ && cfun->machine->lp_size == 0)) + { + /* Create multiple push instruction rtx. */ +- nds32_emit_stack_push_multiple (Rb, Re, GEN_INT (en4_const), false); ++ nds32_emit_stack_push_multiple ( ++ Rb, Re, ++ cfun->machine->fp_size, cfun->machine->gp_size, cfun->machine->lp_size, ++ false); ++ } ++ ++ /* Save eh data registers. */ ++ if (cfun->machine->use_eh_return_p) ++ { ++ Rb = cfun->machine->eh_return_data_first_regno; ++ Re = cfun->machine->eh_return_data_last_regno; ++ ++ /* No need to push $fp, $gp, or $lp. ++ Also, this is not variadic arguments push. */ ++ nds32_emit_stack_push_multiple (Rb, Re, false, false, false, false); + } + +- /* Check frame_pointer_needed to see +- if we shall emit fp adjustment instruction. */ +- if (frame_pointer_needed) +- { +- /* adjust $fp = $sp + ($fp size) + ($gp size) + ($lp size) +- + (4 * callee-saved-registers) +- Note: No need to adjust +- cfun->machine->callee_saved_area_padding_bytes, +- because, at this point, stack pointer is just +- at the position after push instruction. */ +- fp_adjust = cfun->machine->fp_size +- + cfun->machine->gp_size +- + cfun->machine->lp_size +- + cfun->machine->callee_saved_gpr_regs_size; +- fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, ++ /* Check frame_pointer_needed to see ++ if we shall emit fp adjustment instruction. 
*/ ++ if (frame_pointer_needed) ++ { ++ /* adjust $fp = $sp + ($fp size) + ($gp size) + ($lp size) ++ + (4 * callee-saved-registers) ++ + (4 * exception-handling-data-registers) ++ Note: No need to adjust ++ cfun->machine->callee_saved_area_gpr_padding_bytes, ++ because, at this point, stack pointer is just ++ at the position after push instruction. */ ++ fp_adjust = cfun->machine->fp_size ++ + cfun->machine->gp_size ++ + cfun->machine->lp_size ++ + cfun->machine->callee_saved_gpr_regs_size ++ + cfun->machine->eh_return_data_regs_size; ++ ++ nds32_emit_adjust_frame (hard_frame_pointer_rtx, ++ stack_pointer_rtx, ++ fp_adjust); ++ } ++ ++ /* Save fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* When $sp moved to bottom of stack, we need to check whether ++ the range of offset in the FPU instruction. */ ++ int fpr_offset = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ /* Check FPU instruction offset imm14s. */ ++ if (!satisfies_constraint_Is14 (GEN_INT (fpr_offset))) ++ { ++ int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ /* Save fpu registers, need to allocate stack space ++ for fpu callee registers. And now $sp position ++ on callee saved fpr registers. */ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ -1 * fpr_space); ++ ++ /* Emit fpu store instruction, using [$sp + offset] store ++ fpu registers. */ ++ nds32_emit_push_fpr_callee_saved (0); ++ ++ /* Adjust $sp = $sp - local_size - out_args_size. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size; ++ ++ /* Allocate stack space for local size and out args size. */ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ -1 * sp_adjust); ++ } ++ else ++ { ++ /* Offset range in Is14, so $sp moved to bottom of stack. 
*/ ++ ++ /* Adjust $sp = $sp - local_size - out_args_size ++ - callee_saved_area_gpr_padding_bytes ++ - callee_saved_fpr_regs_size. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, +- GEN_INT (fp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- fp_adjust_insn = emit_insn (fp_adjust_insn); ++ -1 * sp_adjust); + +- /* The insn rtx 'fp_adjust_insn' will change frame layout. */ +- RTX_FRAME_RELATED_P (fp_adjust_insn) = 1; ++ /* Emit fpu store instruction, using [$sp + offset] store ++ fpu registers. */ ++ int fpr_position = cfun->machine->out_args_size ++ + cfun->machine->local_size; ++ nds32_emit_push_fpr_callee_saved (fpr_position); ++ } + } +- +- /* Adjust $sp = $sp - local_size - out_args_size +- - callee_saved_area_padding_bytes. */ +- sp_adjust = cfun->machine->local_size +- + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; +- /* sp_adjust value may be out of range of the addi instruction, +- create alternative add behavior with TA_REGNUM if necessary, +- using NEGATIVE value to tell that we are decreasing address. */ +- sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust); +- if (sp_adjust) ++ else + { +- /* Generate sp adjustment instruction if and only if sp_adjust != 0. */ +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-1 * sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); ++ /* Adjust $sp = $sp - local_size - out_args_size ++ - callee_saved_area_gpr_padding_bytes. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes; + +- /* The insn rtx 'sp_adjust_insn' will change frame layout. 
+- We need to use RTX_FRAME_RELATED_P so that GCC is able to +- generate CFI (Call Frame Information) stuff. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ /* sp_adjust value may be out of range of the addi instruction, ++ create alternative add behavior with TA_REGNUM if necessary, ++ using NEGATIVE value to tell that we are decreasing address. */ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ -1 * sp_adjust); + } + +- /* Prevent the instruction scheduler from +- moving instructions across the boundary. */ +- emit_insn (gen_blockage ()); ++ /* Emit gp setup instructions for -fpic. */ ++ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ++ nds32_emit_load_gp (); ++ ++ /* If user applies -mno-sched-prolog-epilog option, ++ we need to prevent instructions of function body from being ++ scheduled with stack adjustment in prologue. */ ++ if (!flag_sched_prolog_epilog) ++ emit_insn (gen_blockage ()); + } + + /* Function for normal multiple pop epilogue. */ +@@ -3009,18 +5420,17 @@ void + nds32_expand_epilogue (bool sibcall_p) + { + int sp_adjust; +- int en4_const; +- +- rtx Rb, Re; +- rtx sp_adjust_insn; ++ unsigned Rb, Re; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + +- /* Prevent the instruction scheduler from +- moving instructions across the boundary. */ +- emit_insn (gen_blockage ()); ++ /* If user applies -mno-sched-prolog-epilog option, ++ we need to prevent instructions of function body from being ++ scheduled with stack adjustment in epilogue. */ ++ if (!flag_sched_prolog_epilog) ++ emit_insn (gen_blockage ()); + + /* If the function is 'naked', we do not have to generate + epilogue code fragment BUT 'ret' instruction. 
+@@ -3029,110 +5439,156 @@ nds32_expand_epilogue (bool sibcall_p) + if (cfun->machine->naked_p) + { + /* If this is a variadic function, we do not have to restore argument +- registers but need to adjust stack pointer back to previous stack +- frame location before return. */ ++ registers but need to adjust stack pointer back to previous stack ++ frame location before return. */ + if (cfun->machine->va_args_size != 0) + { + /* Generate sp adjustment instruction. + We need to consider padding bytes here. */ + sp_adjust = cfun->machine->va_args_size + + cfun->machine->va_args_area_padding_bytes; +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); + +- /* The insn rtx 'sp_adjust_insn' will change frame layout. +- We need to use RTX_FRAME_RELATED_P so that GCC is able to +- generate CFI (Call Frame Information) stuff. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ sp_adjust); + } + + /* Generate return instruction by using 'return_internal' pattern. +- Make sure this instruction is after gen_blockage(). */ ++ Make sure this instruction is after gen_blockage(). ++ First we need to check this is a function without sibling call. */ + if (!sibcall_p) +- emit_jump_insn (gen_return_internal ()); ++ { ++ /* We need to further check attributes to determine whether ++ there should be return instruction at epilogue. ++ If the attribute naked exists but -mno-ret-in-naked-func ++ is issued, there is NO need to generate return instruction. 
*/ ++ if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) ++ return; ++ ++ emit_jump_insn (gen_return_internal ()); ++ } + return; + } + + if (frame_pointer_needed) + { +- /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) +- - (4 * callee-saved-registers) +- Note: No need to adjust +- cfun->machine->callee_saved_area_padding_bytes, +- because we want to adjust stack pointer +- to the position for pop instruction. */ +- sp_adjust = cfun->machine->fp_size +- + cfun->machine->gp_size +- + cfun->machine->lp_size +- + cfun->machine->callee_saved_gpr_regs_size; +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, ++ /* Restore fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes; ++ ++ /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) ++ - (4 * callee-saved-registers) ++ - (4 * exception-handling-data-registers) ++ - (4 * callee-saved-gpr-registers padding byte) ++ - (4 * callee-saved-fpr-registers) ++ Note: we want to adjust stack pointer ++ to the position for callee-saved fpr register, ++ And restore fpu register use .bi instruction to adjust $sp ++ from callee-saved fpr register to pop instruction. */ ++ sp_adjust = cfun->machine->fp_size ++ + cfun->machine->gp_size ++ + cfun->machine->lp_size ++ + cfun->machine->callee_saved_gpr_regs_size ++ + cfun->machine->eh_return_data_regs_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, +- GEN_INT (-1 * sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); ++ -1 * sp_adjust); ++ ++ /* Emit fpu load instruction, using .bi instruction ++ load fpu registers. 
*/ ++ nds32_emit_pop_fpr_callee_saved (gpr_padding); ++ } ++ else ++ { ++ /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) ++ - (4 * callee-saved-registers) ++ - (4 * exception-handling-data-registers) ++ Note: No need to adjust ++ cfun->machine->callee_saved_area_gpr_padding_bytes, ++ because we want to adjust stack pointer ++ to the position for pop instruction. */ ++ sp_adjust = cfun->machine->fp_size ++ + cfun->machine->gp_size ++ + cfun->machine->lp_size ++ + cfun->machine->callee_saved_gpr_regs_size ++ + cfun->machine->eh_return_data_regs_size; + +- /* The insn rtx 'sp_adjust_insn' will change frame layout. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ hard_frame_pointer_rtx, ++ -1 * sp_adjust); ++ } + } + else + { +- /* If frame pointer is NOT needed, +- we cannot calculate the sp adjustment from frame pointer. +- Instead, we calculate the adjustment by local_size, +- out_args_size, and callee_saved_area_padding_bytes. +- Notice that such sp adjustment value may be out of range, +- so we have to deal with it as well. */ ++ /* Restore fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes; + +- /* Adjust $sp = $sp + local_size + out_args_size +- + callee_saved_area_padding_bytes. */ +- sp_adjust = cfun->machine->local_size +- + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; +- /* sp_adjust value may be out of range of the addi instruction, +- create alternative add behavior with TA_REGNUM if necessary, +- using POSITIVE value to tell that we are increasing address. */ +- sp_adjust = nds32_force_addi_stack_int (sp_adjust); +- if (sp_adjust) +- { +- /* Generate sp adjustment instruction +- if and only if sp_adjust != 0. 
*/ +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); ++ /* Adjust $sp = $sp + local_size + out_args_size. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size; + +- /* The insn rtx 'sp_adjust_insn' will change frame layout. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ sp_adjust); ++ ++ /* Emit fpu load instruction, using .bi instruction ++ load fpu registers, and adjust $sp from callee-saved fpr register ++ to callee-saved gpr register. */ ++ nds32_emit_pop_fpr_callee_saved (gpr_padding); ++ } ++ else ++ { ++ /* If frame pointer is NOT needed, ++ we cannot calculate the sp adjustment from frame pointer. ++ Instead, we calculate the adjustment by local_size, ++ out_args_size, and callee_saved_area_gpr_padding_bytes. ++ Notice that such sp adjustment value may be out of range, ++ so we have to deal with it as well. */ ++ ++ /* Adjust $sp = $sp + local_size + out_args_size ++ + callee_saved_area_gpr_padding_bytes. */ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes; ++ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ sp_adjust); + } + } + ++ /* Restore eh data registers. */ ++ if (cfun->machine->use_eh_return_p) ++ { ++ Rb = cfun->machine->eh_return_data_first_regno; ++ Re = cfun->machine->eh_return_data_last_regno; ++ ++ /* No need to pop $fp, $gp, or $lp. */ ++ nds32_emit_stack_pop_multiple (Rb, Re, false, false, false); ++ } ++ + /* Get callee_first_regno and callee_last_regno. 
*/ +- Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno); +- Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno); +- +- /* nds32_emit_stack_pop_multiple(first_regno, last_regno), +- the pattern 'stack_pop_multiple' is implementad in nds32.md. +- For En4 field, we have to calculate its constant value. +- Refer to Andes ISA for more information. */ +- en4_const = 0; +- if (cfun->machine->fp_size) +- en4_const += 8; +- if (cfun->machine->gp_size) +- en4_const += 4; +- if (cfun->machine->lp_size) +- en4_const += 2; ++ Rb = cfun->machine->callee_saved_first_gpr_regno; ++ Re = cfun->machine->callee_saved_last_gpr_regno; + + /* If $fp, $gp, $lp, and all callee-save registers are NOT required + to be saved, we don't have to create multiple pop instruction. + Otherwise, a multiple pop instruction is needed. */ +- if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0)) ++ if (!(Rb == SP_REGNUM && Re == SP_REGNUM ++ && cfun->machine->fp_size == 0 ++ && cfun->machine->gp_size == 0 ++ && cfun->machine->lp_size == 0)) + { + /* Create multiple pop instruction rtx. */ +- nds32_emit_stack_pop_multiple (Rb, Re, GEN_INT (en4_const)); ++ nds32_emit_stack_pop_multiple ( ++ Rb, Re, ++ cfun->machine->fp_size, cfun->machine->gp_size, cfun->machine->lp_size); + } + + /* If this is a variadic function, we do not have to restore argument +@@ -3141,19 +5597,49 @@ nds32_expand_epilogue (bool sibcall_p) + if (cfun->machine->va_args_size != 0) + { + /* Generate sp adjustment instruction. +- We need to consider padding bytes here. */ ++ We need to consider padding bytes here. */ + sp_adjust = cfun->machine->va_args_size + + cfun->machine->va_args_area_padding_bytes; +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); + +- /* The insn rtx 'sp_adjust_insn' will change frame layout. 
+- We need to use RTX_FRAME_RELATED_P so that GCC is able to +- generate CFI (Call Frame Information) stuff. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ sp_adjust); ++ } ++ ++ /* If this function uses __builtin_eh_return, make stack adjustment ++ for exception handler. */ ++ if (cfun->machine->use_eh_return_p) ++ { ++ /* We need to unwind the stack by the offset computed by ++ EH_RETURN_STACKADJ_RTX. However, at this point the CFA is ++ based on SP. Ideally we would update the SP and define the ++ CFA along the lines of: ++ ++ SP = SP + EH_RETURN_STACKADJ_RTX ++ (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) ++ ++ However the dwarf emitter only understands a constant ++ register offset. ++ ++ The solution chosen here is to use the otherwise $ta ($r15) ++ as a temporary register to hold the current SP value. The ++ CFA is described using $ta then SP is modified. */ ++ ++ rtx ta_reg; ++ rtx insn; ++ ++ ta_reg = gen_rtx_REG (SImode, TA_REGNUM); ++ ++ insn = emit_move_insn (ta_reg, stack_pointer_rtx); ++ add_reg_note (insn, REG_CFA_DEF_CFA, ta_reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ emit_insn (gen_addsi3 (stack_pointer_rtx, ++ stack_pointer_rtx, ++ EH_RETURN_STACKADJ_RTX)); ++ ++ /* Ensure the assignment to $ta does not get optimized away. */ ++ emit_use (ta_reg); + } + + /* Generate return instruction. */ +@@ -3167,28 +5653,35 @@ nds32_expand_prologue_v3push (void) + { + int fp_adjust; + int sp_adjust; +- +- rtx Rb, Re; +- rtx fp_adjust_insn, sp_adjust_insn; ++ int fpr_space = 0; ++ unsigned Rb, Re; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + ++ if (cfun->machine->callee_saved_gpr_regs_size > 0) ++ df_set_regs_ever_live (FP_REGNUM, 1); ++ ++ /* Check frame_pointer_needed again to prevent fp is need after reload. 
*/ ++ if (frame_pointer_needed) ++ cfun->machine->fp_as_gp_p = false; ++ + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ +- if (cfun->machine->naked_p) ++ if (cfun->machine->naked_p && !flag_pic) + return; + + /* Get callee_first_regno and callee_last_regno. */ +- Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno); +- Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno); ++ Rb = cfun->machine->callee_saved_first_gpr_regno; ++ Re = cfun->machine->callee_saved_last_gpr_regno; + + /* Calculate sp_adjust first to test if 'push25 Re,imm8u' is available, + where imm8u has to be 8-byte alignment. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; + + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) +@@ -3196,94 +5689,118 @@ nds32_expand_prologue_v3push (void) + /* We can use 'push25 Re,imm8u'. */ + + /* nds32_emit_stack_v3push(last_regno, sp_adjust), +- the pattern 'stack_v3push' is implemented in nds32.md. +- The (const_int 14) means v3push always push { $fp $gp $lp }. */ +- nds32_emit_stack_v3push (Rb, Re, +- GEN_INT (14), GEN_INT (sp_adjust)); ++ the pattern 'stack_v3push' is implemented in nds32.md. */ ++ nds32_emit_stack_v3push (Rb, Re, sp_adjust); ++ ++ /* Save fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* Calculate fpr position. */ ++ int fpr_position = cfun->machine->local_size ++ + cfun->machine->out_args_size; ++ /* Emit fpu store instruction, using [$sp + offset] store ++ fpu registers. */ ++ nds32_emit_push_fpr_callee_saved (fpr_position); ++ } + + /* Check frame_pointer_needed to see +- if we shall emit fp adjustment instruction. */ ++ if we shall emit fp adjustment instruction. 
*/ + if (frame_pointer_needed) + { + /* adjust $fp = $sp + 4 ($fp size) +- + 4 ($gp size) +- + 4 ($lp size) +- + (4 * n) (callee-saved registers) +- + sp_adjust ('push25 Re,imm8u') ++ + 4 ($gp size) ++ + 4 ($lp size) ++ + (4 * n) (callee-saved registers) ++ + sp_adjust ('push25 Re,imm8u') + Note: Since we use 'push25 Re,imm8u', +- the position of stack pointer is further +- changed after push instruction. +- Hence, we need to take sp_adjust value +- into consideration. */ ++ the position of stack pointer is further ++ changed after push instruction. ++ Hence, we need to take sp_adjust value ++ into consideration. */ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size + + sp_adjust; +- fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (fp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- fp_adjust_insn = emit_insn (fp_adjust_insn); ++ ++ nds32_emit_adjust_frame (hard_frame_pointer_rtx, ++ stack_pointer_rtx, ++ fp_adjust); + } + } + else + { +- /* We have to use 'push25 Re,0' and +- expand one more instruction to adjust $sp later. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* Calculate fpr space. */ ++ fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ /* We have to use 'push25 Re, fpr_space', to pre-allocate ++ callee saved fpr registers space. */ ++ nds32_emit_stack_v3push (Rb, Re, fpr_space); ++ nds32_emit_push_fpr_callee_saved (0); ++ } ++ else ++ { ++ /* We have to use 'push25 Re,0' and ++ expand one more instruction to adjust $sp later. */ + +- /* nds32_emit_stack_v3push(last_regno, sp_adjust), +- the pattern 'stack_v3push' is implemented in nds32.md. +- The (const_int 14) means v3push always push { $fp $gp $lp }. 
*/ +- nds32_emit_stack_v3push (Rb, Re, +- GEN_INT (14), GEN_INT (0)); ++ /* nds32_emit_stack_v3push(last_regno, sp_adjust), ++ the pattern 'stack_v3push' is implemented in nds32.md. */ ++ nds32_emit_stack_v3push (Rb, Re, 0); ++ } + + /* Check frame_pointer_needed to see +- if we shall emit fp adjustment instruction. */ ++ if we shall emit fp adjustment instruction. */ + if (frame_pointer_needed) + { + /* adjust $fp = $sp + 4 ($fp size) +- + 4 ($gp size) +- + 4 ($lp size) +- + (4 * n) (callee-saved registers) ++ + 4 ($gp size) ++ + 4 ($lp size) ++ + (4 * n) (callee-saved registers) + Note: Since we use 'push25 Re,0', +- the stack pointer is just at the position +- after push instruction. +- No need to take sp_adjust into consideration. */ ++ the stack pointer is just at the position ++ after push instruction. ++ No need to take sp_adjust into consideration. */ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size; +- fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (fp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- fp_adjust_insn = emit_insn (fp_adjust_insn); +- } + +- /* Because we use 'push25 Re,0', +- we need to expand one more instruction to adjust $sp. +- However, sp_adjust value may be out of range of the addi instruction, +- create alternative add behavior with TA_REGNUM if necessary, +- using NEGATIVE value to tell that we are decreasing address. */ +- sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust); +- if (sp_adjust) +- { +- /* Generate sp adjustment instruction +- if and only if sp_adjust != 0. */ +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-1 * sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. 
*/ +- sp_adjust_insn = emit_insn (sp_adjust_insn); ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* We use 'push25 Re, fpr_space', the $sp is ++ on callee saved fpr position, so need to consider ++ fpr space. */ ++ fp_adjust = fp_adjust + fpr_space; ++ } ++ ++ nds32_emit_adjust_frame (hard_frame_pointer_rtx, ++ stack_pointer_rtx, ++ fp_adjust); ++ } + +- /* The insn rtx 'sp_adjust_insn' will change frame layout. +- We need to use RTX_FRAME_RELATED_P so that GCC is able to +- generate CFI (Call Frame Information) stuff. */ +- RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* We use 'push25 Re, fpr_space', ++ the $sp is on callee saved fpr position, ++ no need to consider fpr space. */ ++ sp_adjust = sp_adjust - fpr_space; + } ++ ++ /* Because we use 'push25 Re,0', ++ we need to expand one more instruction to adjust $sp. ++ using NEGATIVE value to tell that we are decreasing address. */ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ -1 * sp_adjust); + } + ++ /* Emit gp setup instructions for -fpic. */ ++ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ++ nds32_emit_load_gp (); ++ + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); +@@ -3294,9 +5811,7 @@ void + nds32_expand_epilogue_v3pop (bool sibcall_p) + { + int sp_adjust; +- +- rtx Rb, Re; +- rtx sp_adjust_insn; ++ unsigned Rb, Re; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ +@@ -3311,21 +5826,32 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) + if (cfun->machine->naked_p) + { + /* Generate return instruction by using 'return_internal' pattern. +- Make sure this instruction is after gen_blockage(). */ ++ Make sure this instruction is after gen_blockage(). ++ First we need to check this is a function without sibling call. 
*/ + if (!sibcall_p) +- emit_jump_insn (gen_return_internal ()); ++ { ++ /* We need to further check attributes to determine whether ++ there should be return instruction at epilogue. ++ If the attribute naked exists but -mno-ret-in-naked-func ++ is issued, there is NO need to generate return instruction. */ ++ if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) ++ return; ++ ++ emit_jump_insn (gen_return_internal ()); ++ } + return; + } + + /* Get callee_first_regno and callee_last_regno. */ +- Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_first_gpr_regno); +- Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_last_gpr_regno); ++ Rb = cfun->machine->callee_saved_first_gpr_regno; ++ Re = cfun->machine->callee_saved_last_gpr_regno; + + /* Calculate sp_adjust first to test if 'pop25 Re,imm8u' is available, + where imm8u has to be 8-byte alignment. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; + + /* We have to consider alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. +@@ -3338,38 +5864,65 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + { ++ /* Restore fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ int fpr_position = cfun->machine->local_size ++ + cfun->machine->out_args_size; ++ /* Emit fpu load instruction, using [$sp + offset] restore ++ fpu registers. */ ++ nds32_emit_v3pop_fpr_callee_saved (fpr_position); ++ } ++ + /* We can use 'pop25 Re,imm8u'. */ + + /* nds32_emit_stack_v3pop(last_regno, sp_adjust), +- the pattern 'stack_v3pop' is implementad in nds32.md. +- The (const_int 14) means v3pop always pop { $fp $gp $lp }. 
*/ +- nds32_emit_stack_v3pop (Rb, Re, +- GEN_INT (14), GEN_INT (sp_adjust)); ++ the pattern 'stack_v3pop' is implementad in nds32.md. */ ++ nds32_emit_stack_v3pop (Rb, Re, sp_adjust); + } + else + { + /* We have to use 'pop25 Re,0', and prior to it, +- we must expand one more instruction to adjust $sp. */ ++ we must expand one more instruction to adjust $sp. */ + + if (frame_pointer_needed) + { + /* adjust $sp = $fp - 4 ($fp size) +- - 4 ($gp size) +- - 4 ($lp size) +- - (4 * n) (callee-saved registers) ++ - 4 ($gp size) ++ - 4 ($lp size) ++ - (4 * n) (callee-saved registers) + Note: No need to adjust +- cfun->machine->callee_saved_area_padding_bytes, +- because we want to adjust stack pointer +- to the position for pop instruction. */ ++ cfun->machine->callee_saved_area_gpr_padding_bytes, ++ because we want to adjust stack pointer ++ to the position for pop instruction. */ + sp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size; +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, ++ ++ /* Restore fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* Set $sp to callee saved fpr position, we need to restore ++ fpr registers. */ ++ sp_adjust = sp_adjust ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ hard_frame_pointer_rtx, ++ -1 * sp_adjust); ++ ++ /* Emit fpu load instruction, using [$sp + offset] restore ++ fpu registers. */ ++ nds32_emit_v3pop_fpr_callee_saved (0); ++ } ++ else ++ { ++ nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, +- GEN_INT (-1 * sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. 
*/ +- sp_adjust_insn = emit_insn (sp_adjust_insn); ++ -1 * sp_adjust); ++ } + } + else + { +@@ -3381,33 +5934,57 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) + so we have to deal with it as well. */ + + /* Adjust $sp = $sp + local_size + out_args_size +- + callee_saved_area_padding_bytes. */ ++ + callee_saved_area_gpr_padding_bytes ++ + callee_saved_fpr_regs_size. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size +- + cfun->machine->callee_saved_area_gpr_padding_bytes; +- /* sp_adjust value may be out of range of the addi instruction, +- create alternative add behavior with TA_REGNUM if necessary, +- using POSITIVE value to tell that we are increasing address. */ +- sp_adjust = nds32_force_addi_stack_int (sp_adjust); +- if (sp_adjust) ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ ++ /* Restore fpu registers. */ ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* Set $sp to callee saved fpr position, we need to restore ++ fpr registers. */ ++ sp_adjust = sp_adjust ++ - cfun->machine->callee_saved_area_gpr_padding_bytes ++ - cfun->machine->callee_saved_fpr_regs_size; ++ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ sp_adjust); ++ ++ /* Emit fpu load instruction, using [$sp + offset] restore ++ fpu registers. */ ++ nds32_emit_v3pop_fpr_callee_saved (0); ++ } ++ else + { +- /* Generate sp adjustment instruction +- if and only if sp_adjust != 0. */ +- sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (sp_adjust)); +- /* Emit rtx into instructions list and receive INSN rtx form. */ +- sp_adjust_insn = emit_insn (sp_adjust_insn); ++ /* sp_adjust value may be out of range of the addi instruction, ++ create alternative add behavior with TA_REGNUM if necessary, ++ using POSITIVE value to tell that we are increasing ++ address. 
*/ ++ nds32_emit_adjust_frame (stack_pointer_rtx, ++ stack_pointer_rtx, ++ sp_adjust); + } + } + +- /* nds32_emit_stack_v3pop(last_regno, sp_adjust), +- the pattern 'stack_v3pop' is implementad in nds32.md. */ +- /* The (const_int 14) means v3pop always pop { $fp $gp $lp }. */ +- nds32_emit_stack_v3pop (Rb, Re, +- GEN_INT (14), GEN_INT (0)); ++ if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) ++ { ++ /* We have fpr need to restore, so $sp is set on callee saved fpr ++ position. And we use 'pop25 Re, fpr_space' to adjust $sp. */ ++ int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ nds32_emit_stack_v3pop (Rb, Re, fpr_space); ++ } ++ else ++ { ++ /* nds32_emit_stack_v3pop(last_regno, sp_adjust), ++ the pattern 'stack_v3pop' is implementad in nds32.md. */ ++ nds32_emit_stack_v3pop (Rb, Re, 0); ++ } + } +- + /* Generate return instruction. */ + emit_jump_insn (gen_pop25return ()); + } +@@ -3418,97 +5995,179 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) + int + nds32_can_use_return_insn (void) + { ++ int sp_adjust; ++ + /* Prior to reloading, we can't tell how many registers must be saved. + Thus we can not determine whether this function has null epilogue. */ + if (!reload_completed) + return 0; + ++ /* If attribute 'naked' appears but -mno-ret-in-naked-func is used, ++ we cannot use return instruction. 
*/ ++ if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) ++ return 0; ++ ++ sp_adjust = cfun->machine->local_size ++ + cfun->machine->out_args_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size; ++ if (!cfun->machine->fp_as_gp_p ++ && satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) ++ && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) ++ && !cfun->calls_alloca ++ && NDS32_V3PUSH_AVAILABLE_P ++ && !(TARGET_HARD_FLOAT ++ && (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM))) ++ return 1; ++ + /* If no stack was created, two conditions must be satisfied: + 1. This is a naked function. +- So there is no callee-saved, local size, or outgoing size. ++ So there is no callee-saved, local size, or outgoing size. + 2. This is NOT a variadic function. +- So there is no pushing arguement registers into the stack. */ +- return (cfun->machine->naked_p && (cfun->machine->va_args_size == 0)); ++ So there is no pushing arguement registers into the stack. */ ++ return ((cfun->machine->naked_p && (cfun->machine->va_args_size == 0))); + } + +-/* ------------------------------------------------------------------------ */ +- +-/* Function to test 333-form for load/store instructions. +- This is auxiliary extern function for auxiliary macro in nds32.h. +- Because it is a little complicated, we use function instead of macro. 
*/ +-bool +-nds32_ls_333_p (rtx rt, rtx ra, rtx imm, machine_mode mode) ++enum machine_mode ++nds32_case_vector_shorten_mode (int min_offset, int max_offset, ++ rtx body ATTRIBUTE_UNUSED) + { +- if (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS +- && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS) ++ if (min_offset < 0 || max_offset >= 0x2000) ++ return SImode; ++ else + { +- if (GET_MODE_SIZE (mode) == 4) +- return satisfies_constraint_Iu05 (imm); +- +- if (GET_MODE_SIZE (mode) == 2) +- return satisfies_constraint_Iu04 (imm); +- +- if (GET_MODE_SIZE (mode) == 1) +- return satisfies_constraint_Iu03 (imm); ++ /* The jump table maybe need to 2 byte alignment, ++ so reserved 1 byte for check max_offset. */ ++ if (max_offset >= 0xff) ++ return HImode; ++ else ++ return QImode; + } ++} ++ ++static bool ++nds32_cannot_copy_insn_p (rtx_insn *insn) ++{ ++ /* The hwloop_cfg insn cannot be copied. */ ++ if (recog_memoized (insn) == CODE_FOR_hwloop_cfg) ++ return true; + + return false; + } + +- +-/* Computing the Length of an Insn. +- Modifies the length assigned to instruction INSN. +- LEN is the initially computed length of the insn. */ ++/* Return alignment for the label. */ + int +-nds32_adjust_insn_length (rtx_insn *insn, int length) ++nds32_target_alignment (rtx label) + { +- rtx src, dst; ++ rtx_insn *insn; + +- switch (recog_memoized (insn)) ++ if (!NDS32_ALIGN_P ()) ++ return 0; ++ ++ insn = next_active_insn (label); ++ ++ /* Always align to 4 byte when first instruction after label is jump ++ instruction since length for that might changed, so let's always align ++ it for make sure we don't lose any perfomance here. */ ++ if (insn == 0 ++ || (get_attr_length (insn) == 2 ++ && !JUMP_P (insn) && !CALL_P (insn))) ++ return 0; ++ else ++ return 2; ++} ++ ++/* Return alignment for data. 
*/ ++unsigned int ++nds32_data_alignment (tree data, ++ unsigned int basic_align) ++{ ++ if ((basic_align < BITS_PER_WORD) ++ && (TREE_CODE (data) == ARRAY_TYPE ++ || TREE_CODE (data) == UNION_TYPE ++ || TREE_CODE (data) == RECORD_TYPE)) ++ return BITS_PER_WORD; ++ else ++ return basic_align; ++} ++ ++/* Return alignment for constant value. */ ++unsigned int ++nds32_constant_alignment (tree constant, ++ unsigned int basic_align) ++{ ++ /* Make string literal and constant for constructor to word align. */ ++ if (((TREE_CODE (constant) == STRING_CST ++ || TREE_CODE (constant) == CONSTRUCTOR ++ || TREE_CODE (constant) == UNION_TYPE ++ || TREE_CODE (constant) == RECORD_TYPE ++ || TREE_CODE (constant) == ARRAY_TYPE) ++ && basic_align < BITS_PER_WORD)) ++ return BITS_PER_WORD; ++ else ++ return basic_align; ++} ++ ++/* Return alignment for local variable. */ ++unsigned int ++nds32_local_alignment (tree local ATTRIBUTE_UNUSED, ++ unsigned int basic_align) ++{ ++ bool at_least_align_to_word = false; ++ /* Make local array, struct and union at least align to word for make ++ sure it can unroll memcpy when initialize by constant. */ ++ switch (TREE_CODE (local)) + { +- case CODE_FOR_move_df: +- case CODE_FOR_move_di: +- /* Adjust length of movd44 to 2. */ +- src = XEXP (PATTERN (insn), 1); +- dst = XEXP (PATTERN (insn), 0); +- +- if (REG_P (src) +- && REG_P (dst) +- && (REGNO (src) % 2) == 0 +- && (REGNO (dst) % 2) == 0) +- length = 2; ++ case ARRAY_TYPE: ++ case RECORD_TYPE: ++ case UNION_TYPE: ++ at_least_align_to_word = true; + break; +- + default: ++ at_least_align_to_word = false; + break; + } +- +- return length; ++ if (at_least_align_to_word ++ && (basic_align < BITS_PER_WORD)) ++ return BITS_PER_WORD; ++ else ++ return basic_align; + } + +- +-/* Return align 2 (log base 2) if the next instruction of LABEL is 4 byte. 
*/ +-int +-nds32_target_alignment (rtx label) ++bool ++nds32_split_double_word_load_store_p(rtx *operands, bool load_p) + { +- rtx_insn *insn; ++ rtx mem = load_p ? operands[1] : operands[0]; ++ /* Do split at split2 if -O0 or schedule 2 not enable. */ ++ if (optimize == 0 || !flag_schedule_insns_after_reload) ++ return !satisfies_constraint_Da (mem) || MEM_VOLATILE_P (mem); + +- if (optimize_size) +- return 0; ++ /* Split double word load store after copy propgation. */ ++ if (current_pass == NULL) ++ return false; + +- insn = next_active_insn (label); ++ const char *pass_name = current_pass->name; ++ if (pass_name && ((strcmp (pass_name, "split4") == 0) ++ || (strcmp (pass_name, "split5") == 0))) ++ return !satisfies_constraint_Da (mem) || MEM_VOLATILE_P (mem); + +- if (insn == 0) +- return 0; +- else if ((get_attr_length (insn) % 4) == 0) +- return 2; ++ return false; ++} ++ ++static bool ++nds32_use_blocks_for_constant_p (enum machine_mode mode, ++ const_rtx x ATTRIBUTE_UNUSED) ++{ ++ if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ && (mode == DFmode || mode == SFmode)) ++ return true; + else +- return 0; ++ return false; + } + + /* ------------------------------------------------------------------------ */ + +-/* PART 5: Initialize target hook structure and definitions. */ ++/* PART 6: Initialize target hook structure and definitions. */ + + /* Controlling the Compilation Driver. */ + +@@ -3525,6 +6184,9 @@ nds32_target_alignment (rtx label) + #define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote + ++#undef TARGET_EXPAND_TO_RTL_HOOK ++#define TARGET_EXPAND_TO_RTL_HOOK nds32_expand_to_rtl_hook ++ + + /* Layout of Source Language Data Types. */ + +@@ -3533,6 +6195,9 @@ nds32_target_alignment (rtx label) + + /* -- Basic Characteristics of Registers. */ + ++#undef TARGET_CONDITIONAL_REGISTER_USAGE ++#define TARGET_CONDITIONAL_REGISTER_USAGE nds32_conditional_register_usage ++ + /* -- Order of Allocation of Registers. 
*/ + + /* -- How Values Fit in Registers. */ +@@ -3544,6 +6209,9 @@ nds32_target_alignment (rtx label) + + /* Register Classes. */ + ++#undef TARGET_PREFERRED_RENAME_CLASS ++#define TARGET_PREFERRED_RENAME_CLASS nds32_preferred_rename_class ++ + #undef TARGET_CLASS_MAX_NREGS + #define TARGET_CLASS_MAX_NREGS nds32_class_max_nregs + +@@ -3591,6 +6259,9 @@ nds32_target_alignment (rtx label) + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary + ++#undef TARGET_VECTOR_MODE_SUPPORTED_P ++#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p ++ + /* -- How Scalar Function Values Are Returned. */ + + #undef TARGET_FUNCTION_VALUE +@@ -3604,6 +6275,9 @@ nds32_target_alignment (rtx label) + + /* -- How Large Values Are Returned. */ + ++#undef TARGET_RETURN_IN_MEMORY ++#define TARGET_RETURN_IN_MEMORY nds32_return_in_memory ++ + /* -- Caller-Saves Register Allocation. */ + + /* -- Function Entry and Exit. */ +@@ -3630,6 +6304,9 @@ nds32_target_alignment (rtx label) + + /* -- Permitting tail calls. 
*/ + ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL nds32_function_ok_for_sibcall ++ + #undef TARGET_WARN_FUNC_RETURN + #define TARGET_WARN_FUNC_RETURN nds32_warn_func_return + +@@ -3662,6 +6339,21 @@ nds32_target_alignment (rtx label) + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p + ++#undef TARGET_LEGITIMIZE_ADDRESS ++#define TARGET_LEGITIMIZE_ADDRESS nds32_legitimize_address ++ ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P nds32_legitimate_constant_p ++ ++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE ++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode ++ ++#undef TARGET_CANNOT_FORCE_CONST_MEM ++#define TARGET_CANNOT_FORCE_CONST_MEM nds32_cannot_force_const_mem ++ ++#undef TARGET_DELEGITIMIZE_ADDRESS ++#define TARGET_DELEGITIMIZE_ADDRESS nds32_delegitimize_address ++ + + /* Anchored Addresses. */ + +@@ -3672,6 +6364,9 @@ nds32_target_alignment (rtx label) + + /* -- Representation of condition codes using registers. */ + ++#undef TARGET_CANONICALIZE_COMPARISON ++#define TARGET_CANONICALIZE_COMPARISON nds32_canonicalize_comparison ++ + /* -- Macros to control conditional execution. */ + + +@@ -3692,6 +6387,15 @@ nds32_target_alignment (rtx label) + + /* Adjusting the Instruction Scheduler. */ + ++#undef TARGET_SCHED_ISSUE_RATE ++#define TARGET_SCHED_ISSUE_RATE nds32_sched_issue_rate ++ ++#undef TARGET_SCHED_ADJUST_COST ++#define TARGET_SCHED_ADJUST_COST nds32_sched_adjust_cost ++ ++#undef TARGET_SCHED_SET_SCHED_FLAGS ++#define TARGET_SCHED_SET_SCHED_FLAGS nds32_set_sched_flags ++ + + /* Dividing the Output into Sections (Texts, Data, . . . ). 
*/ + +@@ -3719,6 +6423,9 @@ nds32_target_alignment (rtx label) + #undef TARGET_ASM_ALIGNED_SI_OP + #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + ++#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA ++#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nds32_asm_output_addr_const_extra ++ + /* -- Output of Uninitialized Variables. */ + + /* -- Output and Generation of Labels. */ +@@ -3741,6 +6448,9 @@ nds32_target_alignment (rtx label) + + /* -- Assembler Commands for Exception Regions. */ + ++#undef TARGET_DWARF_REGISTER_SPAN ++#define TARGET_DWARF_REGISTER_SPAN nds32_dwarf_register_span ++ + /* -- Assembler Commands for Alignment. */ + + +@@ -3756,6 +6466,11 @@ nds32_target_alignment (rtx label) + + /* -- Macros for SDB and DWARF Output. */ + ++/* Variable tracking should be run after all optimizations which ++ change order of insns. It also needs a valid CFG. */ ++#undef TARGET_DELAY_VARTRACK ++#define TARGET_DELAY_VARTRACK true ++ + /* -- Macros for VMS Debug Format. */ + + +@@ -3785,6 +6500,9 @@ nds32_target_alignment (rtx label) + + /* Emulating TLS. */ + ++#undef TARGET_HAVE_TLS ++#define TARGET_HAVE_TLS TARGET_LINUX_ABI ++ + + /* Defining coprocessor specifics for MIPS targets. */ + +@@ -3800,12 +6518,43 @@ nds32_target_alignment (rtx label) + + /* Miscellaneous Parameters. 
*/ + ++#undef TARGET_MD_ASM_ADJUST ++#define TARGET_MD_ASM_ADJUST nds32_md_asm_adjust ++ ++#undef TARGET_MACHINE_DEPENDENT_REORG ++#define TARGET_MACHINE_DEPENDENT_REORG nds32_machine_dependent_reorg ++ + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS nds32_init_builtins + ++#undef TARGET_BUILTIN_DECL ++#define TARGET_BUILTIN_DECL nds32_builtin_decl ++ + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN nds32_expand_builtin + ++#undef TARGET_HAVE_CONDITIONAL_EXECUTION ++#define TARGET_HAVE_CONDITIONAL_EXECUTION nds32_have_conditional_execution ++ ++#undef TARGET_INIT_LIBFUNCS ++#define TARGET_INIT_LIBFUNCS nds32_init_libfuncs ++ ++#undef TARGET_CAN_USE_DOLOOP_P ++#define TARGET_CAN_USE_DOLOOP_P nds32_can_use_doloop_p ++ ++#undef TARGET_INVALID_WITHIN_DOLOOP ++#define TARGET_INVALID_WITHIN_DOLOOP nds32_invalid_within_doloop ++ ++#undef TARGET_CANNOT_COPY_INSN_P ++#define TARGET_CANNOT_COPY_INSN_P nds32_cannot_copy_insn_p ++ ++#undef TARGET_MIN_ANCHOR_OFFSET ++#define TARGET_MIN_ANCHOR_OFFSET -((long long int) 1 << 14) ++#undef TARGET_MAX_ANCHOR_OFFSET ++#define TARGET_MAX_ANCHOR_OFFSET (((long long int) 1 << 14) - 1) ++#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P ++#define TARGET_USE_BLOCKS_FOR_CONSTANT_P nds32_use_blocks_for_constant_p ++ + + /* ------------------------------------------------------------------------ */ + +diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h +index eb4558c..a3e07cd 100644 +--- a/gcc/config/nds32/nds32.h ++++ b/gcc/config/nds32/nds32.h +@@ -24,6 +24,9 @@ + /* The following are auxiliary macros or structure declarations + that are used all over the nds32.c and nds32.h. */ + ++#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ ++ (LENGTH = nds32_adjust_insn_length (INSN, LENGTH)) ++ + /* Use SYMBOL_FLAG_MACH_DEP to define our own symbol_ref flag. + It is used in nds32_encode_section_info() to store flag in symbol_ref + in case the symbol should be placed in .rodata section. 
+@@ -33,68 +36,23 @@ + #define NDS32_SYMBOL_REF_RODATA_P(x) \ + ((SYMBOL_REF_FLAGS (x) & NDS32_SYMBOL_FLAG_RODATA) != 0) + +-/* Computing the Length of an Insn. */ +-#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ +- (LENGTH = nds32_adjust_insn_length (INSN, LENGTH)) ++enum nds32_relax_insn_type ++{ ++ RELAX_ORI, ++ RELAX_PLT_ADD, ++ RELAX_TLS_ADD_or_LW, ++ RELAX_TLS_ADD_LW, ++ RELAX_TLS_LW_JRAL, ++ RELAX_DONE ++}; + +-/* Check instruction LS-37-FP-implied form. +- Note: actually its immediate range is imm9u +- since it is used for lwi37/swi37 instructions. */ +-#define NDS32_LS_37_FP_P(rt, ra, imm) \ +- (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- && REGNO (ra) == FP_REGNUM \ +- && satisfies_constraint_Iu09 (imm)) +- +-/* Check instruction LS-37-SP-implied form. +- Note: actually its immediate range is imm9u +- since it is used for lwi37/swi37 instructions. */ +-#define NDS32_LS_37_SP_P(rt, ra, imm) \ +- (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- && REGNO (ra) == SP_REGNUM \ +- && satisfies_constraint_Iu09 (imm)) +- +- +-/* Check load/store instruction form : Rt3, Ra3, imm3u. */ +-#define NDS32_LS_333_P(rt, ra, imm, mode) nds32_ls_333_p (rt, ra, imm, mode) +- +-/* Check load/store instruction form : Rt4, Ra5, const_int_0. +- Note: no need to check ra because Ra5 means it covers all registers. */ +-#define NDS32_LS_450_P(rt, ra, imm) \ +- ((imm == const0_rtx) \ +- && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS)) +- +-/* Check instruction RRI-333-form. */ +-#define NDS32_RRI_333_P(rt, ra, imm) \ +- (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \ +- && satisfies_constraint_Iu03 (imm)) +- +-/* Check instruction RI-45-form. */ +-#define NDS32_RI_45_P(rt, ra, imm) \ +- (REGNO (rt) == REGNO (ra) \ +- && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS) \ +- && satisfies_constraint_Iu05 (imm)) +- +- +-/* Check instruction RR-33-form. 
*/ +-#define NDS32_RR_33_P(rt, ra) \ +- (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS) +- +-/* Check instruction RRR-333-form. */ +-#define NDS32_RRR_333_P(rt, ra, rb) \ +- (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \ +- && REGNO_REG_CLASS (REGNO (rb)) == LOW_REGS) +- +-/* Check instruction RR-45-form. +- Note: no need to check rb because Rb5 means it covers all registers. */ +-#define NDS32_RR_45_P(rt, ra, rb) \ +- (REGNO (rt) == REGNO (ra) \ +- && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ +- || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS)) ++/* Classifies expand result for expand helper function. */ ++enum nds32_expand_result_type ++{ ++ EXPAND_DONE, ++ EXPAND_FAIL, ++ EXPAND_CREATE_TEMPLATE ++}; + + /* Classifies address type to distinguish 16-bit/32-bit format. */ + enum nds32_16bit_address_type +@@ -105,6 +63,10 @@ enum nds32_16bit_address_type + ADDRESS_LO_REG_IMM3U, + /* post_inc [lo_reg + imm3u]: 333 format address. */ + ADDRESS_POST_INC_LO_REG_IMM3U, ++ /* post_modify [lo_reg + imm3u]: 333 format address. */ ++ ADDRESS_POST_MODIFY_LO_REG_IMM3U, ++ /* [$r8 + imm7u]: r8 imply address. */ ++ ADDRESS_R8_IMM7U, + /* [$fp + imm7u]: fp imply address. */ + ADDRESS_FP_IMM7U, + /* [$sp + imm7u]: sp imply address. */ +@@ -113,23 +75,67 @@ enum nds32_16bit_address_type + ADDRESS_NOT_16BIT_FORMAT + }; + +- + /* ------------------------------------------------------------------------ */ + + /* Define maximum numbers of registers for passing arguments. */ + #define NDS32_MAX_GPR_REGS_FOR_ARGS 6 ++#define NDS32_MAX_FPR_REGS_FOR_ARGS 6 + + /* Define the register number for first argument. */ + #define NDS32_GPR_ARG_FIRST_REGNUM 0 ++#define NDS32_FPR_ARG_FIRST_REGNUM 34 + + /* Define the register number for return value. */ + #define NDS32_GPR_RET_FIRST_REGNUM 0 ++#define NDS32_FPR_RET_FIRST_REGNUM 34 + + /* Define the first integer register number. 
*/ + #define NDS32_FIRST_GPR_REGNUM 0 + /* Define the last integer register number. */ + #define NDS32_LAST_GPR_REGNUM 31 + ++#define NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM 6 ++#define NDS32_LAST_CALLEE_SAVE_GPR_REGNUM \ ++ (TARGET_REDUCED_REGS ? 10 : 14) ++ ++/* Define the floating-point number of registers. */ ++#define NDS32_FLOAT_REGISTER_NUMBER \ ++ (((nds32_fp_regnum == NDS32_CONFIG_FPU_0) \ ++ || (nds32_fp_regnum == NDS32_CONFIG_FPU_4)) ? 8 \ ++ : ((nds32_fp_regnum == NDS32_CONFIG_FPU_1) \ ++ || (nds32_fp_regnum == NDS32_CONFIG_FPU_5)) ? 16 \ ++ : ((nds32_fp_regnum == NDS32_CONFIG_FPU_2) \ ++ || (nds32_fp_regnum == NDS32_CONFIG_FPU_6)) ? 32 \ ++ : ((nds32_fp_regnum == NDS32_CONFIG_FPU_3) \ ++ || (nds32_fp_regnum == NDS32_CONFIG_FPU_7)) ? 64 \ ++ : 32) ++ ++#define NDS32_EXT_FPU_DOT_E (nds32_fp_regnum >= 4) ++ ++/* Define the first floating-point register number. */ ++#define NDS32_FIRST_FPR_REGNUM 34 ++/* Define the last floating-point register number. */ ++#define NDS32_LAST_FPR_REGNUM \ ++ (NDS32_FIRST_FPR_REGNUM + NDS32_FLOAT_REGISTER_NUMBER - 1) ++ ++ ++#define NDS32_IS_EXT_FPR_REGNUM(regno) \ ++ (((regno) >= NDS32_FIRST_FPR_REGNUM + 32) \ ++ && ((regno) < NDS32_FIRST_FPR_REGNUM + 64)) ++ ++#define NDS32_IS_FPR_REGNUM(regno) \ ++ (((regno) >= NDS32_FIRST_FPR_REGNUM) \ ++ && ((regno) <= NDS32_LAST_FPR_REGNUM)) ++ ++#define NDS32_FPR_REGNO_OK_FOR_SINGLE(regno) \ ++ ((regno) <= NDS32_LAST_FPR_REGNUM) ++ ++#define NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) \ ++ ((((regno) - NDS32_FIRST_FPR_REGNUM) & 1) == 0) ++ ++#define NDS32_IS_GPR_REGNUM(regno) \ ++ (((regno) <= NDS32_LAST_GPR_REGNUM)) ++ + /* Define double word alignment bits. */ + #define NDS32_DOUBLE_WORD_ALIGNMENT 64 + +@@ -138,6 +144,16 @@ enum nds32_16bit_address_type + #define NDS32_SINGLE_WORD_ALIGN_P(value) (((value) & 0x03) == 0) + #define NDS32_DOUBLE_WORD_ALIGN_P(value) (((value) & 0x07) == 0) + ++/* Determine whether we would like to have code generation strictly aligned. 
++ We set it strictly aligned when -malways-align is enabled. ++ Check gcc/common/config/nds32/nds32-common.c for the optimizations that ++ apply -malways-align. */ ++#define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN) ++ ++#define NDS32_HW_LOOP_P() (TARGET_HWLOOP && !TARGET_FORCE_NO_HWLOOP) ++ ++#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP) ++ + /* Get alignment according to mode or type information. + When 'type' is nonnull, there is no need to look at 'mode'. */ + #define NDS32_MODE_TYPE_ALIGN(mode, type) \ +@@ -159,21 +175,28 @@ enum nds32_16bit_address_type + /* This macro is used to return the register number for passing argument. + We need to obey the following rules: + 1. If it is required MORE THAN one register, +- we need to further check if it really needs to be +- aligned on double words. +- a) If double word alignment is necessary, +- the register number must be even value. +- b) Otherwise, the register number can be odd or even value. ++ we need to further check if it really needs to be ++ aligned on double words. ++ a) If double word alignment is necessary, ++ the register number must be even value. ++ b) Otherwise, the register number can be odd or even value. + 2. If it is required ONLY one register, +- the register number can be odd or even value. */ +-#define NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG(reg_offset, mode, type) \ +- ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ +- ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ +- ? (((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1) \ +- : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) \ ++ the register number can be odd or even value. */ ++#define NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG(reg_offset, mode, type) \ ++ ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ ++ ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ ++ ? 
(((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1) \ ++ : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) \ + : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) + +-/* This macro is to check if there are still available registers ++#define NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG(reg_offset, mode, type) \ ++ ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ ++ ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ ++ ? (((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM + 1) & ~1) \ ++ : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM)) \ ++ : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM)) ++ ++/* These two macros are to check if there are still available registers + for passing argument, which must be entirely in registers. */ + #define NDS32_ARG_ENTIRE_IN_GPR_REG_P(reg_offset, mode, type) \ + ((NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \ +@@ -181,13 +204,23 @@ enum nds32_16bit_address_type + <= (NDS32_GPR_ARG_FIRST_REGNUM \ + + NDS32_MAX_GPR_REGS_FOR_ARGS)) + +-/* This macro is to check if there are still available registers ++#define NDS32_ARG_ENTIRE_IN_FPR_REG_P(reg_offset, mode, type) \ ++ ((NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \ ++ + NDS32_NEED_N_REGS_FOR_ARG (mode, type)) \ ++ <= (NDS32_FPR_ARG_FIRST_REGNUM \ ++ + NDS32_MAX_FPR_REGS_FOR_ARGS)) ++ ++/* These two macros are to check if there are still available registers + for passing argument, either entirely in registers or partially + in registers. */ + #define NDS32_ARG_PARTIAL_IN_GPR_REG_P(reg_offset, mode, type) \ + (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \ + < NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS) + ++#define NDS32_ARG_PARTIAL_IN_FPR_REG_P(reg_offset, mode, type) \ ++ (NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \ ++ < NDS32_FPR_ARG_FIRST_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS) ++ + /* This macro is to check if the register is required to be saved on stack. + If call_used_regs[regno] == 0, regno is the callee-saved register. 
+ If df_regs_ever_live_p(regno) == true, it is used in the current function. +@@ -196,6 +229,19 @@ enum nds32_16bit_address_type + #define NDS32_REQUIRED_CALLEE_SAVED_P(regno) \ + ((!call_used_regs[regno]) && (df_regs_ever_live_p (regno))) + ++/* This macro is to check if the push25/pop25 are available to be used ++ for code generation. Because pop25 also performs return behavior, ++ the instructions may not be available for some cases. ++ If we want to use push25/pop25, all the following conditions must ++ be satisfied: ++ 1. TARGET_V3PUSH is set. ++ 2. Current function is not an ISR function. ++ 3. Current function is not a variadic function.*/ ++#define NDS32_V3PUSH_AVAILABLE_P \ ++ (TARGET_V3PUSH \ ++ && !nds32_isr_function_p (current_function_decl) \ ++ && (cfun->machine->va_args_size == 0)) ++ + /* ------------------------------------------------------------------------ */ + + /* A C structure for machine-specific, per-function data. +@@ -222,6 +268,10 @@ struct GTY(()) machine_function + callee-saved registers. */ + int callee_saved_gpr_regs_size; + ++ /* Number of bytes on the stack for saving floating-point ++ callee-saved registers. */ ++ int callee_saved_fpr_regs_size; ++ + /* The padding bytes in callee-saved area may be required. */ + int callee_saved_area_gpr_padding_bytes; + +@@ -230,26 +280,57 @@ struct GTY(()) machine_function + /* The last required general purpose callee-saved register. */ + int callee_saved_last_gpr_regno; + ++ /* The first required floating-point callee-saved register. */ ++ int callee_saved_first_fpr_regno; ++ /* The last required floating-point callee-saved register. */ ++ int callee_saved_last_fpr_regno; ++ + /* The padding bytes in varargs area may be required. */ + int va_args_area_padding_bytes; +- + /* The first required register that should be saved on stack for va_args. */ + int va_args_first_regno; + /* The last required register that should be saved on stack for va_args. 
*/ + int va_args_last_regno; + ++ /* Number of bytes on the stack for saving exception handling registers. */ ++ int eh_return_data_regs_size; ++ /* The first register of passing exception handling information. */ ++ int eh_return_data_first_regno; ++ /* The last register of passing exception handling information. */ ++ int eh_return_data_last_regno; ++ ++ /* Indicate that whether this function ++ calls __builtin_eh_return. */ ++ int use_eh_return_p; ++ + /* Indicate that whether this function needs + prologue/epilogue code generation. */ + int naked_p; + /* Indicate that whether this function + uses fp_as_gp optimization. */ + int fp_as_gp_p; ++ /* Indicate that whether this function is under strictly aligned ++ situation for legitimate address checking. This flag informs ++ nds32_legitimate_address_p() how to treat offset alignment: ++ 1. The IVOPT phase needs to detect available range for memory access, ++ such as checking [base + 32767] ~ [base + (-32768)]. ++ For this case we do not want address to be strictly aligned. ++ 2. The rtl lowering and optimization are close to target code. ++ For this case we need address to be strictly aligned. */ ++ int strict_aligned_p; ++ ++ /* Record two similar attributes status. */ ++ int attr_naked_p; ++ int attr_no_prologue_p; ++ /* Record hwloop group, use in reorg pass. */ ++ int hwloop_group_id; + }; + + /* A C structure that contains the arguments information. */ + typedef struct + { + unsigned int gpr_offset; ++ unsigned int fpr_offset; + } nds32_cumulative_args; + + /* ------------------------------------------------------------------------ */ +@@ -288,7 +369,8 @@ enum nds32_isr_nested_type + { + NDS32_NESTED, + NDS32_NOT_NESTED, +- NDS32_NESTED_READY ++ NDS32_NESTED_READY, ++ NDS32_CRITICAL + }; + + /* Define structure to record isr information. +@@ -316,6 +398,13 @@ struct nds32_isr_info + unless user specifies attribute to change it. */ + enum nds32_isr_nested_type nested_type; + ++ /* Secure isr level. 
++ Currently we have 0-3 security level. ++ It should be set to 0 by default. ++ For security processors, this is determined by secure ++ attribute or compiler options. */ ++ unsigned int security_level; ++ + /* Total vectors. + The total vectors = interrupt + exception numbers + reset. + It should be set to 0 by default. +@@ -340,19 +429,477 @@ enum nds32_builtins + { + NDS32_BUILTIN_ISYNC, + NDS32_BUILTIN_ISB, ++ NDS32_BUILTIN_DSB, ++ NDS32_BUILTIN_MSYNC_ALL, ++ NDS32_BUILTIN_MSYNC_STORE, + NDS32_BUILTIN_MFSR, + NDS32_BUILTIN_MFUSR, + NDS32_BUILTIN_MTSR, ++ NDS32_BUILTIN_MTSR_ISB, ++ NDS32_BUILTIN_MTSR_DSB, + NDS32_BUILTIN_MTUSR, + NDS32_BUILTIN_SETGIE_EN, +- NDS32_BUILTIN_SETGIE_DIS ++ NDS32_BUILTIN_SETGIE_DIS, ++ NDS32_BUILTIN_FMFCFG, ++ NDS32_BUILTIN_FMFCSR, ++ NDS32_BUILTIN_FMTCSR, ++ NDS32_BUILTIN_FCPYNSS, ++ NDS32_BUILTIN_FCPYSS, ++ NDS32_BUILTIN_FCPYNSD, ++ NDS32_BUILTIN_FCPYSD, ++ NDS32_BUILTIN_FABSS, ++ NDS32_BUILTIN_FABSD, ++ NDS32_BUILTIN_FSQRTS, ++ NDS32_BUILTIN_FSQRTD, ++ NDS32_BUILTIN_ABS, ++ NDS32_BUILTIN_AVE, ++ NDS32_BUILTIN_BCLR, ++ NDS32_BUILTIN_BSET, ++ NDS32_BUILTIN_BTGL, ++ NDS32_BUILTIN_BTST, ++ NDS32_BUILTIN_CLIP, ++ NDS32_BUILTIN_CLIPS, ++ NDS32_BUILTIN_CLZ, ++ NDS32_BUILTIN_CLO, ++ NDS32_BUILTIN_MAX, ++ NDS32_BUILTIN_MIN, ++ NDS32_BUILTIN_PBSAD, ++ NDS32_BUILTIN_PBSADA, ++ NDS32_BUILTIN_BSE, ++ NDS32_BUILTIN_BSP, ++ NDS32_BUILTIN_FFB, ++ NDS32_BUILTIN_FFMISM, ++ NDS32_BUILTIN_FLMISM, ++ NDS32_BUILTIN_KADDW, ++ NDS32_BUILTIN_KSUBW, ++ NDS32_BUILTIN_KADDH, ++ NDS32_BUILTIN_KSUBH, ++ NDS32_BUILTIN_KDMBB, ++ NDS32_BUILTIN_V_KDMBB, ++ NDS32_BUILTIN_KDMBT, ++ NDS32_BUILTIN_V_KDMBT, ++ NDS32_BUILTIN_KDMTB, ++ NDS32_BUILTIN_V_KDMTB, ++ NDS32_BUILTIN_KDMTT, ++ NDS32_BUILTIN_V_KDMTT, ++ NDS32_BUILTIN_KHMBB, ++ NDS32_BUILTIN_V_KHMBB, ++ NDS32_BUILTIN_KHMBT, ++ NDS32_BUILTIN_V_KHMBT, ++ NDS32_BUILTIN_KHMTB, ++ NDS32_BUILTIN_V_KHMTB, ++ NDS32_BUILTIN_KHMTT, ++ NDS32_BUILTIN_V_KHMTT, ++ NDS32_BUILTIN_KSLRAW, ++ NDS32_BUILTIN_KSLRAW_U, ++ 
NDS32_BUILTIN_RDOV, ++ NDS32_BUILTIN_CLROV, ++ NDS32_BUILTIN_ROTR, ++ NDS32_BUILTIN_SVA, ++ NDS32_BUILTIN_SVS, ++ NDS32_BUILTIN_WSBH, ++ NDS32_BUILTIN_JR_ITOFF, ++ NDS32_BUILTIN_JR_TOFF, ++ NDS32_BUILTIN_JRAL_ITON, ++ NDS32_BUILTIN_JRAL_TON, ++ NDS32_BUILTIN_RET_ITOFF, ++ NDS32_BUILTIN_RET_TOFF, ++ NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT, ++ NDS32_BUILTIN_STANDBY_WAKE_GRANT, ++ NDS32_BUILTIN_STANDBY_WAKE_DONE, ++ NDS32_BUILTIN_TEQZ, ++ NDS32_BUILTIN_TNEZ, ++ NDS32_BUILTIN_TRAP, ++ NDS32_BUILTIN_SETEND_BIG, ++ NDS32_BUILTIN_SETEND_LITTLE, ++ NDS32_BUILTIN_SYSCALL, ++ NDS32_BUILTIN_BREAK, ++ NDS32_BUILTIN_NOP, ++ NDS32_BUILTIN_SCHE_BARRIER, ++ NDS32_BUILTIN_GET_CURRENT_SP, ++ NDS32_BUILTIN_SET_CURRENT_SP, ++ NDS32_BUILTIN_RETURN_ADDRESS, ++ NDS32_BUILTIN_LLW, ++ NDS32_BUILTIN_LWUP, ++ NDS32_BUILTIN_LBUP, ++ NDS32_BUILTIN_SCW, ++ NDS32_BUILTIN_SWUP, ++ NDS32_BUILTIN_SBUP, ++ NDS32_BUILTIN_CCTL_VA_LCK, ++ NDS32_BUILTIN_CCTL_IDX_WBINVAL, ++ NDS32_BUILTIN_CCTL_VA_WBINVAL_L1, ++ NDS32_BUILTIN_CCTL_VA_WBINVAL_LA, ++ NDS32_BUILTIN_CCTL_IDX_READ, ++ NDS32_BUILTIN_CCTL_IDX_WRITE, ++ NDS32_BUILTIN_CCTL_L1D_INVALALL, ++ NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL, ++ NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL, ++ NDS32_BUILTIN_DPREF_QW, ++ NDS32_BUILTIN_DPREF_HW, ++ NDS32_BUILTIN_DPREF_W, ++ NDS32_BUILTIN_DPREF_DW, ++ NDS32_BUILTIN_TLBOP_TRD, ++ NDS32_BUILTIN_TLBOP_TWR, ++ NDS32_BUILTIN_TLBOP_RWR, ++ NDS32_BUILTIN_TLBOP_RWLK, ++ NDS32_BUILTIN_TLBOP_UNLK, ++ NDS32_BUILTIN_TLBOP_PB, ++ NDS32_BUILTIN_TLBOP_INV, ++ NDS32_BUILTIN_TLBOP_FLUA, ++ NDS32_BUILTIN_UALOAD_HW, ++ NDS32_BUILTIN_UALOAD_W, ++ NDS32_BUILTIN_UALOAD_DW, ++ NDS32_BUILTIN_UASTORE_HW, ++ NDS32_BUILTIN_UASTORE_W, ++ NDS32_BUILTIN_UASTORE_DW, ++ NDS32_BUILTIN_GIE_DIS, ++ NDS32_BUILTIN_GIE_EN, ++ NDS32_BUILTIN_ENABLE_INT, ++ NDS32_BUILTIN_DISABLE_INT, ++ NDS32_BUILTIN_SET_PENDING_SWINT, ++ NDS32_BUILTIN_CLR_PENDING_SWINT, ++ NDS32_BUILTIN_CLR_PENDING_HWINT, ++ NDS32_BUILTIN_GET_ALL_PENDING_INT, ++ NDS32_BUILTIN_GET_PENDING_INT, ++ 
NDS32_BUILTIN_SET_INT_PRIORITY, ++ NDS32_BUILTIN_GET_INT_PRIORITY, ++ NDS32_BUILTIN_SET_TRIG_LEVEL, ++ NDS32_BUILTIN_SET_TRIG_EDGE, ++ NDS32_BUILTIN_GET_TRIG_TYPE, ++ NDS32_BUILTIN_SIGNATURE_BEGIN, ++ NDS32_BUILTIN_SIGNATURE_END, ++ NDS32_BUILTIN_DSP_BEGIN, ++ NDS32_BUILTIN_ADD16, ++ NDS32_BUILTIN_V_UADD16, ++ NDS32_BUILTIN_V_SADD16, ++ NDS32_BUILTIN_RADD16, ++ NDS32_BUILTIN_V_RADD16, ++ NDS32_BUILTIN_URADD16, ++ NDS32_BUILTIN_V_URADD16, ++ NDS32_BUILTIN_KADD16, ++ NDS32_BUILTIN_V_KADD16, ++ NDS32_BUILTIN_UKADD16, ++ NDS32_BUILTIN_V_UKADD16, ++ NDS32_BUILTIN_SUB16, ++ NDS32_BUILTIN_V_USUB16, ++ NDS32_BUILTIN_V_SSUB16, ++ NDS32_BUILTIN_RSUB16, ++ NDS32_BUILTIN_V_RSUB16, ++ NDS32_BUILTIN_URSUB16, ++ NDS32_BUILTIN_V_URSUB16, ++ NDS32_BUILTIN_KSUB16, ++ NDS32_BUILTIN_V_KSUB16, ++ NDS32_BUILTIN_UKSUB16, ++ NDS32_BUILTIN_V_UKSUB16, ++ NDS32_BUILTIN_CRAS16, ++ NDS32_BUILTIN_V_UCRAS16, ++ NDS32_BUILTIN_V_SCRAS16, ++ NDS32_BUILTIN_RCRAS16, ++ NDS32_BUILTIN_V_RCRAS16, ++ NDS32_BUILTIN_URCRAS16, ++ NDS32_BUILTIN_V_URCRAS16, ++ NDS32_BUILTIN_KCRAS16, ++ NDS32_BUILTIN_V_KCRAS16, ++ NDS32_BUILTIN_UKCRAS16, ++ NDS32_BUILTIN_V_UKCRAS16, ++ NDS32_BUILTIN_CRSA16, ++ NDS32_BUILTIN_V_UCRSA16, ++ NDS32_BUILTIN_V_SCRSA16, ++ NDS32_BUILTIN_RCRSA16, ++ NDS32_BUILTIN_V_RCRSA16, ++ NDS32_BUILTIN_URCRSA16, ++ NDS32_BUILTIN_V_URCRSA16, ++ NDS32_BUILTIN_KCRSA16, ++ NDS32_BUILTIN_V_KCRSA16, ++ NDS32_BUILTIN_UKCRSA16, ++ NDS32_BUILTIN_V_UKCRSA16, ++ NDS32_BUILTIN_ADD8, ++ NDS32_BUILTIN_V_UADD8, ++ NDS32_BUILTIN_V_SADD8, ++ NDS32_BUILTIN_RADD8, ++ NDS32_BUILTIN_V_RADD8, ++ NDS32_BUILTIN_URADD8, ++ NDS32_BUILTIN_V_URADD8, ++ NDS32_BUILTIN_KADD8, ++ NDS32_BUILTIN_V_KADD8, ++ NDS32_BUILTIN_UKADD8, ++ NDS32_BUILTIN_V_UKADD8, ++ NDS32_BUILTIN_SUB8, ++ NDS32_BUILTIN_V_USUB8, ++ NDS32_BUILTIN_V_SSUB8, ++ NDS32_BUILTIN_RSUB8, ++ NDS32_BUILTIN_V_RSUB8, ++ NDS32_BUILTIN_URSUB8, ++ NDS32_BUILTIN_V_URSUB8, ++ NDS32_BUILTIN_KSUB8, ++ NDS32_BUILTIN_V_KSUB8, ++ NDS32_BUILTIN_UKSUB8, ++ NDS32_BUILTIN_V_UKSUB8, 
++ NDS32_BUILTIN_SRA16, ++ NDS32_BUILTIN_V_SRA16, ++ NDS32_BUILTIN_SRA16_U, ++ NDS32_BUILTIN_V_SRA16_U, ++ NDS32_BUILTIN_SRL16, ++ NDS32_BUILTIN_V_SRL16, ++ NDS32_BUILTIN_SRL16_U, ++ NDS32_BUILTIN_V_SRL16_U, ++ NDS32_BUILTIN_SLL16, ++ NDS32_BUILTIN_V_SLL16, ++ NDS32_BUILTIN_KSLL16, ++ NDS32_BUILTIN_V_KSLL16, ++ NDS32_BUILTIN_KSLRA16, ++ NDS32_BUILTIN_V_KSLRA16, ++ NDS32_BUILTIN_KSLRA16_U, ++ NDS32_BUILTIN_V_KSLRA16_U, ++ NDS32_BUILTIN_CMPEQ16, ++ NDS32_BUILTIN_V_SCMPEQ16, ++ NDS32_BUILTIN_V_UCMPEQ16, ++ NDS32_BUILTIN_SCMPLT16, ++ NDS32_BUILTIN_V_SCMPLT16, ++ NDS32_BUILTIN_SCMPLE16, ++ NDS32_BUILTIN_V_SCMPLE16, ++ NDS32_BUILTIN_UCMPLT16, ++ NDS32_BUILTIN_V_UCMPLT16, ++ NDS32_BUILTIN_UCMPLE16, ++ NDS32_BUILTIN_V_UCMPLE16, ++ NDS32_BUILTIN_CMPEQ8, ++ NDS32_BUILTIN_V_SCMPEQ8, ++ NDS32_BUILTIN_V_UCMPEQ8, ++ NDS32_BUILTIN_SCMPLT8, ++ NDS32_BUILTIN_V_SCMPLT8, ++ NDS32_BUILTIN_SCMPLE8, ++ NDS32_BUILTIN_V_SCMPLE8, ++ NDS32_BUILTIN_UCMPLT8, ++ NDS32_BUILTIN_V_UCMPLT8, ++ NDS32_BUILTIN_UCMPLE8, ++ NDS32_BUILTIN_V_UCMPLE8, ++ NDS32_BUILTIN_SMIN16, ++ NDS32_BUILTIN_V_SMIN16, ++ NDS32_BUILTIN_UMIN16, ++ NDS32_BUILTIN_V_UMIN16, ++ NDS32_BUILTIN_SMAX16, ++ NDS32_BUILTIN_V_SMAX16, ++ NDS32_BUILTIN_UMAX16, ++ NDS32_BUILTIN_V_UMAX16, ++ NDS32_BUILTIN_SCLIP16, ++ NDS32_BUILTIN_V_SCLIP16, ++ NDS32_BUILTIN_UCLIP16, ++ NDS32_BUILTIN_V_UCLIP16, ++ NDS32_BUILTIN_KHM16, ++ NDS32_BUILTIN_V_KHM16, ++ NDS32_BUILTIN_KHMX16, ++ NDS32_BUILTIN_V_KHMX16, ++ NDS32_BUILTIN_KABS16, ++ NDS32_BUILTIN_V_KABS16, ++ NDS32_BUILTIN_SMIN8, ++ NDS32_BUILTIN_V_SMIN8, ++ NDS32_BUILTIN_UMIN8, ++ NDS32_BUILTIN_V_UMIN8, ++ NDS32_BUILTIN_SMAX8, ++ NDS32_BUILTIN_V_SMAX8, ++ NDS32_BUILTIN_UMAX8, ++ NDS32_BUILTIN_V_UMAX8, ++ NDS32_BUILTIN_KABS8, ++ NDS32_BUILTIN_V_KABS8, ++ NDS32_BUILTIN_SUNPKD810, ++ NDS32_BUILTIN_V_SUNPKD810, ++ NDS32_BUILTIN_SUNPKD820, ++ NDS32_BUILTIN_V_SUNPKD820, ++ NDS32_BUILTIN_SUNPKD830, ++ NDS32_BUILTIN_V_SUNPKD830, ++ NDS32_BUILTIN_SUNPKD831, ++ NDS32_BUILTIN_V_SUNPKD831, ++ 
NDS32_BUILTIN_ZUNPKD810, ++ NDS32_BUILTIN_V_ZUNPKD810, ++ NDS32_BUILTIN_ZUNPKD820, ++ NDS32_BUILTIN_V_ZUNPKD820, ++ NDS32_BUILTIN_ZUNPKD830, ++ NDS32_BUILTIN_V_ZUNPKD830, ++ NDS32_BUILTIN_ZUNPKD831, ++ NDS32_BUILTIN_V_ZUNPKD831, ++ NDS32_BUILTIN_RADDW, ++ NDS32_BUILTIN_URADDW, ++ NDS32_BUILTIN_RSUBW, ++ NDS32_BUILTIN_URSUBW, ++ NDS32_BUILTIN_SRA_U, ++ NDS32_BUILTIN_KSLL, ++ NDS32_BUILTIN_PKBB16, ++ NDS32_BUILTIN_V_PKBB16, ++ NDS32_BUILTIN_PKBT16, ++ NDS32_BUILTIN_V_PKBT16, ++ NDS32_BUILTIN_PKTB16, ++ NDS32_BUILTIN_V_PKTB16, ++ NDS32_BUILTIN_PKTT16, ++ NDS32_BUILTIN_V_PKTT16, ++ NDS32_BUILTIN_SMMUL, ++ NDS32_BUILTIN_SMMUL_U, ++ NDS32_BUILTIN_KMMAC, ++ NDS32_BUILTIN_KMMAC_U, ++ NDS32_BUILTIN_KMMSB, ++ NDS32_BUILTIN_KMMSB_U, ++ NDS32_BUILTIN_KWMMUL, ++ NDS32_BUILTIN_KWMMUL_U, ++ NDS32_BUILTIN_SMMWB, ++ NDS32_BUILTIN_V_SMMWB, ++ NDS32_BUILTIN_SMMWB_U, ++ NDS32_BUILTIN_V_SMMWB_U, ++ NDS32_BUILTIN_SMMWT, ++ NDS32_BUILTIN_V_SMMWT, ++ NDS32_BUILTIN_SMMWT_U, ++ NDS32_BUILTIN_V_SMMWT_U, ++ NDS32_BUILTIN_KMMAWB, ++ NDS32_BUILTIN_V_KMMAWB, ++ NDS32_BUILTIN_KMMAWB_U, ++ NDS32_BUILTIN_V_KMMAWB_U, ++ NDS32_BUILTIN_KMMAWT, ++ NDS32_BUILTIN_V_KMMAWT, ++ NDS32_BUILTIN_KMMAWT_U, ++ NDS32_BUILTIN_V_KMMAWT_U, ++ NDS32_BUILTIN_SMBB, ++ NDS32_BUILTIN_V_SMBB, ++ NDS32_BUILTIN_SMBT, ++ NDS32_BUILTIN_V_SMBT, ++ NDS32_BUILTIN_SMTT, ++ NDS32_BUILTIN_V_SMTT, ++ NDS32_BUILTIN_KMDA, ++ NDS32_BUILTIN_V_KMDA, ++ NDS32_BUILTIN_KMXDA, ++ NDS32_BUILTIN_V_KMXDA, ++ NDS32_BUILTIN_SMDS, ++ NDS32_BUILTIN_V_SMDS, ++ NDS32_BUILTIN_SMDRS, ++ NDS32_BUILTIN_V_SMDRS, ++ NDS32_BUILTIN_SMXDS, ++ NDS32_BUILTIN_V_SMXDS, ++ NDS32_BUILTIN_KMABB, ++ NDS32_BUILTIN_V_KMABB, ++ NDS32_BUILTIN_KMABT, ++ NDS32_BUILTIN_V_KMABT, ++ NDS32_BUILTIN_KMATT, ++ NDS32_BUILTIN_V_KMATT, ++ NDS32_BUILTIN_KMADA, ++ NDS32_BUILTIN_V_KMADA, ++ NDS32_BUILTIN_KMAXDA, ++ NDS32_BUILTIN_V_KMAXDA, ++ NDS32_BUILTIN_KMADS, ++ NDS32_BUILTIN_V_KMADS, ++ NDS32_BUILTIN_KMADRS, ++ NDS32_BUILTIN_V_KMADRS, ++ NDS32_BUILTIN_KMAXDS, ++ 
NDS32_BUILTIN_V_KMAXDS, ++ NDS32_BUILTIN_KMSDA, ++ NDS32_BUILTIN_V_KMSDA, ++ NDS32_BUILTIN_KMSXDA, ++ NDS32_BUILTIN_V_KMSXDA, ++ NDS32_BUILTIN_SMAL, ++ NDS32_BUILTIN_V_SMAL, ++ NDS32_BUILTIN_BITREV, ++ NDS32_BUILTIN_WEXT, ++ NDS32_BUILTIN_BPICK, ++ NDS32_BUILTIN_INSB, ++ NDS32_BUILTIN_SADD64, ++ NDS32_BUILTIN_UADD64, ++ NDS32_BUILTIN_RADD64, ++ NDS32_BUILTIN_URADD64, ++ NDS32_BUILTIN_KADD64, ++ NDS32_BUILTIN_UKADD64, ++ NDS32_BUILTIN_SSUB64, ++ NDS32_BUILTIN_USUB64, ++ NDS32_BUILTIN_RSUB64, ++ NDS32_BUILTIN_URSUB64, ++ NDS32_BUILTIN_KSUB64, ++ NDS32_BUILTIN_UKSUB64, ++ NDS32_BUILTIN_SMAR64, ++ NDS32_BUILTIN_SMSR64, ++ NDS32_BUILTIN_UMAR64, ++ NDS32_BUILTIN_UMSR64, ++ NDS32_BUILTIN_KMAR64, ++ NDS32_BUILTIN_KMSR64, ++ NDS32_BUILTIN_UKMAR64, ++ NDS32_BUILTIN_UKMSR64, ++ NDS32_BUILTIN_SMALBB, ++ NDS32_BUILTIN_V_SMALBB, ++ NDS32_BUILTIN_SMALBT, ++ NDS32_BUILTIN_V_SMALBT, ++ NDS32_BUILTIN_SMALTT, ++ NDS32_BUILTIN_V_SMALTT, ++ NDS32_BUILTIN_SMALDA, ++ NDS32_BUILTIN_V_SMALDA, ++ NDS32_BUILTIN_SMALXDA, ++ NDS32_BUILTIN_V_SMALXDA, ++ NDS32_BUILTIN_SMALDS, ++ NDS32_BUILTIN_V_SMALDS, ++ NDS32_BUILTIN_SMALDRS, ++ NDS32_BUILTIN_V_SMALDRS, ++ NDS32_BUILTIN_SMALXDS, ++ NDS32_BUILTIN_V_SMALXDS, ++ NDS32_BUILTIN_SMUL16, ++ NDS32_BUILTIN_V_SMUL16, ++ NDS32_BUILTIN_SMULX16, ++ NDS32_BUILTIN_V_SMULX16, ++ NDS32_BUILTIN_UMUL16, ++ NDS32_BUILTIN_V_UMUL16, ++ NDS32_BUILTIN_UMULX16, ++ NDS32_BUILTIN_V_UMULX16, ++ NDS32_BUILTIN_SMSLDA, ++ NDS32_BUILTIN_V_SMSLDA, ++ NDS32_BUILTIN_SMSLXDA, ++ NDS32_BUILTIN_V_SMSLXDA, ++ NDS32_BUILTIN_UCLIP32, ++ NDS32_BUILTIN_SCLIP32, ++ NDS32_BUILTIN_KABS, ++ NDS32_BUILTIN_UALOAD_U16, ++ NDS32_BUILTIN_UALOAD_S16, ++ NDS32_BUILTIN_UALOAD_U8, ++ NDS32_BUILTIN_UALOAD_S8, ++ NDS32_BUILTIN_UASTORE_U16, ++ NDS32_BUILTIN_UASTORE_S16, ++ NDS32_BUILTIN_UASTORE_U8, ++ NDS32_BUILTIN_UASTORE_S8, ++ NDS32_BUILTIN_DSP_END, ++ NDS32_BUILTIN_NO_HWLOOP, ++ NDS32_BUILTIN_UNALIGNED_FEATURE, ++ NDS32_BUILTIN_ENABLE_UNALIGNED, ++ NDS32_BUILTIN_DISABLE_UNALIGNED, ++ 
NDS32_BUILTIN_COUNT + }; + + /* ------------------------------------------------------------------------ */ + +-#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) +-#define TARGET_ISA_V3 (nds32_arch_option == ARCH_V3) +-#define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) ++#define TARGET_ISR_VECTOR_SIZE_4_BYTE \ ++ (nds32_isr_vector_size == 4) ++ ++#define TARGET_ISA_V2 \ ++ (nds32_arch_option == ARCH_V2 || nds32_arch_option == ARCH_V2J) ++#define TARGET_ISA_V3 \ ++ (nds32_arch_option == ARCH_V3 \ ++ || nds32_arch_option == ARCH_V3J \ ++ || nds32_arch_option == ARCH_V3F \ ++ || nds32_arch_option == ARCH_V3S) ++#define TARGET_ISA_V3M \ ++ (nds32_arch_option == ARCH_V3M || \ ++ nds32_arch_option == ARCH_V3M_PLUS) ++ ++#define TARGET_ISA_V3M_PLUS \ ++ (nds32_arch_option == ARCH_V3M_PLUS) ++ ++#define TARGET_PIPELINE_N7 \ ++ (nds32_cpu_option == CPU_N7) ++#define TARGET_PIPELINE_N8 \ ++ (nds32_cpu_option == CPU_N6 \ ++ || nds32_cpu_option == CPU_N8) ++#define TARGET_PIPELINE_N9 \ ++ (nds32_cpu_option == CPU_N9) ++#define TARGET_PIPELINE_N10 \ ++ (nds32_cpu_option == CPU_N10) ++#define TARGET_PIPELINE_N13 \ ++ (nds32_cpu_option == CPU_N12 || nds32_cpu_option == CPU_N13) ++#define TARGET_PIPELINE_GRAYWOLF \ ++ (nds32_cpu_option == CPU_GRAYWOLF) ++#define TARGET_PIPELINE_PANTHER \ ++ (nds32_cpu_option == CPU_PANTHER) ++#define TARGET_PIPELINE_SIMPLE \ ++ (nds32_cpu_option == CPU_SIMPLE) + + #define TARGET_CMODEL_SMALL \ + (nds32_cmodel_option == CMODEL_SMALL) +@@ -361,55 +908,153 @@ enum nds32_builtins + #define TARGET_CMODEL_LARGE \ + (nds32_cmodel_option == CMODEL_LARGE) + ++#define TARGET_ICT_MODEL_SMALL \ ++ (nds32_ict_model == ICT_MODEL_SMALL) ++ ++#define TARGET_ICT_MODEL_LARGE \ ++ (nds32_ict_model == ICT_MODEL_LARGE) ++ + /* When -mcmodel=small or -mcmodel=medium, + compiler may generate gp-base instruction directly. 
*/ + #define TARGET_GP_DIRECT \ + (nds32_cmodel_option == CMODEL_SMALL\ + || nds32_cmodel_option == CMODEL_MEDIUM) + +-#define TARGET_SOFT_FLOAT 1 +-#define TARGET_HARD_FLOAT 0 ++/* There are three kinds of mul configurations: ++ 1-cycle fast mul, 2-cycle fast mul, and slow mul operation. */ ++#define TARGET_MUL_FAST_1 \ ++ (nds32_mul_config == MUL_TYPE_FAST_1) ++#define TARGET_MUL_FAST_2 \ ++ (nds32_mul_config == MUL_TYPE_FAST_2) ++#define TARGET_MUL_SLOW \ ++ (nds32_mul_config == MUL_TYPE_SLOW) ++ ++/* Run-time Target Specification. */ ++#define TARGET_SOFT_FLOAT (nds32_abi == NDS32_ABI_V2) ++/* Use hardware floating point calling convention. */ ++#define TARGET_HARD_FLOAT (nds32_abi == NDS32_ABI_V2_FP_PLUS) ++ ++/* Record arch version in TARGET_ARCH_DEFAULT. 0 means soft ABI, ++ 1 means hard ABI and using full floating-point instruction, ++ 2 means hard ABI and only using single-precision floating-point ++ instruction */ ++#if TARGET_ARCH_DEFAULT == 1 ++# define TARGET_DEFAULT_ABI NDS32_ABI_V2_FP_PLUS ++# define TARGET_DEFAULT_FPU_ISA MASK_FPU_DOUBLE | MASK_FPU_SINGLE ++# define TARGET_DEFAULT_FPU_FMA 0 ++#else ++# if TARGET_ARCH_DEFAULT == 2 ++# define TARGET_DEFAULT_ABI NDS32_ABI_V2_FP_PLUS ++# define TARGET_DEFAULT_FPU_ISA MASK_FPU_SINGLE ++# define TARGET_DEFAULT_FPU_FMA 0 ++# else ++# define TARGET_DEFAULT_ABI NDS32_ABI_V2 ++# define TARGET_DEFAULT_FPU_ISA 0 ++# define TARGET_DEFAULT_FPU_FMA 0 ++# endif ++#endif ++ ++#define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2 ++ ++#define TARGET_LMWSMW_OPT_AUTO \ ++ (flag_lmwsmw_cost == LMWSMW_OPT_AUTO) ++ ++#define TARGET_LMWSMW_OPT_SIZE \ ++ (flag_lmwsmw_cost == LMWSMW_OPT_SIZE) ++ ++#define TARGET_LMWSMW_OPT_SPEED \ ++ (flag_lmwsmw_cost == LMWSMW_OPT_SPEED) ++ ++#define TARGET_LMWSMW_OPT_ALL \ ++ (flag_lmwsmw_cost == LMWSMW_OPT_ALL) ++ ++/* ------------------------------------------------------------------------ */ ++ ++#ifdef TARGET_DEFAULT_RELAX ++# define NDS32_RELAX_SPEC " %{!mno-relax:--relax}" ++#else ++# 
define NDS32_RELAX_SPEC " %{mrelax:--relax}" ++#endif ++ ++#ifdef TARGET_OS_DEFAULT_IFC ++# define NDS32_IFC_SPEC " %{Os3|Os|mifc:%{!mno-ifc:--mifc}}" ++#else ++# define NDS32_IFC_SPEC " %{mifc:--mifc}" ++#endif ++#define NDS32_IFC_V3M_PLUS_SPEC " %{march=v3m+:%{Os3|Os|mifc:%{!mno-ifc:-mifc}}}" ++ ++#ifdef TARGET_OS_DEFAULT_EX9 ++# define NDS32_EX9_SPEC " %{Os3|Os|mex9:%{!mno-ex9:--mex9}}" ++#else ++# define NDS32_EX9_SPEC " %{mex9:--mex9}" ++#endif ++#define NDS32_EX9_V3M_PLUS_SPEC " %{march=v3m+:%{Os3|Os|mex9:%{!mno-ex9:-mex9}}}" ++ ++#ifdef TARGET_DEFAULT_EXT_DSP ++# define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}" ++#else ++# define NDS32_EXT_DSP_SPEC "" ++#endif ++ ++#ifdef TARGET_DEFAULT_HWLOOP ++# define NDS32_HWLOOP_SPEC " %{!mno-ext-zol:-mext-zol}" ++#else ++# define NDS32_HWLOOP_SPEC "" ++#endif ++ ++#ifdef TARGET_DEFAULT_16BIT ++# define NDS32_16BIT_SPEC " %{!mno-16-bit:%{!mno-16bit:-m16bit}}" ++#else ++# define NDS32_16BIT_SPEC " %{!m16-bit:%{!m16bit:-mno-16bit}}" ++#endif + + /* ------------------------------------------------------------------------ */ + + /* Controlling the Compilation Driver. 
*/ + ++#define DRIVER_SELF_SPECS \ ++ " %{mno-16bit|mno-16-bit:-mno-ifc -mno-ex9}" \ ++ NDS32_IFC_V3M_PLUS_SPEC \ ++ NDS32_EX9_V3M_PLUS_SPEC \ ++ NDS32_16BIT_SPEC ++ + #define OPTION_DEFAULT_SPECS \ +- {"arch", "%{!march=*:-march=%(VALUE)}" } ++ {"arch", " %{!march=*:-march=%(VALUE)}" \ ++ " %{march=v3f:%{!mfloat-abi=*:-mfloat-abi=hard}" \ ++ " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}" \ ++ " %{!mno-ext-fpu-dp:%{!mext-fpu-dp:-mext-fpu-dp}}}" \ ++ " %{march=v3s:%{!mfloat-abi=*:-mfloat-abi=hard}" \ ++ " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}}" }, \ ++ {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ ++ {"memory_model", "%{!mmemory-model=*:-mmemory-model=%(VALUE)}"}, \ ++ {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } + + #define CC1_SPEC \ +- "" ++ " %{Os1:-Os -mno-ifc -mno-ex9;" \ ++ "Os2:-Os -minnermost-loop;" \ ++ "Os3:-Os}" \ ++ " %{ffast-math:%{!mno-soft-fp-arith-comm:-msoft-fp-arith-comm}}" \ ++ NDS32_EXT_DSP_SPEC \ ++ NDS32_HWLOOP_SPEC + + #define ASM_SPEC \ +- " %{mbig-endian:-EB} %{mlittle-endian:-EL}" +- +-/* If user issues -mrelax, we need to pass '--relax' to linker. */ +-#define LINK_SPEC \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ +- " %{mrelax:--relax}" +- +-#define LIB_SPEC \ +- " -lc -lgloss" +- +-/* The option -mno-ctor-dtor can disable constructor/destructor feature +- by applying different crt stuff. In the convention, crt0.o is the +- startup file without constructor/destructor; +- crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the +- startup files with constructor/destructor. +- Note that crt0.o, crt1.o, crti.o, and crtn.o are provided +- by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are +- currently provided by GCC for nds32 target. +- +- For nds32 target so far: +- If -mno-ctor-dtor, we are going to link +- "crt0.o [user objects]". +- If general cases, we are going to link +- "crt1.o crtbegin1.o [user objects] crtend1.o". 
*/ +-#define STARTFILE_SPEC \ +- " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ +- " %{!mno-ctor-dtor:crtbegin1.o%s}" +-#define ENDFILE_SPEC \ +- " %{!mno-ctor-dtor:crtend1.o%s}" ++ " %{march=*:-march=%*}" \ ++ " %{mno-16-bit|mno-16bit:-mno-16bit-ext}" \ ++ " %{march=v3m:%{!mfull-regs:%{!mreduced-regs:-mreduced-regs}}}" \ ++ " %{mfull-regs:-mno-reduced-regs}" \ ++ " %{mreduced-regs:-mreduced-regs}" \ ++ " %{mabi=*:-mabi=v%*}" \ ++ " %{mconfig-fpu=*:-mfpu-freg=%*}" \ ++ " %{mext-fpu-mac:-mmac}" \ ++ " %{mno-ext-fpu-mac:-mno-mac}" \ ++ " %{mext-fpu-sp:-mfpu-sp-ext}" \ ++ " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \ ++ " %{mext-fpu-dp:-mfpu-dp-ext}" \ ++ " %{mno-ext-fpu-dp:-mno-fpu-dp-ext}" \ ++ " %{mext-dsp:-mdsp-ext}" \ ++ " %{mext-zol:-mzol-ext}" \ ++ " %{O|O1|O2|O3|Ofast:-O1;:-Os}" + + /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we + configure gcc with --target=nds32be-* setting. +@@ -422,7 +1067,11 @@ enum nds32_builtins + + /* Currently we only have elf toolchain, + where -mcmodel=medium is always the default. */ +-#define NDS32_CMODEL_DEFAULT "mcmodel=medium" ++#if TARGET_ELF ++# define NDS32_CMODEL_DEFAULT "mcmodel=medium" ++#else ++# define NDS32_CMODEL_DEFAULT "mcmodel=medium" ++#endif + + #define MULTILIB_DEFAULTS \ + { NDS32_ENDIAN_DEFAULT, NDS32_CMODEL_DEFAULT } +@@ -430,34 +1079,8 @@ enum nds32_builtins + + /* Run-time Target Specification. 
*/ + +-#define TARGET_CPU_CPP_BUILTINS() \ +- do \ +- { \ +- builtin_define ("__nds32__"); \ +- \ +- if (TARGET_ISA_V2) \ +- builtin_define ("__NDS32_ISA_V2__"); \ +- if (TARGET_ISA_V3) \ +- builtin_define ("__NDS32_ISA_V3__"); \ +- if (TARGET_ISA_V3M) \ +- builtin_define ("__NDS32_ISA_V3M__"); \ +- \ +- if (TARGET_BIG_ENDIAN) \ +- builtin_define ("__big_endian__"); \ +- if (TARGET_REDUCED_REGS) \ +- builtin_define ("__NDS32_REDUCED_REGS__"); \ +- if (TARGET_CMOV) \ +- builtin_define ("__NDS32_CMOV__"); \ +- if (TARGET_PERF_EXT) \ +- builtin_define ("__NDS32_PERF_EXT__"); \ +- if (TARGET_16_BIT) \ +- builtin_define ("__NDS32_16_BIT__"); \ +- if (TARGET_GP_DIRECT) \ +- builtin_define ("__NDS32_GP_DIRECT__"); \ +- \ +- builtin_assert ("cpu=nds32"); \ +- builtin_assert ("machine=nds32"); \ +- } while (0) ++#define TARGET_CPU_CPP_BUILTINS() \ ++ nds32_cpu_cpp_builtins (pfile) + + + /* Defining Data Structures for Per-function Information. */ +@@ -487,10 +1110,20 @@ enum nds32_builtins + + #define STACK_BOUNDARY 64 + +-#define FUNCTION_BOUNDARY 32 ++#define FUNCTION_BOUNDARY \ ++ ((NDS32_ALIGN_P () || TARGET_ALIGN_FUNCTION) ? (TARGET_PIPELINE_PANTHER ? 64 : 32) : 16) + + #define BIGGEST_ALIGNMENT 64 + ++#define DATA_ALIGNMENT(constant, basic_align) \ ++ nds32_data_alignment (constant, basic_align) ++ ++#define CONSTANT_ALIGNMENT(constant, basic_align) \ ++ nds32_constant_alignment (constant, basic_align) ++ ++#define LOCAL_ALIGNMENT(type, basic_align) \ ++ nds32_local_alignment (type, basic_align) ++ + #define EMPTY_FIELD_BOUNDARY 32 + + #define STRUCTURE_SIZE_BOUNDARY 8 +@@ -515,8 +1148,8 @@ enum nds32_builtins + + #define SIZE_TYPE "long unsigned int" + #define PTRDIFF_TYPE "long int" +-#define WCHAR_TYPE "short unsigned int" +-#define WCHAR_TYPE_SIZE 16 ++#define WCHAR_TYPE "unsigned int" ++#define WCHAR_TYPE_SIZE 32 + + + /* Register Usage. */ +@@ -526,7 +1159,7 @@ enum nds32_builtins + from 0 to just below FIRST_PSEUDO_REGISTER. 
+ All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ +-#define FIRST_PSEUDO_REGISTER 34 ++#define FIRST_PSEUDO_REGISTER 101 + + /* An initializer that says which registers are used for fixed + purposes all throughout the compiled code and are therefore +@@ -537,24 +1170,38 @@ enum nds32_builtins + $r30 : $lp + $r31 : $sp + +- caller-save registers: $r0 ~ $r5, $r16 ~ $r23 +- callee-save registers: $r6 ~ $r10, $r11 ~ $r14 ++ caller-save registers: $r0 ~ $r5, $r16 ~ $r23, $fs0 ~ $fs5, $fs22 ~ $fs47 ++ callee-save registers: $r6 ~ $r10, $r11 ~ $r14, $fs6 ~ $fs21, $fs48 ~ $fs63 + + reserved for assembler : $r15 + reserved for other use : $r24, $r25, $r26, $r27 */ +-#define FIXED_REGISTERS \ +-{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ +- 0, 0, 0, 0, 0, 0, 0, 0, \ +- /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ +- 0, 0, 0, 0, 0, 0, 0, 1, \ +- /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ +- 0, 0, 0, 0, 0, 0, 0, 0, \ +- /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ +- 1, 1, 1, 1, 0, 1, 0, 1, \ +- /* ARG_POINTER:32 */ \ +- 1, \ +- /* FRAME_POINTER:33 */ \ +- 1 \ ++#define FIXED_REGISTERS \ ++{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ ++ 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ ++ 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ ++ 0, 0, 0, 0, 0, 0, 0, 0, \ ++ /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ ++ 0, 0, 1, 1, 0, 1, 0, 1, \ ++ /* AP FP fs0 fs1 fs2 fs3 fs4 fs5 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs6 fs7 fs8 fs9 fs10 fs11 fs12 fs13 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs30 fs31 fd16 fd17 fd18 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd19 fd20 fd21 fd22 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd23 fd24 fd25 fd26 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd27 fd28 fd29 fd30 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd31 LB 
LE LC */ \ ++ 1, 1, 1, 1, 1 \ + } + + /* Identifies the registers that are not available for +@@ -563,35 +1210,59 @@ enum nds32_builtins + + 0 : callee-save registers + 1 : caller-save registers */ +-#define CALL_USED_REGISTERS \ +-{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ +- 1, 1, 1, 1, 1, 1, 0, 0, \ +- /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ +- 0, 0, 0, 0, 0, 0, 0, 1, \ +- /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ +- 1, 1, 1, 1, 1, 1, 1, 1, \ +- /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ +- 1, 1, 1, 1, 0, 1, 0, 1, \ +- /* ARG_POINTER:32 */ \ +- 1, \ +- /* FRAME_POINTER:33 */ \ +- 1 \ ++#define CALL_USED_REGISTERS \ ++{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ ++ 1, 1, 1, 1, 1, 1, 0, 0, \ ++ /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ ++ 0, 0, 0, 0, 0, 0, 0, 1, \ ++ /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ ++ 1, 1, 1, 1, 0, 1, 0, 1, \ ++ /* AP FP fs0 fs1 fs2 fs3 fs4 fs5 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs6 fs7 fs8 fs9 fs10 fs11 fs12 fs13 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fs30 fs31 fd16 fd17 fd18 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd19 fd20 fd21 fd22 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd23 fd24 fd25 fd26 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd27 fd28 fd29 fd30 */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, \ ++ /* fd31 LB LE LC */ \ ++ 1, 1, 1, 1, 1 \ + } + + /* In nds32 target, we have three levels of registers: + LOW_COST_REGS : $r0 ~ $r7 + MIDDLE_COST_REGS : $r8 ~ $r11, $r16 ~ $r19 + HIGH_COST_REGS : $r12 ~ $r14, $r20 ~ $r31 */ +-#define REG_ALLOC_ORDER \ +-{ \ +- 0, 1, 2, 3, 4, 5, 6, 7, \ +- 8, 9, 10, 11, 16, 17, 18, 19, \ +- 12, 13, 14, 15, 20, 21, 22, 23, \ +- 24, 25, 26, 27, 28, 29, 30, 31, \ +- 32, \ +- 33 \ ++#define REG_ALLOC_ORDER \ ++{ 0, 1, 2, 3, 4, 5, 6, 7, \ ++ 16, 17, 18, 19, 9, 10, 11, 12, \ ++ 13, 14, 8, 15, 20, 21, 22, 23, \ ++ 24, 25, 
26, 27, 28, 29, 30, 31, \ ++ 32, 33, 34, 35, 36, 37, 38, 39, \ ++ 40, 41, 42, 43, 44, 45, 46, 47, \ ++ 48, 49, 50, 51, 52, 53, 54, 55, \ ++ 56, 57, 58, 59, 60, 61, 62, 63, \ ++ 64, 65, 66, 67, 68, 69, 70, 71, \ ++ 72, 73, 74, 75, 76, 77, 78, 79, \ ++ 80, 81, 82, 83, 84, 85, 86, 87, \ ++ 88, 89, 90, 91, 92, 93, 94, 95, \ ++ 96, 97, 98, 99, 100, \ + } + ++/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order ++ to be rearranged based on optimizing for speed or size. */ ++#define ADJUST_REG_ALLOC_ORDER nds32_adjust_reg_alloc_order () ++ + /* Tell IRA to use the order we define rather than messing it up with its + own cost calculations. */ + #define HONOR_REG_ALLOC_ORDER optimize_size +@@ -609,11 +1280,7 @@ enum nds32_builtins + Define this macro to return nonzero in as many cases as possible + since doing so will allow GCC to perform better register allocation. + We can use general registers to tie QI/HI/SI modes together. */ +-#define MODES_TIEABLE_P(mode1, mode2) \ +- (GET_MODE_CLASS (mode1) == MODE_INT \ +- && GET_MODE_CLASS (mode2) == MODE_INT \ +- && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD \ +- && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD) ++#define MODES_TIEABLE_P(mode1, mode2) nds32_modes_tieable_p (mode1, mode2) + + + /* Register Classes. 
*/ +@@ -628,13 +1295,18 @@ enum nds32_builtins + enum reg_class + { + NO_REGS, ++ R5_REG, ++ R8_REG, + R15_TA_REG, + STACK_REG, ++ FRAME_POINTER_REG, + LOW_REGS, + MIDDLE_REGS, + HIGH_REGS, + GENERAL_REGS, + FRAME_REGS, ++ FP_REGS, ++ LOOP_REGS, + ALL_REGS, + LIM_REG_CLASSES + }; +@@ -644,27 +1316,50 @@ enum reg_class + #define REG_CLASS_NAMES \ + { \ + "NO_REGS", \ ++ "R5_REG", \ ++ "R8_REG", \ + "R15_TA_REG", \ + "STACK_REG", \ ++ "FRAME_POINTER_REG", \ + "LOW_REGS", \ + "MIDDLE_REGS", \ + "HIGH_REGS", \ + "GENERAL_REGS", \ + "FRAME_REGS", \ ++ "FP_REGS", \ ++ "LOOP_REGS", \ + "ALL_REGS" \ + } + + #define REG_CLASS_CONTENTS \ +-{ \ +- {0x00000000, 0x00000000}, /* NO_REGS : */ \ +- {0x00008000, 0x00000000}, /* R15_TA_REG : 15 */ \ +- {0x80000000, 0x00000000}, /* STACK_REG : 31 */ \ +- {0x000000ff, 0x00000000}, /* LOW_REGS : 0-7 */ \ +- {0x000f0fff, 0x00000000}, /* MIDDLE_REGS : 0-11, 16-19 */ \ +- {0xfff07000, 0x00000000}, /* HIGH_REGS : 12-14, 20-31 */ \ +- {0xffffffff, 0x00000000}, /* GENERAL_REGS: 0-31 */ \ +- {0x00000000, 0x00000003}, /* FRAME_REGS : 32, 33 */ \ +- {0xffffffff, 0x00000003} /* ALL_REGS : 0-31, 32, 33 */ \ ++{ /* NO_REGS */ \ ++ {0x00000000, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* R5_REG : 5 */ \ ++ {0x00000020, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* R8_REG : 8 */ \ ++ {0x00000100, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* R15_TA_REG : 15 */ \ ++ {0x00008000, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* STACK_REG : 31 */ \ ++ {0x80000000, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* FRAME_POINTER_REG : 28 */ \ ++ {0x10000000, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* LOW_REGS : 0-7 */ \ ++ {0x000000ff, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* MIDDLE_REGS : 0-11, 16-19 */ \ ++ {0x000f0fff, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* HIGH_REGS : 12-14, 20-31 */ \ ++ {0xfff07000, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* GENERAL_REGS : 0-31 */ \ ++ {0xffffffff, 0x00000000, 0x00000000, 0x00000000}, \ ++ /* FRAME_REGS : 32, 
33 */ \ ++ {0x00000000, 0x00000003, 0x00000000, 0x00000000}, \ ++ /* FP_REGS : 34-97 */ \ ++ {0x00000000, 0xfffffffc, 0xffffffff, 0x00000003}, \ ++ /* LOOP_REGS : 98-100 */ \ ++ {0x00000000, 0x00000000, 0x00000000, 0x0000001c}, \ ++ /* ALL_REGS : 0-100 */ \ ++ {0xffffffff, 0xffffffff, 0xffffffff, 0x0000001f} \ + } + + #define REGNO_REG_CLASS(regno) nds32_regno_reg_class (regno) +@@ -672,13 +1367,18 @@ enum reg_class + #define BASE_REG_CLASS GENERAL_REGS + #define INDEX_REG_CLASS GENERAL_REGS + ++#define TEST_REGNO(R, TEST, VALUE) \ ++ ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) ++ + /* Return nonzero if it is suitable for use as a + base register in operand addresses. + So far, we return nonzero only if "num" is a hard reg + of the suitable class or a pseudo register which is + allocated to a suitable hard reg. */ + #define REGNO_OK_FOR_BASE_P(num) \ +- ((num) < 32 || (unsigned) reg_renumber[num] < 32) ++ (TEST_REGNO (num, <, 32) \ ++ || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \ ++ || TEST_REGNO (num, ==, ARG_POINTER_REGNUM)) + + /* Return nonzero if it is suitable for use as a + index register in operand addresses. +@@ -688,7 +1388,15 @@ enum reg_class + The difference between an index register and a base register is that + the index register may be scaled. */ + #define REGNO_OK_FOR_INDEX_P(num) \ +- ((num) < 32 || (unsigned) reg_renumber[num] < 32) ++ (TEST_REGNO (num, <, 32) \ ++ || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \ ++ || TEST_REGNO (num, ==, ARG_POINTER_REGNUM)) ++ ++/* Don't spill a double-precision register to two single-precision registers. */ ++#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ ++ ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) \ ++ && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ ++ ? reg_classes_intersect_p (CLASS, FP_REGS) : 0) + + + /* Obsolete Macros for Defining Constraints. */ +@@ -707,6 +1415,11 @@ enum reg_class + #define FIRST_PARM_OFFSET(fundecl) \ + (NDS32_DOUBLE_WORD_ALIGN_P (crtl->args.pretend_args_size) ? 
0 : 4) + ++/* A C expression whose value is RTL representing the address in a stack frame ++ where the pointer to the caller's frame is stored. */ ++#define DYNAMIC_CHAIN_ADDRESS(frameaddr) \ ++ nds32_dynamic_chain_address (frameaddr) ++ + #define RETURN_ADDR_RTX(count, frameaddr) \ + nds32_return_addr_rtx (count, frameaddr) + +@@ -718,6 +1431,15 @@ enum reg_class + #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LP_REGNUM) + #define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LP_REGNUM) + ++/* Use $r0 $r1 to pass exception handling information. */ ++#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N) : INVALID_REGNUM) ++/* The register $r2 that represents a location in which to store a stack ++ adjustment to be applied before function return. ++ This is used to unwind the stack to an exception handler's call frame. */ ++#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 2) ++ ++#define DBX_REGISTER_NUMBER(REGNO) nds32_dbx_register_number (REGNO) ++ + #define STACK_POINTER_REGNUM SP_REGNUM + + #define FRAME_POINTER_REGNUM 33 +@@ -746,12 +1468,11 @@ enum reg_class + #define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \ + nds32_init_cumulative_args (&cum, fntype, libname, fndecl, n_named_args) + +-/* The REGNO is an unsigned integer but NDS32_GPR_ARG_FIRST_REGNUM may be 0. +- We better cast REGNO into signed integer so that we can avoid +- 'comparison of unsigned expression >= 0 is always true' warning. 
*/ +-#define FUNCTION_ARG_REGNO_P(regno) \ +- (((int) regno - NDS32_GPR_ARG_FIRST_REGNUM >= 0) \ +- && ((int) regno - NDS32_GPR_ARG_FIRST_REGNUM < NDS32_MAX_GPR_REGS_FOR_ARGS)) ++#define FUNCTION_ARG_REGNO_P(regno) \ ++ (IN_RANGE ((regno), NDS32_FIRST_GPR_REGNUM, NDS32_MAX_GPR_REGS_FOR_ARGS - 1) \ ++ || ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) \ ++ && IN_RANGE ((regno), NDS32_FPR_ARG_FIRST_REGNUM, \ ++ NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS - 1))) + + #define DEFAULT_PCC_STRUCT_RETURN 0 + +@@ -763,7 +1484,15 @@ enum reg_class + #define EXIT_IGNORE_STACK 1 + + #define FUNCTION_PROFILER(file, labelno) \ +- fprintf (file, "/* profiler %d */", (labelno)) ++ fprintf (file, "/* profiler %d */\n", (labelno)) ++ ++#define PROFILE_HOOK(LABEL) \ ++ { \ ++ rtx fun, lp; \ ++ lp = get_hard_reg_initial_val (Pmode, LP_REGNUM); \ ++ fun = gen_rtx_SYMBOL_REF (Pmode, "_mcount"); \ ++ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lp, Pmode); \ ++ } + + + /* Implementing the Varargs Macros. */ +@@ -780,13 +1509,13 @@ enum reg_class + The trampoline code for nds32 target must contains following parts: + + 1. instructions (4 * 4 = 16 bytes): +- get $pc first +- load chain_value to static chain register via $pc +- load nested function address to $r15 via $pc +- jump to desired nested function via $r15 ++ get $pc first ++ load chain_value to static chain register via $pc ++ load nested function address to $r15 via $pc ++ jump to desired nested function via $r15 + 2. data (4 * 2 = 8 bytes): +- chain_value +- nested function address ++ chain_value ++ nested function address + + Please check nds32.c implementation for more information. */ + #define TRAMPOLINE_SIZE 24 +@@ -811,9 +1540,22 @@ enum reg_class + /* We have "LW.bi Rt, [Ra], Rb" instruction form. 
*/ + #define HAVE_POST_MODIFY_REG 1 + +-#define CONSTANT_ADDRESS_P(x) (CONSTANT_P (x) && GET_CODE (x) != CONST_DOUBLE) ++#define USE_LOAD_POST_INCREMENT(mode) \ ++ (GET_MODE_SIZE (mode) <= GET_MODE_SIZE(DImode)) ++#define USE_LOAD_POST_DECREMENT(mode) \ ++ (GET_MODE_SIZE (mode) <= GET_MODE_SIZE(DImode)) ++#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT(mode) ++#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT(mode) ++ ++#define CONSTANT_ADDRESS_P(x) \ ++ (CONSTANT_P (x) && memory_address_p (GET_MODE (x), x)) + +-#define MAX_REGS_PER_ADDRESS 2 ++/* CONST_DOUBLE is legal without TARGET_FPU in legitimate_constant_p. ++ Therefore, let it be a legal PIC operand and split it later.*/ ++#define LEGITIMATE_PIC_OPERAND_P(x) \ ++ (GET_CODE (x) != CONST_DOUBLE || !(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) ++ ++#define MAX_REGS_PER_ADDRESS 3 + + + /* Anchored Addresses. */ +@@ -827,7 +1569,11 @@ enum reg_class + /* A C expression for the cost of a branch instruction. + A value of 1 is the default; + other values are interpreted relative to that. */ +-#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 2 : 0) ++#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 2 : 1) ++ ++/* Override BRANCH_COST heuristic which empirically produces worse ++ performance for removing short circuiting from the logical ops. */ ++#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + + #define SLOW_BYTE_ACCESS 1 + +@@ -857,12 +1603,17 @@ enum reg_class + + #define PIC_OFFSET_TABLE_REGNUM GP_REGNUM + ++#define SYMBOLIC_CONST_P(X) \ ++(GET_CODE (X) == SYMBOL_REF \ ++ || GET_CODE (X) == LABEL_REF \ ++ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) ++ + + /* Defining the Output Assembler Language. */ + + #define ASM_COMMENT_START "!" + +-#define ASM_APP_ON "! #APP" ++#define ASM_APP_ON "! #APP\n" + + #define ASM_APP_OFF "! #NO_APP\n" + +@@ -877,14 +1628,77 @@ enum reg_class + + #define LOCAL_LABEL_PREFIX "." 
+ +-#define REGISTER_NAMES \ +-{ \ +- "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ ++#define REGISTER_NAMES \ ++{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ + "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$ta", \ + "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ + "$r24", "$r25", "$r26", "$r27", "$fp", "$gp", "$lp", "$sp", \ +- "$AP", \ +- "$SFP" \ ++ "$AP", "$SFP", "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", \ ++ "$fs6", "$fs7", "$fs8", "$fs9", "$fs10","$fs11","$fs12","$fs13",\ ++ "$fs14","$fs15","$fs16","$fs17","$fs18","$fs19","$fs20","$fs21",\ ++ "$fs22","$fs23","$fs24","$fs25","$fs26","$fs27","$fs28","$fs29",\ ++ "$fs30","$fs31","$fs32","$fs33","$fs34","$fs35","$fs36","$fs37",\ ++ "$fs38","$fs39","$fs40","$fs41","$fs42","$fs43","$fs44","$fs45",\ ++ "$fs46","$fs47","$fs48","$fs49","$fs50","$fs51","$fs52","$fs53",\ ++ "$fs54","$fs55","$fs56","$fs57","$fs58","$fs59","$fs60","$fs61",\ ++ "$fs62","$fs63", "LB", "LE", "LC" \ ++} ++ ++#define ADDITIONAL_REGISTER_NAMES \ ++{ \ ++ {"$r15", 15}, \ ++ {"$r28", 28}, {"$r29", 29}, {"$r30", 30}, {"$r31", 31}, \ ++ {"$a0", 0}, {"$a1", 1}, {"$a2", 2}, \ ++ {"$a3", 3}, {"$a4", 4}, {"$a5", 5}, \ ++ {"$s0", 6}, {"$s1", 7}, {"$s2", 8}, {"$s3", 9}, \ ++ {"$s4", 10}, {"$s5", 11}, {"$s6", 12}, {"$s7", 13}, \ ++ {"$s8", 14}, \ ++ {"$t0", 16}, {"$t1", 17}, {"$t2", 18}, {"$t3", 19}, \ ++ {"$t4", 20}, {"$t5", 21}, {"$t6", 22}, {"$t7", 23}, \ ++ {"$t8", 24}, {"$t9", 25}, \ ++ {"$p0", 26}, {"$p1", 27}, \ ++ {"$h0", 0}, {"$h1", 1}, {"$h2", 2}, {"$h3", 3}, \ ++ {"$h4", 4}, {"$h5", 5}, {"$h6", 6}, {"$h7", 7}, \ ++ {"$h8", 8}, {"$h9", 9}, {"$h10", 10}, {"$h11", 11}, \ ++ {"$h12", 16}, {"$h13", 17}, {"$h14", 18}, {"$h15", 19}, \ ++ {"$o0", 0}, {"$o1", 1}, {"$o2", 2}, {"$o3", 3}, \ ++ {"$o4", 4}, {"$o5", 5}, {"$o6", 6}, {"$o7", 7}, \ ++} ++ ++#define OVERLAPPING_REGISTER_NAMES \ ++{ \ ++ {"$fd0", NDS32_FIRST_FPR_REGNUM + 0, 2}, \ ++ {"$fd1", NDS32_FIRST_FPR_REGNUM + 2, 2}, \ ++ {"$fd2", 
NDS32_FIRST_FPR_REGNUM + 4, 2}, \ ++ {"$fd3", NDS32_FIRST_FPR_REGNUM + 6, 2}, \ ++ {"$fd4", NDS32_FIRST_FPR_REGNUM + 8, 2}, \ ++ {"$fd5", NDS32_FIRST_FPR_REGNUM + 10, 2}, \ ++ {"$fd6", NDS32_FIRST_FPR_REGNUM + 12, 2}, \ ++ {"$fd7", NDS32_FIRST_FPR_REGNUM + 14, 2}, \ ++ {"$fd8", NDS32_FIRST_FPR_REGNUM + 16, 2}, \ ++ {"$fd9", NDS32_FIRST_FPR_REGNUM + 18, 2}, \ ++ {"$fd10", NDS32_FIRST_FPR_REGNUM + 20, 2}, \ ++ {"$fd11", NDS32_FIRST_FPR_REGNUM + 22, 2}, \ ++ {"$fd12", NDS32_FIRST_FPR_REGNUM + 24, 2}, \ ++ {"$fd13", NDS32_FIRST_FPR_REGNUM + 26, 2}, \ ++ {"$fd14", NDS32_FIRST_FPR_REGNUM + 28, 2}, \ ++ {"$fd15", NDS32_FIRST_FPR_REGNUM + 30, 2}, \ ++ {"$fd16", NDS32_FIRST_FPR_REGNUM + 32, 2}, \ ++ {"$fd17", NDS32_FIRST_FPR_REGNUM + 34, 2}, \ ++ {"$fd18", NDS32_FIRST_FPR_REGNUM + 36, 2}, \ ++ {"$fd19", NDS32_FIRST_FPR_REGNUM + 38, 2}, \ ++ {"$fd20", NDS32_FIRST_FPR_REGNUM + 40, 2}, \ ++ {"$fd21", NDS32_FIRST_FPR_REGNUM + 42, 2}, \ ++ {"$fd22", NDS32_FIRST_FPR_REGNUM + 44, 2}, \ ++ {"$fd23", NDS32_FIRST_FPR_REGNUM + 46, 2}, \ ++ {"$fd24", NDS32_FIRST_FPR_REGNUM + 48, 2}, \ ++ {"$fd25", NDS32_FIRST_FPR_REGNUM + 50, 2}, \ ++ {"$fd26", NDS32_FIRST_FPR_REGNUM + 52, 2}, \ ++ {"$fd27", NDS32_FIRST_FPR_REGNUM + 54, 2}, \ ++ {"$fd28", NDS32_FIRST_FPR_REGNUM + 56, 2}, \ ++ {"$fd29", NDS32_FIRST_FPR_REGNUM + 58, 2}, \ ++ {"$fd30", NDS32_FIRST_FPR_REGNUM + 60, 2}, \ ++ {"$fd31", NDS32_FIRST_FPR_REGNUM + 62, 2}, \ + } + + /* Output normal jump table entry. 
*/ +@@ -896,19 +1710,19 @@ enum reg_class + do \ + { \ + switch (GET_MODE (body)) \ +- { \ +- case QImode: \ +- asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel); \ +- break; \ +- case HImode: \ +- asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel); \ +- break; \ +- case SImode: \ +- asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel); \ +- break; \ +- default: \ +- gcc_unreachable(); \ +- } \ ++ { \ ++ case QImode: \ ++ asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel); \ ++ break; \ ++ case HImode: \ ++ asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel); \ ++ break; \ ++ case SImode: \ ++ asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel); \ ++ break; \ ++ default: \ ++ gcc_unreachable(); \ ++ } \ + } while (0) + + /* We have to undef it first because elfos.h formerly define it +@@ -925,10 +1739,10 @@ enum reg_class + do \ + { \ + /* Because our jump table is in text section, \ +- we need to make sure 2-byte alignment after \ +- the jump table for instructions fetch. */ \ ++ we need to make sure 2-byte alignment after \ ++ the jump table for instructions fetch. */ \ + if (GET_MODE (PATTERN (table)) == QImode) \ +- ASM_OUTPUT_ALIGN (stream, 1); \ ++ ASM_OUTPUT_ALIGN (stream, 1); \ + asm_fprintf (stream, "\t! Jump Table End\n"); \ + } while (0) + +@@ -992,9 +1806,7 @@ enum reg_class + /* Return the preferred mode for and addr_diff_vec when the mininum + and maximum offset are known. */ + #define CASE_VECTOR_SHORTEN_MODE(min_offset, max_offset, body) \ +- ((min_offset < 0 || max_offset >= 0x2000 ) ? SImode \ +- : (max_offset >= 100) ? HImode \ +- : QImode) ++ nds32_case_vector_shorten_mode (min_offset, max_offset, body) + + /* Generate pc relative jump table when -fpic or -Os. */ + #define CASE_VECTOR_PC_RELATIVE (flag_pic || optimize_size) +@@ -1027,6 +1839,11 @@ enum reg_class + when the condition is true. 
*/ + #define STORE_FLAG_VALUE 1 + ++/* A C expression that indicates whether the architecture defines a value for ++ clz or ctz with a zero operand. In nds32 clz for 0 result 32 is defined ++ in ISA spec */ ++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) ++ + /* An alias for the machine mode for pointers. */ + #define Pmode SImode + +diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md +index 5cdd8b2..557c466 100644 +--- a/gcc/config/nds32/nds32.md ++++ b/gcc/config/nds32/nds32.md +@@ -46,58 +46,144 @@ + ;; Include DImode/DFmode operations. + (include "nds32-doubleword.md") + ++;; Include floating-point patterns. ++(include "nds32-fpu.md") ++ + ;; Include peephole patterns. + (include "nds32-peephole2.md") + + ++;; ------------------------------------------------------------------------ ++ ++;; CPU pipeline model. ++(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,panther,simple" ++ (const ++ (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") ++ (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") ++ (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") ++ (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") ++ (match_test "nds32_cpu_option == CPU_N10") (const_string "n10") ++ (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf") ++ (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") ++ (match_test "nds32_cpu_option == CPU_N13") (const_string "n13") ++ (match_test "nds32_cpu_option == CPU_PANTHER") (const_string "panther") ++ (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] ++ (const_string "n9")))) ++ + ;; Insn type, it is used to default other attribute values. 
+ (define_attr "type" +- "unknown,move,load,store,alu,compare,branch,call,misc" ++ "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\ ++ falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\ ++ dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext" + (const_string "unknown")) + ++;; Insn sub-type ++(define_attr "subtype" ++ "simple,shift,saturation" ++ (const_string "simple")) + + ;; Length, in bytes, default is 4-bytes. + (define_attr "length" "" (const_int 4)) + ++;; Indicate the amount of micro instructions. ++(define_attr "combo" ++ "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25" ++ (const_string "1")) ++ ++;; Insn in which feature set, it is used to enable/disable insn alternatives. ++;; v1 : Baseline Instructions ++;; v2 : Baseline Version 2 Instructions ++;; v3m : Baseline Version 3m Instructions ++;; v3 : Baseline Version 3 Instructions ++;; pe1 : Performance Extension Instructions ++;; pe2 : Performance Extension Version 2 Instructions ++;; se : String Extension instructions ++(define_attr "feature" ++ "v1,v2,v3m,v3,pe1,pe2,se,fpu" ++ (const_string "v1")) + + ;; Enabled, which is used to enable/disable insn alternatives. + ;; Note that we use length and TARGET_16_BIT here as criteria. +-;; If the instruction pattern already check TARGET_16_BIT to +-;; determine the length by itself, its enabled attribute should be +-;; always 1 to avoid the conflict with the settings here. +-(define_attr "enabled" "" +- (cond [(and (eq_attr "length" "2") +- (match_test "!TARGET_16_BIT")) +- (const_int 0)] +- (const_int 1))) ++;; If the instruction pattern already check TARGET_16_BIT to determine ++;; the length by itself, its enabled attribute should be customized to ++;; avoid the conflict between length attribute and this default setting. 
++(define_attr "enabled" "no,yes" ++ (if_then_else ++ (and (eq_attr "length" "2") ++ (match_test "!TARGET_16_BIT")) ++ (const_string "no") ++ (cond [(eq_attr "feature" "v1") (const_string "yes") ++ (eq_attr "feature" "v2") (if_then_else (match_test "TARGET_ISA_V2 || TARGET_ISA_V3 || TARGET_ISA_V3M") ++ (const_string "yes") ++ (const_string "no")) ++ (eq_attr "feature" "v3") (if_then_else (match_test "TARGET_ISA_V3") ++ (const_string "yes") ++ (const_string "no")) ++ (eq_attr "feature" "v3m") (if_then_else (match_test "TARGET_ISA_V3 || TARGET_ISA_V3M") ++ (const_string "yes") ++ (const_string "no")) ++ (eq_attr "feature" "pe1") (if_then_else (match_test "TARGET_EXT_PERF") ++ (const_string "yes") ++ (const_string "no")) ++ (eq_attr "feature" "pe2") (if_then_else (match_test "TARGET_EXT_PERF2") ++ (const_string "yes") ++ (const_string "no")) ++ (eq_attr "feature" "se") (if_then_else (match_test "TARGET_EXT_STRING") ++ (const_string "yes") ++ (const_string "no")) ++ (eq_attr "feature" "fpu") (if_then_else (match_test "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE") ++ (const_string "yes") ++ (const_string "no"))] ++ (const_string "yes")))) + + + ;; ---------------------------------------------------------------------------- + ++(include "nds32-dspext.md") + + ;; Move instructions. + + ;; For QImode and HImode, the immediate value can be fit in imm20s. + ;; So there is no need to split rtx for QI and HI patterns. + +-(define_expand "movqi" +- [(set (match_operand:QI 0 "general_operand" "") +- (match_operand:QI 1 "general_operand" ""))] ++(define_expand "mov<mode>" ++ [(set (match_operand:QIHI 0 "general_operand" "") ++ (match_operand:QIHI 1 "general_operand" ""))] + "" + { + /* Need to force register if mem <- !reg. 
*/ + if (MEM_P (operands[0]) && !REG_P (operands[1])) +- operands[1] = force_reg (QImode, operands[1]); ++ operands[1] = force_reg (<MODE>mode, operands[1]); ++ ++ if (MEM_P (operands[1]) && optimize > 0) ++ { ++ rtx reg = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_zero_extend<mode>si2 (reg, operands[1])); ++ operands[1] = gen_lowpart (<MODE>mode, reg); ++ } + }) + +-(define_expand "movhi" +- [(set (match_operand:HI 0 "general_operand" "") +- (match_operand:HI 1 "general_operand" ""))] ++(define_expand "movmisalign<mode>" ++ [(set (match_operand:SIDI 0 "general_operand" "") ++ (match_operand:SIDI 1 "general_operand" ""))] + "" + { +- /* Need to force register if mem <- !reg. */ ++ rtx addr; + if (MEM_P (operands[0]) && !REG_P (operands[1])) +- operands[1] = force_reg (HImode, operands[1]); ++ operands[1] = force_reg (<MODE>mode, operands[1]); ++ ++ if (MEM_P (operands[0])) ++ { ++ addr = force_reg (Pmode, XEXP (operands[0], 0)); ++ emit_insn (gen_unaligned_store<mode> (addr, operands[1])); ++ } ++ else ++ { ++ addr = force_reg (Pmode, XEXP (operands[1], 0)); ++ emit_insn (gen_unaligned_load<mode> (operands[0], addr)); ++ } ++ DONE; + }) + + (define_expand "movsi" +@@ -130,12 +216,33 @@ + low12_int)); + DONE; + } ++ ++ if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1])) ++ { ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (operands[1])) ++ { ++ nds32_expand_ict_move (operands); ++ DONE; ++ } ++ else if (nds32_tls_referenced_p (operands [1])) ++ { ++ nds32_expand_tls_move (operands); ++ DONE; ++ } ++ else if (flag_pic) ++ { ++ nds32_expand_pic_move (operands); ++ DONE; ++ } ++ } + }) + + (define_insn "*mov<mode>" +- [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, d, r, r, r") +- (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))] +- "" ++ [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r,U45,U33,U37,U45, m, l, l, 
l, d, d, r, d, r, r, r, *f, *f, r, *f, Q, A") ++ (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r,U45,U33,U37,U45,Ufe, m,Ip05, Is05, Is20, Ihig, *f, r, *f, Q, *f, r"))] ++ "register_operand(operands[0], <MODE>mode) ++ || register_operand(operands[1], <MODE>mode)" + { + switch (which_alternative) + { +@@ -154,37 +261,54 @@ + case 8: + case 9: + case 10: +- return nds32_output_16bit_load (operands, <byte>); + case 11: +- return nds32_output_32bit_load (operands, <byte>); ++ return nds32_output_16bit_load (operands, <byte>); + case 12: +- return "movpi45\t%0, %1"; ++ return nds32_output_32bit_load (operands, <byte>); + case 13: +- return "movi55\t%0, %1"; ++ return "movpi45\t%0, %1"; + case 14: +- return "movi\t%0, %1"; ++ return "movi55\t%0, %1"; + case 15: ++ return "movi\t%0, %1"; ++ case 16: + return "sethi\t%0, hi20(%1)"; ++ case 17: ++ if (TARGET_FPU_SINGLE) ++ return "fcpyss\t%0, %1, %1"; ++ else ++ return "#"; ++ case 18: ++ return "fmtsr\t%1, %0"; ++ case 19: ++ return "fmfsr\t%0, %1"; ++ case 20: ++ return nds32_output_float_load (operands); ++ case 21: ++ return nds32_output_float_store (operands); ++ case 22: ++ return "mtusr\t%1, %0"; + default: + gcc_unreachable (); + } + } +- [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu") +- (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 2, 2, 4, 4")]) ++ [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore,alu") ++ (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4") ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1")]) + + + ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF + ;; are able to match such instruction template. 
+-(define_insn "*move_addr" +- [(set (match_operand:SI 0 "register_operand" "=l, r") +- (match_operand:SI 1 "nds32_symbolic_operand" " i, i"))] ++(define_insn "move_addr" ++ [(set (match_operand:SI 0 "nds32_general_register_operand" "=l, r") ++ (match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))] + "" + "la\t%0, %1" +- [(set_attr "type" "move") ++ [(set_attr "type" "alu") + (set_attr "length" "8")]) + + +-(define_insn "*sethi" ++(define_insn "sethi" + [(set (match_operand:SI 0 "register_operand" "=r") + (high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))] + "" +@@ -193,7 +317,7 @@ + (set_attr "length" "4")]) + + +-(define_insn "*lo_sum" ++(define_insn "lo_sum" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_symbolic_operand" " i")))] +@@ -208,8 +332,8 @@ + ;; Zero extension instructions. + + (define_insn "zero_extend<mode>si2" +- [(set (match_operand:SI 0 "register_operand" "=l, r, l, *r") +- (zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33, m")))] ++ [(set (match_operand:SI 0 "register_operand" "=l, r, l, *r") ++ (zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r,U33, m")))] + "" + { + switch (which_alternative) +@@ -245,7 +369,7 @@ + case 1: + return "se<size>\t%0, %1"; + case 2: +- return nds32_output_32bit_load_s (operands, <byte>); ++ return nds32_output_32bit_load_se (operands, <byte>); + + default: + gcc_unreachable (); +@@ -256,25 +380,70 @@ + + + ;; ---------------------------------------------------------------------------- ++(define_expand "extv" ++ [(set (match_operand 0 "register_operand" "") ++ (sign_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "" ++{ ++ enum nds32_expand_result_type result = nds32_expand_extv (operands); ++ switch (result) ++ { ++ case EXPAND_DONE: ++ DONE; ++ break; ++ case 
EXPAND_FAIL: ++ FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++}) ++ ++(define_expand "insv" ++ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") ++ (match_operand 1 "const_int_operand" "") ++ (match_operand 2 "const_int_operand" "")) ++ (match_operand 3 "register_operand" ""))] ++ "" ++{ ++ enum nds32_expand_result_type result = nds32_expand_insv (operands); ++ switch (result) ++ { ++ case EXPAND_DONE: ++ DONE; ++ break; ++ case EXPAND_FAIL: ++ FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++}) + + ;; Arithmetic instructions. + +-(define_insn "add<mode>3" +- [(set (match_operand:QIHISI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") +- (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") +- (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03, r, l, Is10, Iu06, Is15, r")))] ++(define_insn "addsi3" ++ [(set (match_operand:SI 0 "register_operand" "= d, l, d, l, d,l, k, l, r, r") ++ (plus:SI (match_operand:SI 1 "register_operand" "% 0, l, 0, l, 0,l, 0, k, r, r") ++ (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r,l,Is10,IU06, Is15, r")))] + "" + { + switch (which_alternative) + { + case 0: + /* addi Rt4,Rt4,-x ==> subi45 Rt4,x +- where 0 <= x <= 31 */ ++ where 0 <= x <= 31 */ + operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode); + return "subi45\t%0, %2"; + case 1: + /* addi Rt3,Ra3,-x ==> subi333 Rt3,Ra3,x +- where 0 <= x <= 7 */ ++ where 0 <= x <= 7 */ + operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode); + return "subi333\t%0, %1, %2"; + case 2: +@@ -298,19 +467,20 @@ + gcc_unreachable (); + } + } +- [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") +- (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4")]) +- +-(define_insn "sub<mode>3" +- [(set (match_operand:QIHISI 0 "register_operand" "=d, l, r, r") +- (minus:QIHISI 
(match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r") +- (match_operand:QIHISI 2 "register_operand" " r, l, r, r")))] ++ [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") ++ (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4") ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, v2, v1, v1, v1")]) ++ ++(define_insn "subsi3" ++ [(set (match_operand:SI 0 "register_operand" "=d, l, r, r") ++ (minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" " 0, l, Is15, r") ++ (match_operand:SI 2 "register_operand" " r, l, r, r")))] + "" + "@ +- sub45\t%0, %2 +- sub333\t%0, %1, %2 +- subri\t%0, %2, %1 +- sub\t%0, %1, %2" ++ sub45\t%0, %2 ++ sub333\t%0, %1, %2 ++ subri\t%0, %2, %1 ++ sub\t%0, %1, %2" + [(set_attr "type" "alu,alu,alu,alu") + (set_attr "length" " 2, 2, 4, 4")]) + +@@ -320,10 +490,10 @@ + ;; and needs to ensure it is exact_log2 value. + (define_insn "*add_slli" + [(set (match_operand:SI 0 "register_operand" "=r") +- (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") ++ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "immediate_operand" " i")) + (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3 ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size) + && (exact_log2 (INTVAL (operands[2])) != -1) + && (exact_log2 (INTVAL (operands[2])) <= 31)" + { +@@ -333,18 +503,20 @@ + + return "add_slli\t%0, %3, %1, %2"; + } +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")]) + + (define_insn "*add_srli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) +- (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 
"nds32_imm5u_operand" " Iu05")) ++ (match_operand:SI 3 "register_operand" " r")))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "add_srli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")]) + + + ;; GCC intends to simplify (minus (reg) (ashift ...)) +@@ -355,7 +527,7 @@ + (minus:SI (match_operand:SI 1 "register_operand" " r") + (mult:SI (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "immediate_operand" " i"))))] +- "TARGET_ISA_V3 ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size) + && (exact_log2 (INTVAL (operands[3])) != -1) + && (exact_log2 (INTVAL (operands[3])) <= 31)" + { +@@ -365,32 +537,35 @@ + + return "sub_slli\t%0, %1, %2, %3"; + } +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")]) + + (define_insn "*sub_srli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (minus:SI (match_operand:SI 1 "register_operand" " r") +- (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") +- (match_operand:SI 3 "immediate_operand" " Iu05"))))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (minus:SI (match_operand:SI 1 "register_operand" " r") ++ (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "nds32_imm5u_operand" " Iu05"))))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "sub_srli\t%0, %1, %2, %3" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")]) + + + ;; Multiplication instructions. 
+ + (define_insn "mulsi3" +- [(set (match_operand:SI 0 "register_operand" "=w, r") ++ [(set (match_operand:SI 0 "register_operand" "=l, r") + (mult:SI (match_operand:SI 1 "register_operand" "%0, r") +- (match_operand:SI 2 "register_operand" " w, r")))] ++ (match_operand:SI 2 "register_operand" " l, r")))] + "" + "@ +- mul33\t%0, %2 +- mul\t%0, %1, %2" +- [(set_attr "type" "alu,alu") +- (set_attr "length" " 2, 4")]) ++ mul33\t%0, %2 ++ mul\t%0, %1, %2" ++ [(set_attr "type" "mul,mul") ++ (set_attr "length" " 2, 4") ++ (set_attr "feature" "v3m, v1")]) + + (define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") +@@ -398,7 +573,7 @@ + (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))] + "TARGET_ISA_V2 || TARGET_ISA_V3" + "mulsr64\t%0, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "mul") + (set_attr "length" "4")]) + + (define_insn "umulsidi3" +@@ -407,7 +582,7 @@ + (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))] + "TARGET_ISA_V2 || TARGET_ISA_V3" + "mulr64\t%0, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "mul") + (set_attr "length" "4")]) + + +@@ -415,32 +590,32 @@ + + (define_insn "*maddr32_0" + [(set (match_operand:SI 0 "register_operand" "=r") +- (plus:SI (match_operand:SI 3 "register_operand" " 0") +- (mult:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "register_operand" " r"))))] ++ (plus:SI (match_operand:SI 3 "register_operand" " 0") ++ (mult:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r"))))] + "" + "maddr32\t%0, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "mac") + (set_attr "length" "4")]) + + (define_insn "*maddr32_1" + [(set (match_operand:SI 0 "register_operand" "=r") +- (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "register_operand" " r")) +- (match_operand:SI 3 "register_operand" " 0")))] ++ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") ++ 
(match_operand:SI 2 "register_operand" " r")) ++ (match_operand:SI 3 "register_operand" " 0")))] + "" + "maddr32\t%0, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "mac") + (set_attr "length" "4")]) + + (define_insn "*msubr32" + [(set (match_operand:SI 0 "register_operand" "=r") +- (minus:SI (match_operand:SI 3 "register_operand" " 0") +- (mult:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "register_operand" " r"))))] ++ (minus:SI (match_operand:SI 3 "register_operand" " 0") ++ (mult:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r"))))] + "" + "msubr32\t%0, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "mac") + (set_attr "length" "4")]) + + +@@ -448,26 +623,46 @@ + + (define_insn "divmodsi4" + [(set (match_operand:SI 0 "register_operand" "=r") +- (div:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "register_operand" " r"))) ++ (div:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r"))) + (set (match_operand:SI 3 "register_operand" "=r") +- (mod:SI (match_dup 1) (match_dup 2)))] ++ (mod:SI (match_dup 1) (match_dup 2)))] + "" + "divsr\t%0, %3, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "div") + (set_attr "length" "4")]) + + (define_insn "udivmodsi4" + [(set (match_operand:SI 0 "register_operand" "=r") +- (udiv:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "register_operand" " r"))) ++ (udiv:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r"))) + (set (match_operand:SI 3 "register_operand" "=r") +- (umod:SI (match_dup 1) (match_dup 2)))] ++ (umod:SI (match_dup 1) (match_dup 2)))] + "" + "divr\t%0, %3, %1, %2" +- [(set_attr "type" "alu") ++ [(set_attr "type" "div") ++ (set_attr "length" "4")]) ++ ++;; divsr/divr will keep quotient only when quotient and remainder is the same ++;; register in our ISA spec, it's can reduce 1 
register presure if we don't ++;; want remainder. ++(define_insn "divsi4" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (div:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")))] ++ "" ++ "divsr\t%0, %0, %1, %2" ++ [(set_attr "type" "div") + (set_attr "length" "4")]) + ++(define_insn "udivsi4" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (udiv:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")))] ++ "" ++ "divr\t%0, %0, %1, %2" ++ [(set_attr "type" "div") ++ (set_attr "length" "4")]) + + ;; ---------------------------------------------------------------------------- + +@@ -488,14 +683,28 @@ + (set_attr "length" "4")] + ) + +-(define_insn "andsi3" +- [(set (match_operand:SI 0 "register_operand" "=w, r, l, l, l, l, l, l, r, r, r, r, r") +- (and:SI (match_operand:SI 1 "register_operand" "%0, r, l, l, l, l, 0, 0, r, r, r, r, r") +- (match_operand:SI 2 "general_operand" " w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))] ++(define_expand "andsi3" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (and:SI (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "nds32_reg_constant_operand" "")))] ++ "" ++{ ++ if (CONST_INT_P (operands[2]) ++ && !nds32_and_operand (operands[2], SImode)) ++ { ++ nds32_expand_constant (SImode, INTVAL (operands[2]), ++ operands[0], operands[1]); ++ DONE; ++ } ++}) ++ ++(define_insn "*andsi3" ++ [(set (match_operand:SI 0 "register_operand" "=l, r, l, l, l, l, l, l, r, r, r, r, r") ++ (and:SI (match_operand:SI 1 "register_operand" "%0, r, l, l, l, l, 0, 0, r, r, r, r, r") ++ (match_operand:SI 2 "nds32_and_operand" " l, r,Izeb,Izeh,Ixls,Ix11,Ibms,Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))] + "" + { + HOST_WIDE_INT mask = INTVAL (operands[2]); +- int zero_position; + + /* 16-bit andi instructions: + andi Rt3,Ra3,0xff -> zeb33 Rt3,Ra3 +@@ -520,8 +729,7 @@ + case 5: + return "x11b33\t%0, %1"; + case 6: 
+- operands[2] = GEN_INT (floor_log2 (mask)); +- return "bmski33\t%0, %2"; ++ return "bmski33\t%0, %B2"; + case 7: + operands[2] = GEN_INT (floor_log2 (mask + 1) - 1); + return "fexti33\t%0, %2"; +@@ -535,47 +743,35 @@ + operands[2] = GEN_INT (~mask); + return "bitci\t%0, %1, %2"; + case 12: +- /* If we reach this alternative, +- it must pass the nds32_can_use_bclr_p() test, +- so that we can guarantee there is only one 0-bit +- within the immediate value. */ +- for (zero_position = 31; zero_position >= 0; zero_position--) +- { +- if ((INTVAL (operands[2]) & (1 << zero_position)) == 0) +- { +- /* Found the 0-bit position. */ +- operands[2] = GEN_INT (zero_position); +- break; +- } +- } +- return "bclr\t%0, %1, %2"; ++ return "bclr\t%0, %1, %b2"; + + default: + gcc_unreachable (); + } + } +- [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") +- (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4")]) ++ [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") ++ (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4") ++ (set_attr "feature" "v3m, v1, v1, v1, v1, v1,v3m,v3m, v1, v1, v1, v3,pe1")]) + + (define_insn "*and_slli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (and:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) +- (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) ++ (match_operand:SI 3 "register_operand" " r")))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "and_slli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "length" "4")]) + + (define_insn "*and_srli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (and:SI (lshiftrt:SI 
(match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) +- (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) ++ (match_operand:SI 3 "register_operand" " r")))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "and_srli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "length" "4")]) + + + ;; ---------------------------------------------------------------------------- +@@ -584,58 +780,50 @@ + + ;; For V3/V3M ISA, we have 'or33' instruction. + ;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2. +-(define_insn "iorsi3" +- [(set (match_operand:SI 0 "register_operand" "=w, r, r, r") +- (ior:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") +- (match_operand:SI 2 "general_operand" " w, r, Iu15, Ie15")))] ++ ++(define_expand "iorsi3" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (ior:SI (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "general_operand" "")))] + "" + { +- int one_position; +- +- switch (which_alternative) +- { +- case 0: +- return "or33\t%0, %2"; +- case 1: +- return "or\t%0, %1, %2"; +- case 2: +- return "ori\t%0, %1, %2"; +- case 3: +- /* If we reach this alternative, +- it must pass the nds32_can_use_bset_p() test, +- so that we can guarantee there is only one 1-bit +- within the immediate value. */ +- /* Use exact_log2() to search the 1-bit position. 
*/ +- one_position = exact_log2 (INTVAL (operands[2])); +- operands[2] = GEN_INT (one_position); +- return "bset\t%0, %1, %2"; ++ if (!nds32_ior_operand (operands[2], SImode)) ++ operands[2] = force_reg (SImode, operands[2]); ++}) + +- default: +- gcc_unreachable (); +- } +-} +- [(set_attr "type" "alu,alu,alu,alu") +- (set_attr "length" " 2, 4, 4, 4")]) ++(define_insn "*iorsi3" ++ [(set (match_operand:SI 0 "register_operand" "=l, r, r, r") ++ (ior:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") ++ (match_operand:SI 2 "nds32_ior_operand" " l, r, Iu15, Ie15")))] ++ "" ++ "@ ++ or33\t%0, %2 ++ or\t%0, %1, %2 ++ ori\t%0, %1, %2 ++ bset\t%0, %1, %B2" ++ [(set_attr "type" "alu,alu,alu,alu") ++ (set_attr "length" " 2, 4, 4, 4") ++ (set_attr "feature" "v3m, v1, v1,pe1")]) + + (define_insn "*or_slli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) +- (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) ++ (match_operand:SI 3 "register_operand" " r")))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "or_slli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "length" "4")]) + + (define_insn "*or_srli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) +- (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) ++ (match_operand:SI 3 "register_operand" " 
r")))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "or_srli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "length" "4")]) + + + ;; ---------------------------------------------------------------------------- +@@ -644,71 +832,64 @@ + + ;; For V3/V3M ISA, we have 'xor33' instruction. + ;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2. +-(define_insn "xorsi3" +- [(set (match_operand:SI 0 "register_operand" "=w, r, r, r") +- (xor:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") +- (match_operand:SI 2 "general_operand" " w, r, Iu15, It15")))] ++ ++(define_expand "xorsi3" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (xor:SI (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "general_operand" "")))] + "" + { +- int one_position; +- +- switch (which_alternative) +- { +- case 0: +- return "xor33\t%0, %2"; +- case 1: +- return "xor\t%0, %1, %2"; +- case 2: +- return "xori\t%0, %1, %2"; +- case 3: +- /* If we reach this alternative, +- it must pass the nds32_can_use_btgl_p() test, +- so that we can guarantee there is only one 1-bit +- within the immediate value. */ +- /* Use exact_log2() to search the 1-bit position. 
*/ +- one_position = exact_log2 (INTVAL (operands[2])); +- operands[2] = GEN_INT (one_position); +- return "btgl\t%0, %1, %2"; ++ if (!nds32_xor_operand (operands[2], SImode)) ++ operands[2] = force_reg (SImode, operands[2]); ++}) + +- default: +- gcc_unreachable (); +- } +-} +- [(set_attr "type" "alu,alu,alu,alu") +- (set_attr "length" " 2, 4, 4, 4")]) ++(define_insn "*xorsi3" ++ [(set (match_operand:SI 0 "register_operand" "=l, r, r, r") ++ (xor:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") ++ (match_operand:SI 2 "nds32_xor_operand" " l, r, Iu15, It15")))] ++ "" ++ "@ ++ xor33\t%0, %2 ++ xor\t%0, %1, %2 ++ xori\t%0, %1, %2 ++ btgl\t%0, %1, %B2" ++ [(set_attr "type" "alu,alu,alu,alu") ++ (set_attr "length" " 2, 4, 4, 4") ++ (set_attr "feature" "v3m, v1, v1,pe1")]) + + (define_insn "*xor_slli" + [(set (match_operand:SI 0 "register_operand" "= r") + (xor:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "xor_slli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr "type" "alu_shift") ++ (set_attr "length" "4")]) + + (define_insn "*xor_srli" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") +- (match_operand:SI 2 "immediate_operand" " Iu05")) +- (match_operand:SI 3 "register_operand" " r")))] +- "TARGET_ISA_V3" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) ++ (match_operand:SI 3 "register_operand" " r")))] ++ "TARGET_ISA_V3 && (TARGET_PIPELINE_PANTHER || optimize_size)" + "xor_srli\t%0, %3, %1, %2" +- [(set_attr "type" "alu") +- (set_attr "length" "4")]) ++ [(set_attr 
"type" "alu_shift") ++ (set_attr "length" "4")]) + + ;; Rotate Right Instructions. + +-(define_insn "rotrsi3" +- [(set (match_operand:SI 0 "register_operand" "= r, r") +- (rotatert:SI (match_operand:SI 1 "register_operand" " r, r") +- (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))] ++(define_insn "*rotrsi3" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (rotatert:SI (match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] + "" + "@ +- rotri\t%0, %1, %2 +- rotr\t%0, %1, %2" +- [(set_attr "type" "alu,alu") +- (set_attr "length" " 4, 4")]) ++ rotri\t%0, %1, %2 ++ rotr\t%0, %1, %2" ++ [(set_attr "type" " alu, alu") ++ (set_attr "subtype" "shift,shift") ++ (set_attr "length" " 4, 4")]) + + + ;; ---------------------------------------------------------------------------- +@@ -720,14 +901,95 @@ + ;; And for V2 ISA, there is NO 'neg33' instruction. + ;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B'). 
+ (define_insn "negsi2" +- [(set (match_operand:SI 0 "register_operand" "=w, r") +- (neg:SI (match_operand:SI 1 "register_operand" " w, r")))] ++ [(set (match_operand:SI 0 "register_operand" "=l, r") ++ (neg:SI (match_operand:SI 1 "register_operand" " l, r")))] + "" + "@ + neg33\t%0, %1 + subri\t%0, %1, 0" +- [(set_attr "type" "alu,alu") +- (set_attr "length" " 2, 4")]) ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4") ++ (set_attr "feature" "v3m, v1")]) ++ ++(define_expand "negsf2" ++ [(set (match_operand:SF 0 "register_operand" "") ++ (neg:SF (match_operand:SF 1 "register_operand" "")))] ++ "" ++{ ++ if (!TARGET_FPU_SINGLE && !TARGET_EXT_PERF) ++ { ++ rtx new_dst = simplify_gen_subreg (SImode, operands[0], SFmode, 0); ++ rtx new_src = simplify_gen_subreg (SImode, operands[1], SFmode, 0); ++ ++ emit_insn (gen_xorsi3 (new_dst, ++ new_src, ++ gen_int_mode (0x80000000, SImode))); ++ ++ DONE; ++ } ++}) ++ ++(define_expand "negdf2" ++ [(set (match_operand:DF 0 "register_operand" "") ++ (neg:DF (match_operand:DF 1 "register_operand" "")))] ++ "" ++{ ++}) ++ ++(define_insn_and_split "soft_negdf2" ++ [(set (match_operand:DF 0 "register_operand" "") ++ (neg:DF (match_operand:DF 1 "register_operand" "")))] ++ "!TARGET_FPU_DOUBLE" ++ "#" ++ "!TARGET_FPU_DOUBLE" ++ [(const_int 1)] ++{ ++ rtx src = operands[1]; ++ rtx dst = operands[0]; ++ rtx ori_dst = operands[0]; ++ ++ bool need_extra_move_for_dst_p; ++ /* FPU register can't change mode to SI directly, so we need create a ++ tmp register to handle it, and FPU register can't do `xor` or btgl. 
*/ ++ if (HARD_REGISTER_P (src) ++ && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (src))) ++ { ++ rtx tmp = gen_reg_rtx (DFmode); ++ emit_move_insn (tmp, src); ++ src = tmp; ++ } ++ ++ if (HARD_REGISTER_P (dst) ++ && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (dst))) ++ { ++ need_extra_move_for_dst_p = true; ++ rtx tmp = gen_reg_rtx (DFmode); ++ dst = tmp; ++ } ++ ++ rtx dst_high_part = simplify_gen_subreg ( ++ SImode, dst, ++ DFmode, subreg_highpart_offset (SImode, DFmode)); ++ rtx dst_low_part = simplify_gen_subreg ( ++ SImode, dst, ++ DFmode, subreg_lowpart_offset (SImode, DFmode)); ++ rtx src_high_part = simplify_gen_subreg ( ++ SImode, src, ++ DFmode, subreg_highpart_offset (SImode, DFmode)); ++ rtx src_low_part = simplify_gen_subreg ( ++ SImode, src, ++ DFmode, subreg_lowpart_offset (SImode, DFmode)); ++ ++ emit_insn (gen_xorsi3 (dst_high_part, ++ src_high_part, ++ gen_int_mode (0x80000000, SImode))); ++ emit_move_insn (dst_low_part, src_low_part); ++ ++ if (need_extra_move_for_dst_p) ++ emit_move_insn (ori_dst, dst); ++ ++ DONE; ++}) + + + ;; ---------------------------------------------------------------------------- +@@ -737,55 +999,72 @@ + ;; For V3/V3M ISA, we have 'not33' instruction. + ;; So we can identify 'not Rt3,Ra3' case and set its length to be 2. + (define_insn "one_cmplsi2" +- [(set (match_operand:SI 0 "register_operand" "=w, r") +- (not:SI (match_operand:SI 1 "register_operand" " w, r")))] ++ [(set (match_operand:SI 0 "register_operand" "=l, r") ++ (not:SI (match_operand:SI 1 "register_operand" " l, r")))] + "" + "@ + not33\t%0, %1 + nor\t%0, %1, %1" +- [(set_attr "type" "alu,alu") +- (set_attr "length" " 2, 4")]) ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4") ++ (set_attr "feature" "v3m, v1")]) + + + ;; ---------------------------------------------------------------------------- + + ;; Shift instructions. 
+ +-(define_insn "ashlsi3" +- [(set (match_operand:SI 0 "register_operand" "= l, r, r") +- (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r") +- (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))] ++(define_expand "<shift>si3" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (shift_rotate:SI (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "nds32_rimm5u_operand" "")))] + "" +- "@ +- slli333\t%0, %1, %2 +- slli\t%0, %1, %2 +- sll\t%0, %1, %2" +- [(set_attr "type" "alu,alu,alu") +- (set_attr "length" " 2, 4, 4")]) ++{ ++ if (operands[2] == const0_rtx) ++ { ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++ } ++}) + +-(define_insn "ashrsi3" +- [(set (match_operand:SI 0 "register_operand" "= d, r, r") +- (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") +- (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))] ++(define_insn "*ashlsi3" ++ [(set (match_operand:SI 0 "register_operand" "= l, r, r") ++ (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu03, Iu05, r")))] + "" + "@ +- srai45\t%0, %2 +- srai\t%0, %1, %2 +- sra\t%0, %1, %2" +- [(set_attr "type" "alu,alu,alu") +- (set_attr "length" " 2, 4, 4")]) +- +-(define_insn "lshrsi3" +- [(set (match_operand:SI 0 "register_operand" "= d, r, r") +- (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") +- (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))] ++ slli333\t%0, %1, %2 ++ slli\t%0, %1, %2 ++ sll\t%0, %1, %2" ++ [(set_attr "type" " alu, alu, alu") ++ (set_attr "subtype" "shift,shift,shift") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "*ashrsi3" ++ [(set (match_operand:SI 0 "register_operand" "= d, r, r") ++ (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))] ++ "" ++ "@ ++ srai45\t%0, %2 ++ srai\t%0, %1, %2 ++ sra\t%0, %1, %2" ++ [(set_attr "type" " alu, alu, alu") ++ (set_attr 
"subtype" "shift,shift,shift") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "*lshrsi3" ++ [(set (match_operand:SI 0 "register_operand" "= d, r, r") ++ (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))] + "" + "@ +- srli45\t%0, %2 +- srli\t%0, %1, %2 +- srl\t%0, %1, %2" +- [(set_attr "type" "alu,alu,alu") +- (set_attr "length" " 2, 4, 4")]) ++ srli45\t%0, %2 ++ srli\t%0, %1, %2 ++ srl\t%0, %1, %2" ++ [(set_attr "type" " alu, alu, alu") ++ (set_attr "subtype" "shift,shift,shift") ++ (set_attr "length" " 2, 4, 4")]) + + + ;; ---------------------------------------------------------------------------- +@@ -794,148 +1073,65 @@ + ;; Conditional Move patterns + ;; ---------------------------------------------------------------------------- + +-(define_expand "movsicc" +- [(set (match_operand:SI 0 "register_operand" "") +- (if_then_else:SI (match_operand 1 "comparison_operator" "") +- (match_operand:SI 2 "register_operand" "") +- (match_operand:SI 3 "register_operand" "")))] +- "TARGET_CMOV" ++(define_expand "mov<mode>cc" ++ [(set (match_operand:QIHISI 0 "register_operand" "") ++ (if_then_else:QIHISI (match_operand 1 "nds32_movecc_comparison_operator" "") ++ (match_operand:QIHISI 2 "register_operand" "") ++ (match_operand:QIHISI 3 "register_operand" "")))] ++ "TARGET_CMOV && !optimize_size" + { +- if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) +- && GET_MODE (XEXP (operands[1], 0)) == SImode +- && XEXP (operands[1], 1) == const0_rtx) +- { +- /* If the operands[1] rtx is already (eq X 0) or (ne X 0), +- we have gcc generate original template rtx. 
*/ +- goto create_template; +- } +- else ++ enum nds32_expand_result_type result = nds32_expand_movcc (operands); ++ switch (result) + { +- /* Since there is only 'slt'(Set when Less Than) instruction for +- comparison in Andes ISA, the major strategy we use here is to +- convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination. +- We design constraints properly so that the reload phase will assist +- to make one source operand to use same register as result operand. +- Then we can use cmovz/cmovn to catch the other source operand +- which has different register. */ +- enum rtx_code code = GET_CODE (operands[1]); +- enum rtx_code new_code = code; +- rtx cmp_op0 = XEXP (operands[1], 0); +- rtx cmp_op1 = XEXP (operands[1], 1); +- rtx tmp; +- int reverse = 0; +- +- /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part +- Strategy : Reverse condition and swap comparison operands +- +- For example: +- +- a <= b ? P : Q (LE or LEU) +- --> a > b ? Q : P (reverse condition) +- --> b < a ? Q : P (swap comparison operands to achieve 'LT/LTU') +- +- a >= b ? P : Q (GE or GEU) +- --> a < b ? Q : P (reverse condition to achieve 'LT/LTU') +- +- a < b ? P : Q (LT or LTU) +- --> (NO NEED TO CHANGE, it is already 'LT/LTU') +- +- a > b ? P : Q (GT or GTU) +- --> b < a ? P : Q (swap comparison operands to achieve 'LT/LTU') */ +- switch (code) +- { +- case NE: +- /* (a != b ? P : Q) +- can be expressed as +- (a == b ? Q : P) +- so, fall through to reverse condition */ +- case GE: case GEU: case LE: case LEU: +- new_code = reverse_condition (code); +- reverse = 1; +- break; +- case EQ: case GT: case GTU: case LT: case LTU: +- /* no need to reverse condition */ +- break; +- default: +- FAIL; +- } +- +- /* For '>' comparison operator, we swap operands +- so that we can have 'LT/LTU' operator. 
*/ +- if (new_code == GT || new_code == GTU) +- { +- tmp = cmp_op0; +- cmp_op0 = cmp_op1; +- cmp_op1 = tmp; +- +- new_code = swap_condition (new_code); +- } +- +- /* Use a temporary register to store slt/slts result. */ +- tmp = gen_reg_rtx (SImode); +- +- /* Split EQ and NE because we don't have direct comparison of EQ and NE. +- If we don't split it, the conditional move transformation will fail +- when producing (SET A (EQ B C)) or (SET A (NE B C)). */ +- if (new_code == EQ) +- { +- emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); +- emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1))); +- } +- else if (new_code == NE) +- { +- emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); +- emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp)); +- } +- else +- /* This emit_insn will create corresponding 'slt/slts' insturction. */ +- emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (new_code, SImode, +- cmp_op0, cmp_op1))); +- +- /* Change comparison semantic into (eq X 0) or (ne X 0) behavior +- so that cmovz or cmovn will be matched later. +- +- For reverse condition cases, we want to create a semantic that: +- (eq X 0) --> pick up "else" part +- For normal cases, we want to create a semantic that: +- (ne X 0) --> pick up "then" part +- +- Later we will have cmovz/cmovn instruction pattern to +- match corresponding behavior and output instruction. */ +- operands[1] = gen_rtx_fmt_ee (reverse ? 
EQ : NE, +- VOIDmode, tmp, const0_rtx); ++ case EXPAND_DONE: ++ DONE; ++ break; ++ case EXPAND_FAIL: ++ FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; ++ default: ++ gcc_unreachable (); + } +- +-create_template: +- do {} while(0); /* dummy line */ + }) + +-(define_insn "cmovz" +- [(set (match_operand:SI 0 "register_operand" "=r, r") +- (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r") ++(define_insn "cmovz<mode>" ++ [(set (match_operand:QIHISI 0 "register_operand" "=r, r") ++ (if_then_else:QIHISI (eq (match_operand:SI 1 "register_operand" " r, r") + (const_int 0)) +- (match_operand:SI 2 "register_operand" " r, 0") +- (match_operand:SI 3 "register_operand" " 0, r")))] ++ (match_operand:QIHISI 2 "register_operand" " r, 0") ++ (match_operand:QIHISI 3 "register_operand" " 0, r")))] + "TARGET_CMOV" + "@ + cmovz\t%0, %2, %1 + cmovn\t%0, %3, %1" +- [(set_attr "type" "move") ++ [(set_attr "type" "alu") + (set_attr "length" "4")]) + +-(define_insn "cmovn" +- [(set (match_operand:SI 0 "register_operand" "=r, r") +- (if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r") ++(define_insn "cmovn<mode>" ++ [(set (match_operand:QIHISI 0 "register_operand" "=r, r") ++ (if_then_else:QIHISI (ne (match_operand:SI 1 "register_operand" " r, r") + (const_int 0)) +- (match_operand:SI 2 "register_operand" " r, 0") +- (match_operand:SI 3 "register_operand" " 0, r")))] ++ (match_operand:QIHISI 2 "register_operand" " r, 0") ++ (match_operand:QIHISI 3 "register_operand" " 0, r")))] + "TARGET_CMOV" + "@ + cmovn\t%0, %2, %1 + cmovz\t%0, %3, %1" +- [(set_attr "type" "move") ++ [(set_attr "type" "alu") + (set_attr "length" "4")]) + ++;; A hotfix to help RTL combiner to merge a cmovn insn and a zero_extend insn. ++;; It should be removed once after we change the expansion form of the cmovn. 
++(define_insn "*cmovn_simplified_<mode>" ++ [(set (match_operand:QIHISI 0 "register_operand" "=r") ++ (if_then_else:QIHISI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:QIHISI 2 "register_operand" "r") ++ (match_operand:QIHISI 3 "register_operand" "0")))] ++ "" ++ "cmovn\t%0, %2, %1" ++ [(set_attr "type" "alu")]) + + ;; ---------------------------------------------------------------------------- + ;; Conditional Branch patterns +@@ -950,573 +1146,188 @@ create_template: + (pc)))] + "" + { +- rtx tmp_reg; +- enum rtx_code code; +- +- code = GET_CODE (operands[0]); +- +- /* If operands[2] is (const_int 0), +- we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions. +- So we have gcc generate original template rtx. */ +- if (GET_CODE (operands[2]) == CONST_INT) +- if (INTVAL (operands[2]) == 0) +- if ((code != GTU) +- && (code != GEU) +- && (code != LTU) +- && (code != LEU)) +- goto create_template; +- +- /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than) +- behavior for the comparison, we might need to generate other +- rtx patterns to achieve same semantic. */ +- switch (code) ++ enum nds32_expand_result_type result = nds32_expand_cbranch (operands); ++ switch (result) + { +- case GT: +- case GTU: +- if (GET_CODE (operands[2]) == CONST_INT) +- { +- /* GT reg_A, const_int => !(LT reg_A, const_int + 1) */ +- tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); +- +- /* We want to plus 1 into the integer value +- of operands[2] to create 'slt' instruction. +- This caculation is performed on the host machine, +- which may be 64-bit integer. +- So the meaning of caculation result may be +- different from the 32-bit nds32 target. +- +- For example: +- 0x7fffffff + 0x1 -> 0x80000000, +- this value is POSITIVE on 64-bit machine, +- but the expected value on 32-bit nds32 target +- should be NEGATIVE value. +- +- Hence, instead of using GEN_INT(), we use gen_int_mode() to +- explicitly create SImode constant rtx. 
*/ +- operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode); +- +- if (code == GT) +- { +- /* GT, use slts instruction */ +- emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); +- } +- else +- { +- /* GTU, use slt instruction */ +- emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); +- } +- +- PUT_CODE (operands[0], EQ); +- operands[1] = tmp_reg; +- operands[2] = const0_rtx; +- emit_insn (gen_cbranchsi4 (operands[0], operands[1], +- operands[2], operands[3])); +- +- DONE; +- } +- else +- { +- /* GT reg_A, reg_B => LT reg_B, reg_A */ +- tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); +- +- if (code == GT) +- { +- /* GT, use slts instruction */ +- emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); +- } +- else +- { +- /* GTU, use slt instruction */ +- emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); +- } +- +- PUT_CODE (operands[0], NE); +- operands[1] = tmp_reg; +- operands[2] = const0_rtx; +- emit_insn (gen_cbranchsi4 (operands[0], operands[1], +- operands[2], operands[3])); +- +- DONE; +- } +- +- case GE: +- case GEU: +- /* GE reg_A, reg_B => !(LT reg_A, reg_B) */ +- /* GE reg_A, const_int => !(LT reg_A, const_int) */ +- tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); +- +- if (code == GE) +- { +- /* GE, use slts instruction */ +- emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); +- } +- else +- { +- /* GEU, use slt instruction */ +- emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); +- } +- +- PUT_CODE (operands[0], EQ); +- operands[1] = tmp_reg; +- operands[2] = const0_rtx; +- emit_insn (gen_cbranchsi4 (operands[0], operands[1], +- operands[2], operands[3])); +- ++ case EXPAND_DONE: + DONE; +- +- case LT: +- case LTU: +- /* LT reg_A, reg_B => LT reg_A, reg_B */ +- /* LT reg_A, const_int => LT reg_A, const_int */ +- tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); +- +- if (code == LT) +- { +- /* LT, use slts instruction */ +- emit_insn (gen_slts_compare (tmp_reg, operands[1], 
operands[2])); +- } +- else +- { +- /* LTU, use slt instruction */ +- emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); +- } +- +- PUT_CODE (operands[0], NE); +- operands[1] = tmp_reg; +- operands[2] = const0_rtx; +- emit_insn (gen_cbranchsi4 (operands[0], operands[1], +- operands[2], operands[3])); +- +- DONE; +- +- case LE: +- case LEU: +- if (GET_CODE (operands[2]) == CONST_INT) +- { +- /* LE reg_A, const_int => LT reg_A, const_int + 1 */ +- tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); +- +- /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN). +- We better have an assert here in case GCC does not properly +- optimize it away. The INT_MAX here is 0x7fffffff for target. */ +- gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff); +- operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode); +- +- if (code == LE) +- { +- /* LE, use slts instruction */ +- emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); +- } +- else +- { +- /* LEU, use slt instruction */ +- emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); +- } +- +- PUT_CODE (operands[0], NE); +- operands[1] = tmp_reg; +- operands[2] = const0_rtx; +- emit_insn (gen_cbranchsi4 (operands[0], operands[1], +- operands[2], operands[3])); +- +- DONE; +- } +- else +- { +- /* LE reg_A, reg_B => !(LT reg_B, reg_A) */ +- tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); +- +- if (code == LE) +- { +- /* LE, use slts instruction */ +- emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); +- } +- else +- { +- /* LEU, use slt instruction */ +- emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); +- } +- +- PUT_CODE (operands[0], EQ); +- operands[1] = tmp_reg; +- operands[2] = const0_rtx; +- emit_insn (gen_cbranchsi4 (operands[0], operands[1], +- operands[2], operands[3])); +- +- DONE; +- } +- +- case EQ: +- case NE: +- /* NDS32 ISA has various form for eq/ne behavior no matter +- what kind of the operand is. 
+- So just generate original template rtx. */ +- goto create_template; +- +- default: ++ break; ++ case EXPAND_FAIL: + FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; ++ default: ++ gcc_unreachable (); + } +- +-create_template: +- do {} while(0); /* dummy line */ + }) + + +-(define_insn "*cbranchsi4_equality_zero" ++(define_insn "cbranchsi4_equality_zero" + [(set (pc) + (if_then_else (match_operator 0 "nds32_equality_comparison_operator" +- [(match_operand:SI 1 "register_operand" "t, l, r") ++ [(match_operand:SI 1 "register_operand" "t,l, r") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { +- enum rtx_code code; +- +- code = GET_CODE (operands[0]); +- +- /* This zero-comparison conditional branch has two forms: +- 32-bit instruction => beqz/bnez imm16s << 1 +- 16-bit instruction => beqzs8/bnezs8/beqz38/bnez38 imm8s << 1 +- +- For 32-bit case, +- we assume it is always reachable. (but check range -65500 ~ 65500) +- +- For 16-bit case, +- it must satisfy { 255 >= (label - pc) >= -256 } condition. +- However, since the $pc for nds32 is at the beginning of the instruction, +- we should leave some length space for current insn. +- So we use range -250 ~ 250. */ +- +- switch (get_attr_length (insn)) +- { +- case 2: +- if (which_alternative == 0) +- { +- /* constraint: t */ +- return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2"; +- } +- else if (which_alternative == 1) +- { +- /* constraint: l */ +- return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2"; +- } +- else +- { +- /* constraint: r */ +- /* For which_alternative==2, it should not be here. */ +- gcc_unreachable (); +- } +- case 4: +- /* including constraints: t, l, and r */ +- return (code == EQ) ? 
"beqz\t%1, %2" : "bnez\t%1, %2"; +- case 6: +- if (which_alternative == 0) +- { +- /* constraint: t */ +- if (code == EQ) +- { +- /* beqzs8 .L0 +- => +- bnezs8 .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:"; +- } +- else +- { +- /* bnezs8 .L0 +- => +- beqzs8 .LCB0 +- j .L0 +- .LCB0: +- */ +- return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:"; +- } +- } +- else if (which_alternative == 1) +- { +- /* constraint: l */ +- if (code == EQ) +- { +- /* beqz38 $r0, .L0 +- => +- bnez38 $r0, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- } +- else +- { +- /* bnez38 $r0, .L0 +- => +- beqz38 $r0, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- } +- } +- else +- { +- /* constraint: r */ +- /* For which_alternative==2, it should not be here. */ +- gcc_unreachable (); +- } +- case 8: +- /* constraint: t, l, r. */ +- if (code == EQ) +- { +- /* beqz $r8, .L0 +- => +- bnez $r8, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- } +- else +- { +- /* bnez $r8, .L0 +- => +- beqz $r8, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- } +- default: +- gcc_unreachable (); +- } ++ return nds32_output_cbranchsi4_equality_zero (insn, operands); + } + [(set_attr "type" "branch") +- (set_attr "enabled" "1") ++ (set_attr_alternative "enabled" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_string "yes") ++ (const_string "no")) ++ ;; Alternative 1 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_string "yes") ++ (const_string "no")) ++ ;; Alternative 2 ++ (const_string "yes") ++ ]) + (set_attr_alternative "length" + [ + ;; Alternative 0 +- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) +- (le (minus (match_dup 2) (pc)) (const_int 250))) +- (if_then_else (match_test "TARGET_16_BIT") +- (const_int 2) +- (const_int 4)) +- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) +- (le 
(minus (match_dup 2) (pc)) (const_int 65500))) +- (const_int 4) ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) ++ (le (minus (match_dup 2) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") +- (const_int 6) +- (const_int 8)))) ++ (const_int 2) ++ (const_int 4)) ++ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) ++ (le (minus (match_dup 2) (pc)) (const_int 65500))) ++ (const_int 4) ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 8) ++ (const_int 10)))) ++ (const_int 10)) + ;; Alternative 1 +- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) +- (le (minus (match_dup 2) (pc)) (const_int 250))) +- (if_then_else (match_test "TARGET_16_BIT") +- (const_int 2) +- (const_int 4)) ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) ++ (le (minus (match_dup 2) (pc)) (const_int 250))) ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4)) ++ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) ++ (le (minus (match_dup 2) (pc)) (const_int 65500))) ++ (const_int 4) ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 8) ++ (const_int 10)))) ++ (const_int 10)) ++ ;; Alternative 2 ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) +- (if_then_else (match_test "TARGET_16_BIT") +- (const_int 6) +- (const_int 8)))) +- ;; Alternative 2 +- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) +- (le (minus (match_dup 2) (pc)) (const_int 65500))) +- (const_int 4) +- (const_int 8)) ++ (const_int 10)) ++ (const_int 10)) + ])]) + + + ;; This pattern is dedicated to V2 ISA, + ;; because V2 DOES NOT HAVE beqc/bnec instruction. 
+-(define_insn "*cbranchsi4_equality_reg" ++(define_insn "cbranchsi4_equality_reg" + [(set (pc) + (if_then_else (match_operator 0 "nds32_equality_comparison_operator" +- [(match_operand:SI 1 "register_operand" "r") +- (match_operand:SI 2 "nds32_reg_constant_operand" "r")]) ++ [(match_operand:SI 1 "register_operand" "v, r") ++ (match_operand:SI 2 "register_operand" "l, r")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_ISA_V2" + { +- enum rtx_code code; +- +- code = GET_CODE (operands[0]); +- +- /* This register-comparison conditional branch has one form: +- 32-bit instruction => beq/bne imm14s << 1 +- +- For 32-bit case, +- we assume it is always reachable. (but check range -16350 ~ 16350). */ +- +- switch (code) +- { +- case EQ: +- /* r, r */ +- switch (get_attr_length (insn)) +- { +- case 4: +- return "beq\t%1, %2, %3"; +- case 8: +- /* beq $r0, $r1, .L0 +- => +- bne $r0, $r1, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- +- case NE: +- /* r, r */ +- switch (get_attr_length (insn)) +- { +- case 4: +- return "bne\t%1, %2, %3"; +- case 8: +- /* bne $r0, $r1, .L0 +- => +- beq $r0, $r1, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- +- default: +- gcc_unreachable (); +- } ++ return nds32_output_cbranchsi4_equality_reg (insn, operands); + } + [(set_attr "type" "branch") +- (set (attr "length") +- (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) +- (le (minus (match_dup 3) (pc)) (const_int 16350))) +- (const_int 4) +- (const_int 8)))]) ++ (set_attr_alternative "enabled" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_string "yes") ++ (const_string "no")) ++ ;; Alternative 1 ++ (const_string "yes") ++ ]) ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus 
(match_dup 3) (pc)) (const_int -250)) ++ (le (minus (match_dup 3) (pc)) (const_int 250))) ++ (const_int 2) ++ (if_then_else (and (ge (minus (match_dup 3) (pc)) ++ (const_int -16350)) ++ (le (minus (match_dup 3) (pc)) ++ (const_int 16350))) ++ (const_int 4) ++ (const_int 8))) ++ (const_int 8)) ++ ;; Alternative 1 ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) ++ (le (minus (match_dup 3) (pc)) (const_int 16350))) ++ (const_int 4) ++ (const_int 10)) ++ (const_int 10)) ++ ])]) + + + ;; This pattern is dedicated to V3/V3M, + ;; because V3/V3M DO HAVE beqc/bnec instruction. +-(define_insn "*cbranchsi4_equality_reg_or_const_int" ++(define_insn "cbranchsi4_equality_reg_or_const_int" + [(set (pc) + (if_then_else (match_operator 0 "nds32_equality_comparison_operator" +- [(match_operand:SI 1 "register_operand" "r, r") +- (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")]) ++ [(match_operand:SI 1 "register_operand" "v, r, r") ++ (match_operand:SI 2 "nds32_rimm11s_operand" "l, r, Is11")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_ISA_V3 || TARGET_ISA_V3M" + { +- enum rtx_code code; +- +- code = GET_CODE (operands[0]); +- +- /* This register-comparison conditional branch has one form: +- 32-bit instruction => beq/bne imm14s << 1 +- 32-bit instruction => beqc/bnec imm8s << 1 +- +- For 32-bit case, we assume it is always reachable. +- (but check range -16350 ~ 16350 and -250 ~ 250). 
*/ +- +- switch (code) +- { +- case EQ: +- if (which_alternative == 0) +- { +- /* r, r */ +- switch (get_attr_length (insn)) +- { +- case 4: +- return "beq\t%1, %2, %3"; +- case 8: +- /* beq $r0, $r1, .L0 +- => +- bne $r0, $r1, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- /* r, Is11 */ +- switch (get_attr_length (insn)) +- { +- case 4: +- return "beqc\t%1, %2, %3"; +- case 8: +- /* beqc $r0, constant, .L0 +- => +- bnec $r0, constant, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- } +- case NE: +- if (which_alternative == 0) +- { +- /* r, r */ +- switch (get_attr_length (insn)) +- { +- case 4: +- return "bne\t%1, %2, %3"; +- case 8: +- /* bne $r0, $r1, .L0 +- => +- beq $r0, $r1, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- /* r, Is11 */ +- switch (get_attr_length (insn)) +- { +- case 4: +- return "bnec\t%1, %2, %3"; +- case 8: +- /* bnec $r0, constant, .L0 +- => +- beqc $r0, constant, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- } +- default: +- gcc_unreachable (); +- } ++ return nds32_output_cbranchsi4_equality_reg_or_const_int (insn, operands); + } + [(set_attr "type" "branch") ++ (set_attr_alternative "enabled" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_string "yes") ++ (const_string "no")) ++ ;; Alternative 1 ++ (const_string "yes") ++ ;; Alternative 2 ++ (const_string "yes") ++ ]) + (set_attr_alternative "length" + [ + ;; Alternative 0 +- (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) +- (le (minus (match_dup 3) (pc)) (const_int 16350))) +- (const_int 4) +- (const_int 8)) ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus 
(match_dup 3) (pc)) (const_int -250)) ++ (le (minus (match_dup 3) (pc)) (const_int 250))) ++ (const_int 2) ++ (if_then_else (and (ge (minus (match_dup 3) (pc)) ++ (const_int -16350)) ++ (le (minus (match_dup 3) (pc)) ++ (const_int 16350))) ++ (const_int 4) ++ (const_int 8))) ++ (const_int 8)) + ;; Alternative 1 +- (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) +- (le (minus (match_dup 3) (pc)) (const_int 250))) +- (const_int 4) +- (const_int 8)) ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) ++ (le (minus (match_dup 3) (pc)) (const_int 16350))) ++ (const_int 4) ++ (const_int 10)) ++ (const_int 10)) ++ ;; Alternative 2 ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) ++ (le (minus (match_dup 3) (pc)) (const_int 250))) ++ (const_int 4) ++ (const_int 10)) ++ (const_int 10)) + ])]) + + +@@ -1529,80 +1340,16 @@ create_template: + (pc)))] + "" + { +- enum rtx_code code; +- +- code = GET_CODE (operands[0]); +- +- /* This zero-greater-less-comparison conditional branch has one form: +- 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 +- +- For 32-bit case, we assume it is always reachable. +- (but check range -65500 ~ 65500). */ +- +- if (get_attr_length (insn) == 8) +- { +- /* The branch target is too far to simply use one +- bgtz/bgez/bltz/blez instruction. +- We need to reverse condition and use 'j' to jump to the target. 
*/ +- switch (code) +- { +- case GT: +- /* bgtz $r8, .L0 +- => +- blez $r8, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- case GE: +- /* bgez $r8, .L0 +- => +- bltz $r8, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- case LT: +- /* bltz $r8, .L0 +- => +- bgez $r8, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- case LE: +- /* blez $r8, .L0 +- => +- bgtz $r8, .LCB0 +- j .L0 +- .LCB0: +- */ +- return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; +- default: +- gcc_unreachable (); +- } +- } +- +- switch (code) +- { +- case GT: +- return "bgtz\t%1, %2"; +- case GE: +- return "bgez\t%1, %2"; +- case LT: +- return "bltz\t%1, %2"; +- case LE: +- return "blez\t%1, %2"; +- default: +- gcc_unreachable (); +- } ++ return nds32_output_cbranchsi4_greater_less_zero (insn, operands); + } + [(set_attr "type" "branch") + (set (attr "length") +- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) +- (le (minus (match_dup 2) (pc)) (const_int 65500))) +- (const_int 4) +- (const_int 8)))]) ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) ++ (le (minus (match_dup 2) (pc)) (const_int 65500))) ++ (const_int 4) ++ (const_int 10)) ++ (const_int 10)))]) + + + (define_expand "cstoresi4" +@@ -1612,237 +1359,85 @@ create_template: + (match_operand:SI 3 "nonmemory_operand" "")]))] + "" + { +- rtx tmp_reg; +- enum rtx_code code; +- +- code = GET_CODE (operands[1]); +- +- switch (code) ++ enum nds32_expand_result_type result = nds32_expand_cstore (operands); ++ switch (result) + { +- case EQ: +- if (GET_CODE (operands[3]) == CONST_INT) +- { +- /* reg_R = (reg_A == const_int_B) +- --> addi reg_C, reg_A, -const_int_B +- slti reg_R, reg_C, const_int_1 */ +- tmp_reg = gen_reg_rtx (SImode); +- operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); +- /* If the integer value is not in the range of imm15s, +- we 
need to force register first because our addsi3 pattern +- only accept nds32_rimm15s_operand predicate. */ +- if (!satisfies_constraint_Is15 (operands[3])) +- operands[3] = force_reg (SImode, operands[3]); +- emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); +- emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); +- +- DONE; +- } +- else +- { +- /* reg_R = (reg_A == reg_B) +- --> xor reg_C, reg_A, reg_B +- slti reg_R, reg_C, const_int_1 */ +- tmp_reg = gen_reg_rtx (SImode); +- emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); +- emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); +- +- DONE; +- } +- +- case NE: +- if (GET_CODE (operands[3]) == CONST_INT) +- { +- /* reg_R = (reg_A != const_int_B) +- --> addi reg_C, reg_A, -const_int_B +- slti reg_R, const_int_0, reg_C */ +- tmp_reg = gen_reg_rtx (SImode); +- operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); +- /* If the integer value is not in the range of imm15s, +- we need to force register first because our addsi3 pattern +- only accept nds32_rimm15s_operand predicate. 
*/ +- if (!satisfies_constraint_Is15 (operands[3])) +- operands[3] = force_reg (SImode, operands[3]); +- emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); +- emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); +- +- DONE; +- } +- else +- { +- /* reg_R = (reg_A != reg_B) +- --> xor reg_C, reg_A, reg_B +- slti reg_R, const_int_0, reg_C */ +- tmp_reg = gen_reg_rtx (SImode); +- emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); +- emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); +- +- DONE; +- } +- +- case GT: +- case GTU: +- /* reg_R = (reg_A > reg_B) --> slt reg_R, reg_B, reg_A */ +- /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ +- if (code == GT) +- { +- /* GT, use slts instruction */ +- emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); +- } +- else +- { +- /* GTU, use slt instruction */ +- emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); +- } +- ++ case EXPAND_DONE: + DONE; +- +- case GE: +- case GEU: +- if (GET_CODE (operands[3]) == CONST_INT) +- { +- /* reg_R = (reg_A >= const_int_B) +- --> movi reg_C, const_int_B - 1 +- slt reg_R, reg_C, reg_A */ +- tmp_reg = gen_reg_rtx (SImode); +- +- emit_insn (gen_movsi (tmp_reg, +- gen_int_mode (INTVAL (operands[3]) - 1, +- SImode))); +- if (code == GE) +- { +- /* GE, use slts instruction */ +- emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); +- } +- else +- { +- /* GEU, use slt instruction */ +- emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); +- } +- +- DONE; +- } +- else +- { +- /* reg_R = (reg_A >= reg_B) +- --> slt reg_R, reg_A, reg_B +- xori reg_R, reg_R, const_int_1 */ +- if (code == GE) +- { +- /* GE, use slts instruction */ +- emit_insn (gen_slts_compare (operands[0], +- operands[2], operands[3])); +- } +- else +- { +- /* GEU, use slt instruction */ +- emit_insn (gen_slt_compare (operands[0], +- operands[2], operands[3])); +- } +- +- /* perform 'not' behavior */ +- emit_insn 
(gen_xorsi3 (operands[0], operands[0], const1_rtx)); +- +- DONE; +- } +- +- case LT: +- case LTU: +- /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ +- /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ +- if (code == LT) +- { +- /* LT, use slts instruction */ +- emit_insn (gen_slts_compare (operands[0], operands[2], operands[3])); +- } +- else +- { +- /* LTU, use slt instruction */ +- emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); +- } +- +- DONE; +- +- case LE: +- case LEU: +- if (GET_CODE (operands[3]) == CONST_INT) +- { +- /* reg_R = (reg_A <= const_int_B) +- --> movi reg_C, const_int_B + 1 +- slt reg_R, reg_A, reg_C */ +- tmp_reg = gen_reg_rtx (SImode); +- +- emit_insn (gen_movsi (tmp_reg, +- gen_int_mode (INTVAL (operands[3]) + 1, +- SImode))); +- if (code == LE) +- { +- /* LE, use slts instruction */ +- emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); +- } +- else +- { +- /* LEU, use slt instruction */ +- emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); +- } +- +- DONE; +- } +- else +- { +- /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A +- xori reg_R, reg_R, const_int_1 */ +- if (code == LE) +- { +- /* LE, use slts instruction */ +- emit_insn (gen_slts_compare (operands[0], +- operands[3], operands[2])); +- } +- else +- { +- /* LEU, use slt instruction */ +- emit_insn (gen_slt_compare (operands[0], +- operands[3], operands[2])); +- } +- +- /* perform 'not' behavior */ +- emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); +- +- DONE; +- } +- +- ++ break; ++ case EXPAND_FAIL: ++ FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; + default: + gcc_unreachable (); + } + }) + + +-(define_insn "slts_compare" +- [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") +- (lt:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") +- (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] ++(define_expand "slts_compare" ++ [(set 
(match_operand:SI 0 "register_operand" "") ++ (lt:SI (match_operand:SI 1 "general_operand" "") ++ (match_operand:SI 2 "general_operand" "")))] ++ "" ++{ ++ if (!REG_P (operands[1])) ++ operands[1] = force_reg (SImode, operands[1]); ++ ++ if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2])) ++ operands[2] = force_reg (SImode, operands[2]); ++}) ++ ++(define_insn "slts_compare_impl" ++ [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") ++ (lt:SI (match_operand:SI 1 "register_operand" " d, d, r, r") ++ (match_operand:SI 2 "nds32_rimm15s_operand" " r,Iu05, r, Is15")))] + "" + "@ + slts45\t%1, %2 + sltsi45\t%1, %2 + slts\t%0, %1, %2 + sltsi\t%0, %1, %2" +- [(set_attr "type" "compare,compare,compare,compare") +- (set_attr "length" " 2, 2, 4, 4")]) ++ [(set_attr "type" "alu, alu, alu, alu") ++ (set_attr "length" " 2, 2, 4, 4")]) ++ ++(define_insn "slt_eq0" ++ [(set (match_operand:SI 0 "register_operand" "=t, r") ++ (eq:SI (match_operand:SI 1 "register_operand" " d, r") ++ (const_int 0)))] ++ "" ++ "@ ++ slti45\t%1, 1 ++ slti\t%0, %1, 1" ++ [(set_attr "type" "alu, alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_expand "slt_compare" ++ [(set (match_operand:SI 0 "register_operand" "") ++ (ltu:SI (match_operand:SI 1 "general_operand" "") ++ (match_operand:SI 2 "general_operand" "")))] ++ "" ++{ ++ if (!REG_P (operands[1])) ++ operands[1] = force_reg (SImode, operands[1]); + +-(define_insn "slt_compare" +- [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") +- (ltu:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") +- (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] ++ if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2])) ++ operands[2] = force_reg (SImode, operands[2]); ++}) ++ ++(define_insn "slt_compare_impl" ++ [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") ++ (ltu:SI (match_operand:SI 1 "register_operand" " d, d, r, r") ++ (match_operand:SI 2 "nds32_rimm15s_operand" " r,Iu05, r, 
Is15")))] + "" + "@ + slt45\t%1, %2 + slti45\t%1, %2 + slt\t%0, %1, %2 + slti\t%0, %1, %2" +- [(set_attr "type" "compare,compare,compare,compare") +- (set_attr "length" " 2, 2, 4, 4")]) +- ++ [(set_attr "type" "alu, alu, alu, alu") ++ (set_attr "length" " 2, 2, 4, 4")]) + + ;; ---------------------------------------------------------------------------- + +@@ -1874,12 +1469,14 @@ create_template: + } + } + [(set_attr "type" "branch") +- (set_attr "enabled" "1") ++ (set_attr "enabled" "yes") + (set (attr "length") +- (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) +- (le (minus (match_dup 0) (pc)) (const_int 250))) +- (if_then_else (match_test "TARGET_16_BIT") +- (const_int 2) ++ (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ++ (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) ++ (le (minus (match_dup 0) (pc)) (const_int 250))) ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4)) + (const_int 4)) + (const_int 4)))]) + +@@ -1887,14 +1484,27 @@ create_template: + [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))] + "" + "@ +- jr5\t%0 +- jr\t%0" ++ jr5\t%0 ++ jr\t%0" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4")]) + ++(define_insn "*cond_indirect_jump" ++ [(cond_exec (ne (match_operand:SI 0 "register_operand" "r") ++ (const_int 0)) ++ (set (pc) (match_operand:SI 1 "register_operand" "0")))] ++ "" ++ "jrnez\t%0" ++ [(set_attr "type" "branch") ++ (set_attr "length" "4")]) ++ ++;; ---------------------------------------------------------------------------- ++ ++;; Normal call patterns. ++ + ;; Subroutine call instruction returning no value. + ;; operands[0]: It should be a mem RTX whose address is +-;; the address of the function. ++;; the the address of the function. + ;; operands[1]: It is the number of bytes of arguments pushed as a const_int. + ;; operands[2]: It is the number of registers used as operands. 
+ +@@ -1904,39 +1514,114 @@ create_template: + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "" +- "" ++ { ++ rtx insn; ++ rtx sym = XEXP (operands[0], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[0] = gen_const_mem (Pmode, reg); ++ } ++ ++ if (flag_pic) ++ { ++ insn = emit_call_insn (gen_call_internal ++ (XEXP (operands[0], 0), GEN_INT (0))); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); ++ DONE; ++ } ++ } + ) + +-(define_insn "*call_register" +- [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) +- (match_operand 1)) +- (clobber (reg:SI LP_REGNUM)) +- (clobber (reg:SI TA_REGNUM))])] +- "" +- "@ +- jral5\t%0 +- jral\t%0" +- [(set_attr "type" "branch,branch") +- (set_attr "length" " 2, 4")]) +- +-(define_insn "*call_immediate" +- [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i")) ++(define_insn "call_internal" ++ [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i")) + (match_operand 1)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "" + { +- if (TARGET_CMODEL_LARGE) +- return "bal\t%0"; +- else +- return "jal\t%0"; ++ rtx_insn *next_insn = next_active_insn (insn); ++ bool align_p = (!(next_insn && get_attr_length (next_insn) == 2)) ++ && NDS32_ALIGN_P (); ++ switch (which_alternative) ++ { ++ case 0: ++ if (TARGET_16_BIT) ++ { ++ if (align_p) ++ return "jral5\t%0\;.align 2"; ++ else ++ return "jral5\t%0"; ++ } ++ else ++ { ++ if (align_p) ++ return "jral\t%0\;.align 2"; ++ else ++ return "jral\t%0"; ++ } ++ case 1: ++ return nds32_output_call (insn, operands, operands[0], ++ "bal\t%0", "jal\t%0", align_p); ++ default: ++ gcc_unreachable (); ++ } + } +- [(set_attr "type" "branch") +- (set (attr "length") +- (if_then_else (match_test "TARGET_CMODEL_LARGE") +- (const_int 12) +- (const_int 4)))]) ++ [(set_attr "enabled" "yes") 
++ (set_attr "type" "branch") ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4)) ++ ;; Alternative 1 ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[0])") ++ (const_int 12) ++ (const_int 4))) ++ ])] ++) + ++(define_insn "*cond_call_register" ++ [(cond_exec (ne (match_operand:SI 0 "register_operand" "r") ++ (const_int 0)) ++ (parallel [(call (mem (match_operand:SI 1 "register_operand" "0")) ++ (match_operand 2)) ++ (clobber (reg:SI LP_REGNUM)) ++ (clobber (reg:SI TA_REGNUM))]))] ++ "TARGET_ISA_V3" ++ "jralnez\t%0" ++ [(set_attr "type" "branch") ++ (set_attr "length" "4")]) ++ ++(define_insn "*cond_call_immediate" ++ [(cond_exec (match_operator 0 "nds32_conditional_call_comparison_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (const_int 0)]) ++ (parallel [(call (mem (match_operand:SI 2 "nds32_symbolic_operand" "i")) ++ (match_operand 3)) ++ (clobber (reg:SI LP_REGNUM)) ++ (clobber (reg:SI TA_REGNUM))]))] ++ "!flag_pic && !TARGET_CMODEL_LARGE ++ && nds32_indirect_call_referenced_p (operands[2])" ++{ ++ switch (GET_CODE (operands[0])) ++ { ++ case LT: ++ return "bltzal\t%1, %2"; ++ case GE: ++ return "bgezal\t%1, %2"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "branch") ++ (set_attr "length" "4")]) + + ;; Subroutine call instruction returning a value. + ;; operands[0]: It is the hard regiser in which the value is returned. 
+@@ -1951,49 +1636,152 @@ create_template: + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "" +- "" ++ { ++ rtx insn; ++ rtx sym = XEXP (operands[1], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[1] = gen_const_mem (Pmode, reg); ++ } ++ ++ if (flag_pic) ++ { ++ insn = ++ emit_call_insn (gen_call_value_internal ++ (operands[0], XEXP (operands[1], 0), GEN_INT (0))); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); ++ DONE; ++ } ++ } + ) + +-(define_insn "*call_value_register" ++(define_insn "call_value_internal" + [(parallel [(set (match_operand 0) +- (call (mem (match_operand:SI 1 "register_operand" "r, r")) ++ (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "" +- "@ +- jral5\t%1 +- jral\t%1" +- [(set_attr "type" "branch,branch") +- (set_attr "length" " 2, 4")]) +- +-(define_insn "*call_value_immediate" +- [(parallel [(set (match_operand 0) +- (call (mem (match_operand:SI 1 "immediate_operand" "i")) +- (match_operand 2))) +- (clobber (reg:SI LP_REGNUM)) +- (clobber (reg:SI TA_REGNUM))])] +- "" + { +- if (TARGET_CMODEL_LARGE) +- return "bal\t%1"; +- else +- return "jal\t%1"; ++ rtx_insn *next_insn = next_active_insn (insn); ++ bool align_p = (!(next_insn && get_attr_length (next_insn) == 2)) ++ && NDS32_ALIGN_P (); ++ switch (which_alternative) ++ { ++ case 0: ++ if (TARGET_16_BIT) ++ { ++ if (align_p) ++ return "jral5\t%1\;.align 2"; ++ else ++ return "jral5\t%1"; ++ } ++ else ++ { ++ if (align_p) ++ return "jral\t%1\;.align 2"; ++ else ++ return "jral\t%1"; ++ } ++ case 1: ++ return nds32_output_call (insn, operands, operands[1], ++ "bal\t%1", "jal\t%1", align_p); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "enabled" "yes") ++ (set_attr "type" "branch") ++ (set_attr_alternative "length" ++ 
[ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4)) ++ ;; Alternative 1 ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[1])") ++ (const_int 12) ++ (const_int 4))) ++ ])] ++) ++ ++(define_insn "*cond_call_value_register" ++ [(cond_exec (ne (match_operand:SI 0 "register_operand" "r") ++ (const_int 0)) ++ (parallel [(set (match_operand 1) ++ (call (mem (match_operand:SI 2 "register_operand" "0")) ++ (match_operand 3))) ++ (clobber (reg:SI LP_REGNUM)) ++ (clobber (reg:SI TA_REGNUM))]))] ++ "TARGET_ISA_V3" ++ "jralnez\t%0" ++ [(set_attr "type" "branch") ++ (set_attr "length" "4")]) ++ ++(define_insn "*cond_call_value_immediate" ++ [(cond_exec (match_operator 0 "nds32_conditional_call_comparison_operator" ++ [(match_operand:SI 1 "register_operand" "r") ++ (const_int 0)]) ++ (parallel [(set (match_operand 2) ++ (call (mem (match_operand:SI 3 "nds32_symbolic_operand" "i")) ++ (match_operand 4))) ++ (clobber (reg:SI LP_REGNUM)) ++ (clobber (reg:SI TA_REGNUM))]))] ++ "!flag_pic && !TARGET_CMODEL_LARGE ++ && nds32_indirect_call_referenced_p (operands[3])" ++{ ++ switch (GET_CODE (operands[0])) ++ { ++ case LT: ++ return "bltzal\t%1, %3"; ++ case GE: ++ return "bgezal\t%1, %3"; ++ default: ++ gcc_unreachable (); ++ } + } + [(set_attr "type" "branch") +- (set (attr "length") +- (if_then_else (match_test "TARGET_CMODEL_LARGE") +- (const_int 12) +- (const_int 4)))]) ++ (set_attr "length" "4")]) ++ ++;; Call subroutine returning any type. 
++ ++(define_expand "untyped_call" ++ [(parallel [(call (match_operand 0 "" "") ++ (const_int 0)) ++ (match_operand 1 "" "") ++ (match_operand 2 "" "")])] ++ "" ++{ ++ int i; ++ ++ emit_call_insn (gen_call (operands[0], const0_rtx)); ++ ++ for (i = 0; i < XVECLEN (operands[2], 0); i++) ++ { ++ rtx set = XVECEXP (operands[2], 0, i); ++ emit_move_insn (SET_DEST (set), SET_SRC (set)); ++ } + ++ /* The optimizer does not know that the call sets the function value ++ registers we stored in the result block. We avoid problems by ++ claiming that all hard registers are used and clobbered at this ++ point. */ ++ emit_insn (gen_blockage ()); ++ DONE; ++}) + + ;; ---------------------------------------------------------------------------- + + ;; The sibcall patterns. + + ;; sibcall +-;; sibcall_register +-;; sibcall_immediate ++;; sibcall_internal + + (define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") +@@ -2001,41 +1789,60 @@ create_template: + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" +- "" +-) +- +-(define_insn "*sibcall_register" +- [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) +- (match_operand 1)) +- (clobber (reg:SI TA_REGNUM)) +- (return)])] +- "" +- "@ +- jr5\t%0 +- jr\t%0" +- [(set_attr "type" "branch,branch") +- (set_attr "length" " 2, 4")]) ++{ ++ rtx sym = XEXP (operands[0], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[0] = gen_const_mem (Pmode, reg); ++ } ++}) + +-(define_insn "*sibcall_immediate" +- [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i")) ++(define_insn "sibcall_internal" ++ [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i")) + (match_operand 1)) + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" + { +- if (TARGET_CMODEL_LARGE) +- return "b\t%0"; +- else +- return "j\t%0"; ++ switch (which_alternative) ++ { ++ case 0: ++ if 
(TARGET_16_BIT) ++ return "jr5\t%0"; ++ else ++ return "jr\t%0"; ++ case 1: ++ if (nds32_long_call_p (operands[0])) ++ return "b\t%0"; ++ else ++ return "j\t%0"; ++ default: ++ gcc_unreachable (); ++ } + } +- [(set_attr "type" "branch") +- (set (attr "length") +- (if_then_else (match_test "TARGET_CMODEL_LARGE") +- (const_int 12) +- (const_int 4)))]) ++ [(set_attr "enabled" "yes") ++ (set_attr "type" "branch") ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4)) ++ ;; Alternative 1 ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[0])") ++ (const_int 12) ++ (const_int 4))) ++ ])] ++) + + ;; sibcall_value +-;; sibcall_value_register ++;; sibcall_value_internal + ;; sibcall_value_immediate + + (define_expand "sibcall_value" +@@ -2045,73 +1852,106 @@ create_template: + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" +- "" +-) +- +-(define_insn "*sibcall_value_register" +- [(parallel [(set (match_operand 0) +- (call (mem (match_operand:SI 1 "register_operand" "r, r")) +- (match_operand 2))) +- (clobber (reg:SI TA_REGNUM)) +- (return)])] +- "" +- "@ +- jr5\t%1 +- jr\t%1" +- [(set_attr "type" "branch,branch") +- (set_attr "length" " 2, 4")]) ++{ ++ rtx sym = XEXP (operands[1], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[1] = gen_const_mem (Pmode, reg); ++ } ++}) + +-(define_insn "*sibcall_value_immediate" ++(define_insn "sibcall_value_internal" + [(parallel [(set (match_operand 0) +- (call (mem (match_operand:SI 1 "immediate_operand" "i")) ++ (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i")) + (match_operand 2))) + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" + { +- if (TARGET_CMODEL_LARGE) +- return "b\t%1"; +- else +- return "j\t%1"; ++ switch (which_alternative) ++ { ++ case 0: 
++ if (TARGET_16_BIT) ++ return "jr5\t%1"; ++ else ++ return "jr\t%1"; ++ case 1: ++ if (nds32_long_call_p (operands[1])) ++ return "b\t%1"; ++ else ++ return "j\t%1"; ++ default: ++ gcc_unreachable (); ++ } + } +- [(set_attr "type" "branch") +- (set (attr "length") +- (if_then_else (match_test "TARGET_CMODEL_LARGE") +- (const_int 12) +- (const_int 4)))]) +- ++ [(set_attr "enabled" "yes") ++ (set_attr "type" "branch") ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4)) ++ ;; Alternative 1 ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[1])") ++ (const_int 12) ++ (const_int 4))) ++ ])] ++) + + ;; ---------------------------------------------------------------------------- + +-;; prologue and epilogue. ++;; The prologue and epilogue. + + (define_expand "prologue" [(const_int 0)] + "" + { + /* Note that only under V3/V3M ISA, we could use v3push prologue. +- In addition, we do not want to use v3push for isr function +- and variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) ++ In addition, we need to check if v3push is indeed available. */ ++ if (NDS32_V3PUSH_AVAILABLE_P) + nds32_expand_prologue_v3push (); + else + nds32_expand_prologue (); ++ ++ /* If cfun->machine->fp_as_gp_p is true, we can generate special ++ directive to guide linker doing fp-as-gp optimization. ++ However, for a naked function, which means ++ it should not have prologue/epilogue, ++ using fp-as-gp still requires saving $fp by push/pop behavior and ++ there is no benefit to use fp-as-gp on such small function. ++ So we need to make sure this function is NOT naked as well. 
*/ ++ if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) ++ emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM))); ++ + DONE; + }) + + (define_expand "epilogue" [(const_int 0)] + "" + { ++ /* If cfun->machine->fp_as_gp_p is true, we can generate special ++ directive to guide linker doing fp-as-gp optimization. ++ However, for a naked function, which means ++ it should not have prologue/epilogue, ++ using fp-as-gp still requires saving $fp by push/pop behavior and ++ there is no benefit to use fp-as-gp on such small function. ++ So we need to make sure this function is NOT naked as well. */ ++ if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) ++ emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM))); ++ + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. +- In addition, we do not want to use v3pop for isr function +- and variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) ++ In addition, we need to check if v3push is indeed available. */ ++ if (NDS32_V3PUSH_AVAILABLE_P) + nds32_expand_epilogue_v3pop (false); + else + nds32_expand_epilogue (false); ++ + DONE; + }) + +@@ -2121,15 +1961,11 @@ create_template: + /* Pass true to indicate that this is sibcall epilogue and + exit from a function without the final branch back to the + calling function. */ +- if (TARGET_V3PUSH && !nds32_isr_function_p (current_function_decl)) +- nds32_expand_epilogue_v3pop (true); +- else +- nds32_expand_epilogue (true); ++ nds32_expand_epilogue (true); + + DONE; + }) + +- + ;; nop instruction. 
+ + (define_insn "nop" +@@ -2142,7 +1978,7 @@ create_template: + return "nop"; + } + [(set_attr "type" "misc") +- (set_attr "enabled" "1") ++ (set_attr "enabled" "yes") + (set (attr "length") + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) +@@ -2166,12 +2002,11 @@ create_template: + { + return nds32_output_stack_push (operands[0]); + } +- [(set_attr "type" "misc") +- (set_attr "enabled" "1") ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "12") ++ (set_attr "enabled" "yes") + (set (attr "length") +- (if_then_else (match_test "TARGET_V3PUSH +- && !nds32_isr_function_p (cfun->decl) +- && (cfun->machine->va_args_size == 0)") ++ (if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P") + (const_int 2) + (const_int 4)))]) + +@@ -2188,12 +2023,11 @@ create_template: + { + return nds32_output_stack_pop (operands[0]); + } +- [(set_attr "type" "misc") +- (set_attr "enabled" "1") ++ [(set_attr "type" "load_multiple") ++ (set_attr "combo" "12") ++ (set_attr "enabled" "yes") + (set (attr "length") +- (if_then_else (match_test "TARGET_V3PUSH +- && !nds32_isr_function_p (cfun->decl) +- && (cfun->machine->va_args_size == 0)") ++ (if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P") + (const_int 2) + (const_int 4)))]) + +@@ -2205,34 +2039,64 @@ create_template: + ;; Use this pattern to expand a return instruction + ;; with simple_return rtx if no epilogue is required. + (define_expand "return" +- [(simple_return)] ++ [(parallel [(return) ++ (clobber (reg:SI FP_REGNUM))])] + "nds32_can_use_return_insn ()" +- "" +-) ++{ ++ /* Emit as the simple return. */ ++ if (!cfun->machine->fp_as_gp_p ++ && cfun->machine->naked_p ++ && (cfun->machine->va_args_size == 0)) ++ { ++ emit_jump_insn (gen_return_internal ()); ++ DONE; ++ } ++}) + + ;; This pattern is expanded only by the shrink-wrapping optimization + ;; on paths where the function prologue has not been executed. 
++;; However, such optimization may reorder the prologue/epilogue blocks ++;; together with basic blocks within function body. ++;; So we must disable this pattern if we have already decided ++;; to perform fp_as_gp optimization, which requires prologue to be ++;; first block and epilogue to be last block. + (define_expand "simple_return" + [(simple_return)] +- "" ++ "!cfun->machine->fp_as_gp_p" + "" + ) + ++(define_insn "*nds32_return" ++ [(parallel [(return) ++ (clobber (reg:SI FP_REGNUM))])] ++ "" ++{ ++ return nds32_output_return (); ++} ++ [(set_attr "type" "branch") ++ (set_attr "enabled" "yes") ++ (set_attr "length" "4")]) ++ + (define_insn "return_internal" + [(simple_return)] + "" + { ++ if (nds32_isr_function_critical_p (current_function_decl)) ++ return "iret"; ++ + if (TARGET_16_BIT) + return "ret5"; + else + return "ret"; + } + [(set_attr "type" "branch") +- (set_attr "enabled" "1") ++ (set_attr "enabled" "yes") + (set (attr "length") +- (if_then_else (match_test "TARGET_16_BIT") +- (const_int 2) +- (const_int 4)))]) ++ (if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4))))]) + + + ;; ---------------------------------------------------------------------------- +@@ -2267,6 +2131,7 @@ create_template: + { + rtx add_tmp; + rtx reg, test; ++ rtx tmp_reg; + + /* Step A: "k <-- (plus (operands[0]) (-operands[1]))". */ + if (operands[1] != const0_rtx) +@@ -2275,8 +2140,8 @@ create_template: + add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode); + + /* If the integer value is not in the range of imm15s, +- we need to force register first because our addsi3 pattern +- only accept nds32_rimm15s_operand predicate. */ ++ we need to force register first because our addsi3 pattern ++ only accept nds32_rimm15s_operand predicate. 
*/ + add_tmp = force_reg (SImode, add_tmp); + + emit_insn (gen_addsi3 (reg, operands[0], add_tmp)); +@@ -2288,11 +2153,14 @@ create_template: + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], + operands[4])); + +- operands[5] = gen_reg_rtx (SImode); +- /* Step C, D, E, and F, using another temporary register operands[5]. */ ++ tmp_reg = gen_reg_rtx (SImode); ++ /* Step C, D, E, and F, using another temporary register tmp_reg. */ ++ if (flag_pic) ++ emit_use (pic_offset_table_rtx); ++ + emit_jump_insn (gen_casesi_internal (operands[0], + operands[3], +- operands[5])); ++ tmp_reg)); + DONE; + }) + +@@ -2328,17 +2196,34 @@ create_template: + else + return nds32_output_casesi (operands); + } +- [(set_attr "length" "20") +- (set_attr "type" "alu")]) ++ [(set_attr "type" "branch") ++ (set (attr "length") ++ (if_then_else (match_test "flag_pic") ++ (const_int 28) ++ (const_int 20)))]) + + ;; ---------------------------------------------------------------------------- + + ;; Performance Extension + ++; If -fwrapv option is issued, GCC expects there will be ++; signed overflow situation. So the ABS(INT_MIN) is still INT_MIN ++; (e.g. ABS(0x80000000)=0x80000000). ++; However, the hardware ABS instruction of nds32 target ++; always performs saturation: abs 0x80000000 -> 0x7fffffff. ++; So that we can only enable abssi2 pattern if flag_wrapv is NOT presented. 
++(define_insn "abssi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (abs:SI (match_operand:SI 1 "register_operand" " r")))] ++ "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv" ++ "abs\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "register_operand" " r")))] +- "TARGET_PERF_EXT" ++ "TARGET_EXT_PERF" + "clz\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) +@@ -2347,34 +2232,212 @@ create_template: + [(set (match_operand:SI 0 "register_operand" "=r") + (smax:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] +- "TARGET_PERF_EXT" ++ "TARGET_EXT_PERF" + "max\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + ++(define_expand "uminqi3" ++ [(set (match_operand:QI 0 "register_operand" "") ++ (umin:QI (match_operand:QI 1 "register_operand" "") ++ (match_operand:QI 2 "register_operand" "")))] ++ "TARGET_EXT_PERF" ++{ ++ rtx tmpop[3]; ++ tmpop[0] = gen_reg_rtx (SImode); ++ tmpop[1] = gen_reg_rtx (SImode); ++ tmpop[2] = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_zero_extendqisi2 (tmpop[1], operands[1])); ++ emit_insn (gen_zero_extendqisi2 (tmpop[2], operands[2])); ++ emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); ++ convert_move (operands[0], tmpop[0], false); ++ DONE; ++}) ++ ++(define_expand "sminqi3" ++ [(set (match_operand:QI 0 "register_operand" "") ++ (smin:QI (match_operand:QI 1 "register_operand" "") ++ (match_operand:QI 2 "register_operand" "")))] ++ "TARGET_EXT_PERF" ++{ ++ rtx tmpop[3]; ++ tmpop[0] = gen_reg_rtx (SImode); ++ tmpop[1] = gen_reg_rtx (SImode); ++ tmpop[2] = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_extendqisi2 (tmpop[1], operands[1])); ++ emit_insn (gen_extendqisi2 (tmpop[2], operands[2])); ++ emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); ++ convert_move (operands[0], tmpop[0], false); ++ DONE; ++}) ++ 
++(define_expand "uminhi3" ++ [(set (match_operand:HI 0 "register_operand" "") ++ (umin:HI (match_operand:HI 1 "register_operand" "") ++ (match_operand:HI 2 "register_operand" "")))] ++ "TARGET_EXT_PERF" ++{ ++ rtx tmpop[3]; ++ tmpop[0] = gen_reg_rtx (SImode); ++ tmpop[1] = gen_reg_rtx (SImode); ++ tmpop[2] = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_zero_extendhisi2 (tmpop[1], operands[1])); ++ emit_insn (gen_zero_extendhisi2 (tmpop[2], operands[2])); ++ emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); ++ convert_move (operands[0], tmpop[0], false); ++ DONE; ++}) ++ ++(define_expand "sminhi3" ++ [(set (match_operand:HI 0 "register_operand" "") ++ (smin:HI (match_operand:HI 1 "register_operand" "") ++ (match_operand:HI 2 "register_operand" "")))] ++ "TARGET_EXT_PERF" ++{ ++ rtx tmpop[3]; ++ tmpop[0] = gen_reg_rtx (SImode); ++ tmpop[1] = gen_reg_rtx (SImode); ++ tmpop[2] = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_extendhisi2 (tmpop[1], operands[1])); ++ emit_insn (gen_extendhisi2 (tmpop[2], operands[2])); ++ emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); ++ convert_move (operands[0], tmpop[0], false); ++ DONE; ++}) ++ + (define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (smin:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] +- "TARGET_PERF_EXT" ++ "TARGET_EXT_PERF" + "min\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +-(define_insn "*btst" +- [(set (match_operand:SI 0 "register_operand" "= r") +- (zero_extract:SI (match_operand:SI 1 "register_operand" " r") ++(define_insn "btst" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (zero_extract:SI (match_operand:SI 1 "register_operand" " r") + (const_int 1) +- (match_operand:SI 2 "immediate_operand" " Iu05")))] +- "TARGET_PERF_EXT" ++ (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")))] ++ "TARGET_EXT_PERF" + "btst\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + 
++(define_insn "ave" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (ashiftrt:DI ++ (plus:DI ++ (plus:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) ++ (const_int 1)) ++ (const_int 1))))] ++ "TARGET_EXT_PERF" ++ "ave\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ + ;; ---------------------------------------------------------------------------- + + ;; Pseudo NOPs + ++;; Structural hazards NOP ++(define_insn "nop_res_dep" ++ [(unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_VOLATILE_RES_DEP)] ++ "" ++ "! structural dependency (%0 cycles)" ++ [(set_attr "length" "0")] ++) ++ ++;; Data hazards NOP ++(define_insn "nop_data_dep" ++ [(unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_VOLATILE_DATA_DEP)] ++ "" ++ "! data dependency (%0 cycles)" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "relax_group" ++ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)] ++ "" ++ ".relax_hint %0" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "innermost_loop_begin" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN)] ++ "" ++ ".innermost_loop_begin" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "innermost_loop_end" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_INNERMOST_LOOP_END)] ++ "" ++ ".innermost_loop_end" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "no_ifc_begin" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_IFC_BEGIN)] ++ "" ++ ".no_ifc_begin" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "no_ifc_end" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_IFC_END)] ++ "" ++ ".no_ifc_end" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "no_ex9_begin" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_EX9_BEGIN)] ++ "" ++ ".no_ex9_begin" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn 
"no_ex9_end" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_EX9_END)] ++ "" ++ ".no_ex9_end" ++ [(set_attr "length" "0")] ++) ++ ++(define_insn "hwloop_last_insn" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_HWLOOP_LAST_INSN)] ++ "" ++ "" ++ [(set_attr "length" "0")] ++) ++ ++;; Output .omit_fp_begin for fp-as-gp optimization. ++;; Also we have to set $fp register. ++(define_insn "omit_fp_begin" ++ [(set (match_operand:SI 0 "register_operand" "=x") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))] ++ "" ++ "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----" ++ [(set_attr "length" "8")] ++) ++ ++;; Output .omit_fp_end for fp-as-gp optimization. ++;; Claim that we have to use $fp register. ++(define_insn "omit_fp_end" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)] ++ "" ++ "! -----\;.omit_fp_end\;! -----" ++ [(set_attr "length" "0")] ++) ++ + (define_insn "pop25return" + [(return) + (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)] +@@ -2383,4 +2446,262 @@ create_template: + [(set_attr "length" "0")] + ) + ++;; Add pc ++(define_insn "add_pc" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") ++ (pc)))] ++ "TARGET_LINUX_ABI || flag_pic" ++ "add5.pc\t%0" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++{ ++ emit_insn (gen_unspec_wsbh (operands[0], operands[1])); ++ emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16))); ++ DONE; ++}) ++ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r")))] ++ "" ++ "wsbh\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++;; Hardware loop ++ ++; operand 0 is the loop count pseudo 
register ++; operand 1 is the label to jump to at the top of the loop ++(define_expand "doloop_end" ++ [(parallel [(set (pc) (if_then_else ++ (ne (match_operand:SI 0 "" "") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) ++ (set (match_dup 0) ++ (plus:SI (match_dup 0) ++ (const_int -1))) ++ (unspec [(const_int 0)] UNSPEC_LOOP_END) ++ (clobber (match_dup 2))])] ; match_scratch ++ "NDS32_HW_LOOP_P ()" ++{ ++ /* The loop optimizer doesn't check the predicates... */ ++ if (GET_MODE (operands[0]) != SImode) ++ FAIL; ++ operands[2] = gen_rtx_SCRATCH (SImode); ++}) ++ ++(define_insn "loop_end" ++ [(set (pc) ++ (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "0, 0, *r, 0") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) ++ (set (match_operand:SI 0 "nonimmediate_operand" "=r, m, m, *f") ++ (plus:SI (match_dup 3) ++ (const_int -1))) ++ (unspec [(const_int 0)] UNSPEC_LOOP_END) ++ (clobber (match_scratch:SI 2 "=X, &r, &r, &r"))] ++ "NDS32_HW_LOOP_P ()" ++ "#" ++ [(set_attr "length" "12, 12, 12, 12")]) ++ ++(define_split ++ [(set (pc) ++ (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "") ++ (const_int 1)) ++ (label_ref (match_operand 1 "" "")) ++ (pc))) ++ (set (match_operand:SI 0 "fpu_reg_or_memory_operand" "") ++ (plus:SI (match_dup 3) ++ (const_int -1))) ++ (unspec [(const_int 0)] UNSPEC_LOOP_END) ++ (clobber (match_scratch:SI 2 ""))] ++ "NDS32_HW_LOOP_P ()" ++ [(set (match_dup 2) (plus:SI (match_dup 3) (const_int -1))) ++ (set (match_dup 0) (match_dup 2)) ++ (set (pc) ++ (if_then_else (ne (match_dup 2) (const_int 0)) ++ (label_ref (match_dup 1)) ++ (pc)))] ++{ ++ if (fpu_reg_or_memory_operand (operands[3], SImode)) ++ { ++ emit_move_insn (operands[2], operands[3]); ++ operands[3] = operands[2]; ++ } ++}) ++ ++(define_insn "mtlbi_hint" ++ [(set (reg:SI LB_REGNUM) ++ (match_operand:SI 0 "nds32_label_operand" "i")) ++ (unspec [(match_operand 1 "const_int_operand" "i")] UNSPEC_LOOP_END)] ++ "NDS32_HW_LOOP_P ()" ++ 
"mtlbi\t%0" ++ [(set_attr "length" "4")]) ++ ++(define_insn "mtlbi" ++ [(set (reg:SI LB_REGNUM) ++ (match_operand:SI 0 "nds32_label_operand" "i"))] ++ "NDS32_HW_LOOP_P ()" ++ "mtlbi\t%0" ++ [(set_attr "length" "4")]) ++ ++(define_insn "mtlei" ++ [(set (reg:SI LE_REGNUM) ++ (match_operand:SI 0 "nds32_label_operand" "i"))] ++ "NDS32_HW_LOOP_P ()" ++ "mtlei\t%0" ++ [(set_attr "length" "4")]) ++ ++(define_insn "init_lc" ++ [(set (reg:SI LC_REGNUM) ++ (match_operand:SI 0 "register_operand" "r")) ++ (unspec [(match_operand 1 "const_int_operand" "i")] UNSPEC_LOOP_END)] ++ "NDS32_HW_LOOP_P ()" ++ "mtusr\t%0, LC" ++ [(set_attr "length" "4")]) ++ ++; After replace hwloop, use this is pattern to get right CFG ++(define_insn "hwloop_cfg" ++ [(set (pc) ++ (if_then_else (ne (reg:SI LC_REGNUM) ++ (const_int 1)) ++ (match_operand:SI 1 "nds32_label_operand" "i") ++ (pc))) ++ (set (reg:SI LC_REGNUM) ++ (plus:SI (reg:SI LC_REGNUM) ++ (const_int -1))) ++ (use (reg:SI LB_REGNUM)) ++ (use (reg:SI LE_REGNUM)) ++ (use (reg:SI LC_REGNUM)) ++ (unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_LOOP_END)] ++ "TARGET_HWLOOP" ++ "" ++ [(set_attr "length" "0")]) ++;; ---------------------------------------------------------------------------- ++ ++;; Patterns for exception handling ++ ++(define_expand "eh_return" ++ [(use (match_operand 0 "general_operand"))] ++ "" ++{ ++ emit_insn (gen_nds32_eh_return (operands[0])); ++ DONE; ++}) ++ ++(define_insn_and_split "nds32_eh_return" ++ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_EH_RETURN)] ++ "" ++ "#" ++ "reload_completed" ++ [(const_int 0)] ++{ ++ rtx place; ++ rtx addr; ++ ++ /* The operands[0] is the handler address. We need to assign it ++ to return address rtx so that we can jump to exception handler ++ when returning from current function. */ ++ ++ if (cfun->machine->lp_size == 0) ++ { ++ /* If $lp is not saved in the stack frame, we can take $lp directly. 
*/ ++ place = gen_rtx_REG (SImode, LP_REGNUM); ++ } ++ else ++ { ++ /* Otherwise, we need to locate the stack slot of return address. ++ The return address is generally saved in [$fp-4] location. ++ However, DSE (dead store elimination) does not detect an alias ++ between [$fp-x] and [$sp+y]. This can result in a store to save ++ $lp introduced by builtin_eh_return() being incorrectly deleted ++ if it is based on $fp. The solution we take here is to compute ++ the offset relative to stack pointer and then use $sp to access ++ location so that the alias can be detected. ++ FIXME: What if the immediate value "offset" is too large to be ++ fit in a single addi instruction? */ ++ HOST_WIDE_INT offset; ++ ++ offset = (cfun->machine->fp_size ++ + cfun->machine->gp_size ++ + cfun->machine->lp_size ++ + cfun->machine->callee_saved_gpr_regs_size ++ + cfun->machine->callee_saved_area_gpr_padding_bytes ++ + cfun->machine->callee_saved_fpr_regs_size ++ + cfun->machine->eh_return_data_regs_size ++ + cfun->machine->local_size ++ + cfun->machine->out_args_size); ++ ++ addr = plus_constant (Pmode, stack_pointer_rtx, offset - 4); ++ place = gen_frame_mem (SImode, addr); ++ } ++ ++ emit_move_insn (place, operands[0]); ++ DONE; ++}) ++ ++;; ---------------------------------------------------------------------------- ++ ++;; Patterns for TLS. ++;; The following two tls patterns don't be expanded directly because the ++;; intermediate value may be spilled into the stack. As a result, it is ++;; hard to analyze the define-use chain in the relax_opt pass. ++ ++ ++;; There is a unspec operand to record RELAX_GROUP number because each ++;; emitted instruction need a relax_hint above it. 
++(define_insn "tls_desc" ++ [(set (reg:SI 0) ++ (call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC) ++ (const_int 1))) ++ (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) ++ (use (reg:SI GP_REGNUM)) ++ (clobber (reg:SI LP_REGNUM)) ++ (clobber (reg:SI TA_REGNUM))] ++ "" ++ { ++ return nds32_output_tls_desc (operands); ++ } ++ [(set_attr "length" "20") ++ (set_attr "type" "branch")] ++) ++ ++;; There is a unspec operand to record RELAX_GROUP number because each ++;; emitted instruction need a relax_hint above it. ++(define_insn "tls_ie" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE)) ++ (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) ++ (use (reg:SI GP_REGNUM))] ++ "" ++ { ++ return nds32_output_tls_ie (operands); ++ } ++ [(set (attr "length") (if_then_else (match_test "flag_pic") ++ (const_int 12) ++ (const_int 8))) ++ (set_attr "type" "misc")] ++) ++ ++;; The pattern is for some relaxation groups that have to keep addsi3 in 32-bit mode. 
++(define_insn "addsi3_32bit" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "%r") ++ (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))] ++ "" ++ "add\t%0, %1, %2"; ++ [(set_attr "type" "alu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ + ;; ---------------------------------------------------------------------------- +diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt +index 938136f..a70ced9 100644 +--- a/gcc/config/nds32/nds32.opt ++++ b/gcc/config/nds32/nds32.opt +@@ -21,14 +21,67 @@ + HeaderInclude + config/nds32/nds32-opts.h + +-mbig-endian +-Target Report RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN) ++; --------------------------------------------------------------- ++; The following options are designed for aliasing and compatibility options. ++ ++EB ++Target RejectNegative Alias(mbig-endian) + Generate code in big-endian mode. + +-mlittle-endian +-Target Report RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) ++EL ++Target RejectNegative Alias(mlittle-endian) + Generate code in little-endian mode. + ++mfp-as-gp ++Target RejectNegative Alias(mforce-fp-as-gp) ++Force performing fp-as-gp optimization. ++ ++mno-fp-as-gp ++Target RejectNegative Alias(mforbid-fp-as-gp) ++Forbid performing fp-as-gp optimization. ++ ++m16bit ++Target Undocumented Alias(m16-bit) ++Generate 16-bit instructions. ++ ++mcrt-arg=yes ++Target Undocumented Alias(mcrt-arg) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++mreduce-regs ++Target Undocumented Alias(mreduced-regs) ++Use reduced-set registers for register allocation. 
++ ++mcache-line-size= ++Target RejectNegative Joined UInteger Undocumented Alias(mcache-block-size=) ++Alias of -mcache-block-size= ++ ++; --------------------------------------------------------------- ++ ++mabi= ++Target RejectNegative Joined Enum(abi_type) Var(nds32_abi) Init(TARGET_DEFAULT_ABI) ++Specify which ABI type to generate code for: 2, 2fp+. ++ ++Enum ++Name(abi_type) Type(enum abi_type) ++Known ABIs (for use with the -mabi= option): ++ ++EnumValue ++Enum(abi_type) String(2) Value(NDS32_ABI_V2) ++ ++EnumValue ++Enum(abi_type) String(2fp+) Value(NDS32_ABI_V2_FP_PLUS) ++ ++mfloat-abi=soft ++Target RejectNegative Alias(mabi=, 2) ++Specify use soft floating point ABI which mean alias to -mabi=2. ++ ++mfloat-abi=hard ++Target RejectNegative Alias(mabi=, 2fp+) ++Specify use hard floating point ABI which mean alias to -mabi=2fp+. ++ ++; --------------------------------------------------------------- ++ + mreduced-regs + Target Report RejectNegative Negative(mfull-regs) Mask(REDUCED_REGS) + Use reduced-set registers for register allocation. +@@ -37,14 +90,148 @@ mfull-regs + Target Report RejectNegative Negative(mreduced-regs) InverseMask(REDUCED_REGS) + Use full-set registers for register allocation. + ++; --------------------------------------------------------------- ++ ++Os1 ++Target ++Optimize for size level 1. This option will disable IFC and EX9 to prevent performance drop. ++ ++Os2 ++Target ++Optimize for size level 2. This option will disable IFC and EX9 for innermost loop to prevent performance drop. ++ ++Os3 ++Target ++Optimize for size level 3 which mean don't care performance. ++ ++malways-align ++Target Mask(ALWAYS_ALIGN) ++Always align function entry, jump target and return address. ++ ++malign-functions ++Target Mask(ALIGN_FUNCTION) ++Align function entry to 4 byte. ++ ++mbig-endian ++Target Undocumented RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN) ++Generate code in big-endian mode.
++ ++mlittle-endian ++Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) ++Generate code in little-endian mode. ++ ++mforce-fp-as-gp ++Target Undocumented Mask(FORCE_FP_AS_GP) ++Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. ++ ++mforbid-fp-as-gp ++Target Undocumented Mask(FORBID_FP_AS_GP) ++Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. ++ ++minline-strcpy ++Target Undocumented Mask(INLINE_STRCPY) ++Inlining strcpy function. ++ ++mload-store-opt ++Target Mask(LOAD_STORE_OPT) ++Enable load store optimization. ++ ++mregrename ++Target Mask(REGRENAME_OPT) ++Enable target dependent register rename optimization. ++ ++mgcse ++Target Mask(GCSE_OPT) ++Enable target dependent global CSE optimization. ++ ++mconst-remater ++Target Var(flag_nds32_const_remater_opt) ++Enable target dependent constant rematerialization optimization. ++ ++msoft-fp-arith-comm ++Target Mask(SOFT_FP_ARITH_COMM) ++Enable operand commutative for soft floating point arithmetic optimization. ++ ++msign-conversion ++Target Var(flag_nds32_sign_conversion) ++Enable the sign conversion in Gimple level. ++ ++mscalbn-transform ++Target Var(flag_nds32_scalbn_transform) ++Enable the scalbn transform in Gimple level. ++ ++mlmwsmw-opt ++Target Var(flag_nds32_lmwsmw_opt) ++Enable the load/store multiple optimization. ++ ++mict-model= ++Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL) ++Specify the address generation strategy for ICT call's code model.
++ ++Enum ++Name(nds32_ict_model_type) Type(enum nds32_ict_model_type) ++Known cmodel types (for use with the -mict-model= option): ++ ++EnumValue ++Enum(nds32_ict_model_type) String(small) Value(ICT_MODEL_SMALL) ++ ++EnumValue ++Enum(nds32_ict_model_type) String(large) Value(ICT_MODEL_LARGE) ++ ++mlmwsmw-cost= ++Target RejectNegative Joined Enum(lmwsmw_cost_type) Var(flag_lmwsmw_cost) Init(LMWSMW_OPT_AUTO) ++Specify the load/store insn generate to lmw/smw. ++ ++Enum ++Name(lmwsmw_cost_type) Type(enum lmwsmw_cost_type) ++Known lmwsmw cost type (for use with the -mlmwsmw-cost= option): ++ ++EnumValue ++Enum(lmwsmw_cost_type) String(size) Value(LMWSMW_OPT_SIZE) ++ ++EnumValue ++Enum(lmwsmw_cost_type) String(speed) Value(LMWSMW_OPT_SPEED) ++ ++EnumValue ++Enum(lmwsmw_cost_type) String(all) Value(LMWSMW_OPT_ALL) ++ ++EnumValue ++Enum(lmwsmw_cost_type) String(auto) Value(LMWSMW_OPT_AUTO) ++ ++mabi-compatible ++Target Var(flag_nds32_abi_compatible) ++Enable the ABI compatible detection. ++ ++mcprop-acc ++Target Var(flag_nds32_cprop_acc) ++Enable the copy propagation for accumulate style instructions. ++ ++; --------------------------------------------------------------- ++ + mcmov + Target Report Mask(CMOV) + Generate conditional move instructions. + +-mperf-ext +-Target Report Mask(PERF_EXT) ++mhw-abs ++Target Report Mask(HW_ABS) ++Generate hardware abs instructions. ++ ++mext-perf ++Target Report Mask(EXT_PERF) + Generate performance extension instructions. + ++mext-perf2 ++Target Report Mask(EXT_PERF2) ++Generate performance extension version 2 instructions. ++ ++mext-string ++Target Report Mask(EXT_STRING) ++Generate string extension instructions. ++ ++mext-dsp ++Target Report Mask(EXT_DSP) ++Generate DSP extension instructions. ++ + mv3push + Target Report Mask(V3PUSH) + Generate v3 push25/pop25 instructions. +@@ -53,10 +240,22 @@ m16-bit + Target Report Mask(16_BIT) + Generate 16-bit instructions. 
+ ++mrelax-hint ++Target Report Mask(RELAX_HINT) ++Insert relax hint for linker to do relaxation. ++ ++mvh ++Target Report Mask(VH) Condition(!TARGET_LINUX_ABI) ++Enable Virtual Hosting support. ++ + misr-vector-size= +-Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) ++Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) Condition(!TARGET_LINUX_ABI) + Specify the size of each interrupt vector, which must be 4 or 16. + ++misr-secure= ++Target RejectNegative Joined UInteger Var(nds32_isr_secure_level) Init(0) ++Specify the security level of c-isr for the whole file. ++ + mcache-block-size= + Target RejectNegative Joined UInteger Var(nds32_cache_block_size) Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE) + Specify the size of each cache block, which must be a power of 2 between 4 and 512. +@@ -73,32 +272,418 @@ EnumValue + Enum(nds32_arch_type) String(v2) Value(ARCH_V2) + + EnumValue ++Enum(nds32_arch_type) String(v2j) Value(ARCH_V2J) ++ ++EnumValue + Enum(nds32_arch_type) String(v3) Value(ARCH_V3) + + EnumValue ++Enum(nds32_arch_type) String(v3j) Value(ARCH_V3J) ++ ++EnumValue + Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M) + +-mcmodel= +-Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM) +-Specify the address generation strategy for code model. ++EnumValue ++Enum(nds32_arch_type) String(v3m+) Value(ARCH_V3M_PLUS) ++ ++EnumValue ++Enum(nds32_arch_type) String(v3f) Value(ARCH_V3F) ++ ++EnumValue ++Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S) ++ ++mcpu= ++Target RejectNegative Joined Enum(nds32_cpu_type) Var(nds32_cpu_option) Init(CPU_N9) ++Specify the cpu for pipeline model. 
+ + Enum +-Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) +-Known cmodel types (for use with the -mcmodel= option): ++Name(nds32_cpu_type) Type(enum nds32_cpu_type) ++Known cpu types (for use with the -mcpu= option): ++ ++EnumValue ++Enum(nds32_cpu_type) String(n6) Value(CPU_N6) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n650) Value(CPU_N6) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n7) Value(CPU_N7) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n705) Value(CPU_N7) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n8) Value(CPU_N8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n801) Value(CPU_N8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(sn8) Value(CPU_N8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(sn801) Value(CPU_N8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(s8) Value(CPU_N8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(s801) Value(CPU_N8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(e8) Value(CPU_E8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(e801) Value(CPU_E8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n820) Value(CPU_E8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(s830) Value(CPU_E8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(e830) Value(CPU_E8) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n9) Value(CPU_N9) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n903) Value(CPU_N9) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n903a) Value(CPU_N9) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n968) Value(CPU_N9) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n968a) Value(CPU_N9) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n10) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10) + + EnumValue +-Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) ++Enum(nds32_cpu_type) String(n1068) Value(CPU_N10) + + EnumValue 
+-Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) ++Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10) + + EnumValue +-Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) ++Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d10) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d1088) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) Undocumented String(graywolf) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n15) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d15) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n15s) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d15s) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n15f) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d15f) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n12) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1213) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1233) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n13) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1337) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) Undocumented String(panther) Value(CPU_PANTHER) ++ ++EnumValue ++Enum(nds32_cpu_type) Undocumented 
String(simple) Value(CPU_SIMPLE) ++ ++mcpu=n15 ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++Alias for multi-lib work. ++ ++mcpu=n15f ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++Alias for multi-lib work. ++ ++mcpu=n15s ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++Alias for multi-lib work. ++ ++mcpu=d15 ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++Alias for multi-lib work. ++ ++mcpu=d15s ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++Alias for multi-lib work. ++ ++mcpu=d15f ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++Alias for multi-lib work. ++ ++mgraywolf ++Target RejectNegative Undocumented Alias(mcpu=, graywolf) ++This alias is only for gcc parallel test. ++ ++mv3m+ ++Target RejectNegative Undocumented Alias(march=, v3m+) ++This alias is only for gcc parallel test. ++ ++mmemory-model= ++Target RejectNegative Joined Enum(nds32_memory_model_type) Var(nds32_memory_model_option) Init(MEMORY_MODEL_FAST) ++Specify the memory model, fast or slow memory. ++ ++Enum ++Name(nds32_memory_model_type) Type(enum nds32_memory_model_type) ++ ++EnumValue ++Enum(nds32_memory_model_type) String(slow) Value(MEMORY_MODEL_SLOW) ++ ++EnumValue ++Enum(nds32_memory_model_type) String(fast) Value(MEMORY_MODEL_FAST) ++ ++mconfig-fpu= ++Target RejectNegative Joined Enum(float_reg_number) Var(nds32_fp_regnum) Init(TARGET_CONFIG_FPU_DEFAULT) ++Specify a fpu configuration value from 0 to 7; 0-3 is as FPU spec says, and 4-7 is corresponding to 0-3. 
++ ++Enum ++Name(float_reg_number) Type(enum float_reg_number) ++Known floating-point number of registers (for use with the -mconfig-fpu= option): ++ ++EnumValue ++Enum(float_reg_number) String(0) Value(NDS32_CONFIG_FPU_0) ++ ++EnumValue ++Enum(float_reg_number) String(1) Value(NDS32_CONFIG_FPU_1) ++ ++EnumValue ++Enum(float_reg_number) String(2) Value(NDS32_CONFIG_FPU_2) ++ ++EnumValue ++Enum(float_reg_number) String(3) Value(NDS32_CONFIG_FPU_3) ++ ++EnumValue ++Enum(float_reg_number) String(4) Value(NDS32_CONFIG_FPU_4) ++ ++EnumValue ++Enum(float_reg_number) String(5) Value(NDS32_CONFIG_FPU_5) ++ ++EnumValue ++Enum(float_reg_number) String(6) Value(NDS32_CONFIG_FPU_6) ++ ++EnumValue ++Enum(float_reg_number) String(7) Value(NDS32_CONFIG_FPU_7) ++ ++mconfig-mul= ++Target RejectNegative Joined Enum(nds32_mul_type) Var(nds32_mul_config) Init(MUL_TYPE_FAST_1) ++Specify configuration of instruction mul: fast1, fast2 or slow. The default is fast1. ++ ++Enum ++Name(nds32_mul_type) Type(enum nds32_mul_type) ++ ++EnumValue ++Enum(nds32_mul_type) String(fast) Value(MUL_TYPE_FAST_1) ++ ++EnumValue ++Enum(nds32_mul_type) String(fast1) Value(MUL_TYPE_FAST_1) ++ ++EnumValue ++Enum(nds32_mul_type) String(fast2) Value(MUL_TYPE_FAST_2) ++ ++EnumValue ++Enum(nds32_mul_type) String(slow) Value(MUL_TYPE_SLOW) ++ ++mconfig-register-ports= ++Target RejectNegative Joined Enum(nds32_register_ports) Var(nds32_register_ports_config) Init(REG_PORT_3R2W) ++Specify how many read/write ports for n9/n10 cores. The value should be 3r2w or 2r1w. ++ ++Enum ++Name(nds32_register_ports) Type(enum nds32_register_ports) ++ ++EnumValue ++Enum(nds32_register_ports) String(3r2w) Value(REG_PORT_3R2W) ++ ++EnumValue ++Enum(nds32_register_ports) String(2r1w) Value(REG_PORT_2R1W) ++ ++mreorg-out-of-order ++Target Report Var(flag_reorg_out_of_order) Init(0) ++Allow out-of-order reorganization for multiple issue micro-architectures. 
++ ++mifc ++Target Report Mask(IFC) ++Use special directives to guide linker doing ifc optimization. ++ ++mex9 ++Target Report Mask(EX9) ++Use special directives to guide linker doing ex9 optimization. ++ ++mprint-stall-cycles ++Target Report Mask(PRINT_STALLS) ++Print stall cycles due to structural or data dependencies. It should be used with the option '-S'. ++Note that stall cycles are determined by the compiler's pipeline model and it may not be precise. + + mctor-dtor + Target Report + Enable constructor/destructor feature. + ++mcrt-arg ++Target Report ++Enable argc/argv passed by simulator. ++ + mrelax + Target Report + Guide linker to relax instructions. ++ ++minnermost-loop ++Target Report Mask(INNERMOST_LOOP) ++Insert the innermost loop directive. ++ ++mext-fpu-fma ++Target Report Mask(EXT_FPU_FMA) ++Generate floating-point multiply-accumulation instructions. ++ ++mext-fpu-sp ++Target Report Mask(FPU_SINGLE) ++Generate single-precision floating-point instructions. ++ ++mext-fpu-dp ++Target Report Mask(FPU_DOUBLE) ++Generate double-precision floating-point instructions. ++ ++mext-zol ++Target Report Mask(HWLOOP) ++Insert the hardware loop directive. ++ ++mforce-no-ext-zol ++Target Undocumented Report Mask(FORCE_NO_HWLOOP) ++Force disable hardware loop, even if -mext-zol is used. ++ ++mforce-no-ext-dsp ++Target Undocumented Report Mask(FORCE_NO_EXT_DSP) ++Force disable DSP extension, even if -mext-dsp is used. ++ ++mforce-memcpy-zol ++Target Report Var(flag_force_memcpy_zol) Init(0) ++Force enable hardware loop in memcpy function. ++ ++msched-prolog-epilog ++Target Var(flag_sched_prolog_epilog) Init(1) ++Permit scheduling of a function's prologue and epilogue sequence. ++ ++mret-in-naked-func ++Target Var(flag_ret_in_naked_func) Init(1) ++Generate return instruction in naked function. ++ ++malways-save-lp ++Target Var(flag_always_save_lp) Init(0) ++Always save $lp in the stack.
++ ++munaligned-access ++Target Report Var(flag_unaligned_access) Init(0) ++Enable unaligned word and halfword accesses to packed data. ++ ++; --------------------------------------------------------------- ++; The following options are designed for compatibility issue. ++; Hopefully these obsolete options will be removed one day. ++ ++mg ++Target Undocumented Warn(%qs is deprecated and has no effect) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++mdx-regs ++Target Undocumented Warn(%qs is deprecated and has no effect) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++mexpand-isr ++Target Undocumented Warn(%qs is deprecated and has no effect) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++mcrt-cpp=yes ++Target Undocumented Warn(%qs is deprecated and has no effect, use -mctor-dtor instead) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++mcrt-exit=yes ++Target Undocumented Warn(%qs is deprecated and has no effect, use -mctor-dtor instead) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++mlib= ++Target RejectNegative Joined Undocumented Warn(%qs is deprecated and has no effect) ++Obsolete option. Users SHOULD NOT use this option in the command line. ++ ++; --------------------------------------------------------------- ++; The following options control Andes ACE support ++; (compilation and the ACE source-to-source translator). ++ ++mace ++Target RejectNegative ++Compile with Andes ACE. ++ ++mace-s2s= ++Target Joined RejectNegative ++Argument to pass to Andes's ACE source-to-source translator.
++ ++ ++; --------------------------------------------------------------- +diff --git a/gcc/config/nds32/nds32_init.inc b/gcc/config/nds32/nds32_init.inc +new file mode 100644 +index 0000000..1084ad0 +--- /dev/null ++++ b/gcc/config/nds32/nds32_init.inc +@@ -0,0 +1,43 @@ ++/* ++ * nds32_init.inc ++ * ++ * NDS32 architecture startup assembler header file ++ * ++ */ ++ ++.macro nds32_init ++ ++ ! Initialize GP for data access ++ la $gp, _SDA_BASE_ ++ ++#if defined(__NDS32_EXT_EX9__) ++ ! Check HW for EX9 ++ mfsr $r0, $MSC_CFG ++ li $r1, (1 << 24) ++ and $r2, $r0, $r1 ++ beqz $r2, 1f ++ ++ ! Initialize the table base of EX9 instruction ++ la $r0, _ITB_BASE_ ++ mtusr $r0, $ITB ++1: ++#endif ++ ++#if defined(__NDS32_EXT_FPU_DP__) || defined(__NDS32_EXT_FPU_SP__) ++ ! Enable FPU ++ mfsr $r0, $FUCOP_CTL ++ ori $r0, $r0, #0x1 ++ mtsr $r0, $FUCOP_CTL ++ dsb ++ ++ ! Enable denormalized flush-to-zero mode ++ fmfcsr $r0 ++ ori $r0,$r0,#0x1000 ++ fmtcsr $r0 ++ dsb ++#endif ++ ++ ! Initialize default stack pointer ++ la $sp, _stack ++ ++.endm +diff --git a/gcc/config/nds32/nds32_intrinsic.h b/gcc/config/nds32/nds32_intrinsic.h +index 3e868dc..fef727b 100644 +--- a/gcc/config/nds32/nds32_intrinsic.h ++++ b/gcc/config/nds32/nds32_intrinsic.h +@@ -26,12 +26,1383 @@ + #ifndef _NDS32_INTRINSIC_H + #define _NDS32_INTRINSIC_H + ++typedef signed char int8x4_t __attribute ((vector_size(4))); ++typedef short int16x2_t __attribute ((vector_size(4))); ++typedef int int32x2_t __attribute__((vector_size(8))); ++typedef unsigned char uint8x4_t __attribute__ ((vector_size (4))); ++typedef unsigned short uint16x2_t __attribute__ ((vector_size (4))); ++typedef unsigned int uint32x2_t __attribute__((vector_size(8))); ++ ++/* General intrinsic register names.
*/ + enum nds32_intrinsic_registers + { +- __NDS32_REG_PSW__ = 1024, ++ __NDS32_REG_CPU_VER__ = 1024, ++ __NDS32_REG_ICM_CFG__, ++ __NDS32_REG_DCM_CFG__, ++ __NDS32_REG_MMU_CFG__, ++ __NDS32_REG_MSC_CFG__, ++ __NDS32_REG_MSC_CFG2__, ++ __NDS32_REG_CORE_ID__, ++ __NDS32_REG_FUCOP_EXIST__, ++ ++ __NDS32_REG_PSW__, + __NDS32_REG_IPSW__, ++ __NDS32_REG_P_IPSW__, ++ __NDS32_REG_IVB__, ++ __NDS32_REG_EVA__, ++ __NDS32_REG_P_EVA__, + __NDS32_REG_ITYPE__, +- __NDS32_REG_IPC__ ++ __NDS32_REG_P_ITYPE__, ++ ++ __NDS32_REG_MERR__, ++ __NDS32_REG_IPC__, ++ __NDS32_REG_P_IPC__, ++ __NDS32_REG_OIPC__, ++ __NDS32_REG_P_P0__, ++ __NDS32_REG_P_P1__, ++ ++ __NDS32_REG_INT_MASK__, ++ __NDS32_REG_INT_MASK2__, ++ __NDS32_REG_INT_MASK3__, ++ __NDS32_REG_INT_PEND__, ++ __NDS32_REG_INT_PEND2__, ++ __NDS32_REG_INT_PEND3__, ++ __NDS32_REG_SP_USR__, ++ __NDS32_REG_SP_PRIV__, ++ __NDS32_REG_INT_PRI__, ++ __NDS32_REG_INT_PRI2__, ++ __NDS32_REG_INT_PRI3__, ++ __NDS32_REG_INT_PRI4__, ++ __NDS32_REG_INT_CTRL__, ++ __NDS32_REG_INT_TRIGGER__, ++ __NDS32_REG_INT_TRIGGER2__, ++ __NDS32_REG_INT_GPR_PUSH_DIS__, ++ ++ __NDS32_REG_MMU_CTL__, ++ __NDS32_REG_L1_PPTB__, ++ __NDS32_REG_TLB_VPN__, ++ __NDS32_REG_TLB_DATA__, ++ __NDS32_REG_TLB_MISC__, ++ __NDS32_REG_VLPT_IDX__, ++ __NDS32_REG_ILMB__, ++ __NDS32_REG_DLMB__, ++ ++ __NDS32_REG_CACHE_CTL__, ++ __NDS32_REG_HSMP_SADDR__, ++ __NDS32_REG_HSMP_EADDR__, ++ __NDS32_REG_SDZ_CTL__, ++ __NDS32_REG_N12MISC_CTL__, ++ __NDS32_REG_MISC_CTL__, ++ __NDS32_REG_ECC_MISC__, ++ ++ __NDS32_REG_BPC0__, ++ __NDS32_REG_BPC1__, ++ __NDS32_REG_BPC2__, ++ __NDS32_REG_BPC3__, ++ __NDS32_REG_BPC4__, ++ __NDS32_REG_BPC5__, ++ __NDS32_REG_BPC6__, ++ __NDS32_REG_BPC7__, ++ ++ __NDS32_REG_BPA0__, ++ __NDS32_REG_BPA1__, ++ __NDS32_REG_BPA2__, ++ __NDS32_REG_BPA3__, ++ __NDS32_REG_BPA4__, ++ __NDS32_REG_BPA5__, ++ __NDS32_REG_BPA6__, ++ __NDS32_REG_BPA7__, ++ ++ __NDS32_REG_BPAM0__, ++ __NDS32_REG_BPAM1__, ++ __NDS32_REG_BPAM2__, ++ __NDS32_REG_BPAM3__, ++ __NDS32_REG_BPAM4__, ++ 
__NDS32_REG_BPAM5__, ++ __NDS32_REG_BPAM6__, ++ __NDS32_REG_BPAM7__, ++ ++ __NDS32_REG_BPV0__, ++ __NDS32_REG_BPV1__, ++ __NDS32_REG_BPV2__, ++ __NDS32_REG_BPV3__, ++ __NDS32_REG_BPV4__, ++ __NDS32_REG_BPV5__, ++ __NDS32_REG_BPV6__, ++ __NDS32_REG_BPV7__, ++ ++ __NDS32_REG_BPCID0__, ++ __NDS32_REG_BPCID1__, ++ __NDS32_REG_BPCID2__, ++ __NDS32_REG_BPCID3__, ++ __NDS32_REG_BPCID4__, ++ __NDS32_REG_BPCID5__, ++ __NDS32_REG_BPCID6__, ++ __NDS32_REG_BPCID7__, ++ ++ __NDS32_REG_EDM_CFG__, ++ __NDS32_REG_EDMSW__, ++ __NDS32_REG_EDM_CTL__, ++ __NDS32_REG_EDM_DTR__, ++ __NDS32_REG_BPMTC__, ++ __NDS32_REG_DIMBR__, ++ ++ __NDS32_REG_TECR0__, ++ __NDS32_REG_TECR1__, ++ __NDS32_REG_PFMC0__, ++ __NDS32_REG_PFMC1__, ++ __NDS32_REG_PFMC2__, ++ __NDS32_REG_PFM_CTL__, ++ __NDS32_REG_PFT_CTL__, ++ __NDS32_REG_HSP_CTL__, ++ __NDS32_REG_SP_BOUND__, ++ __NDS32_REG_SP_BOUND_PRIV__, ++ __NDS32_REG_SP_BASE__, ++ __NDS32_REG_SP_BASE_PRIV__, ++ __NDS32_REG_FUCOP_CTL__, ++ __NDS32_REG_PRUSR_ACC_CTL__, ++ ++ __NDS32_REG_DMA_CFG__, ++ __NDS32_REG_DMA_GCSW__, ++ __NDS32_REG_DMA_CHNSEL__, ++ __NDS32_REG_DMA_ACT__, ++ __NDS32_REG_DMA_SETUP__, ++ __NDS32_REG_DMA_ISADDR__, ++ __NDS32_REG_DMA_ESADDR__, ++ __NDS32_REG_DMA_TCNT__, ++ __NDS32_REG_DMA_STATUS__, ++ __NDS32_REG_DMA_2DSET__, ++ __NDS32_REG_DMA_2DSCTL__, ++ __NDS32_REG_DMA_RCNT__, ++ __NDS32_REG_DMA_HSTATUS__, ++ ++ __NDS32_REG_PC__, ++ __NDS32_REG_SP_USR1__, ++ __NDS32_REG_SP_USR2__, ++ __NDS32_REG_SP_USR3__, ++ __NDS32_REG_SP_PRIV1__, ++ __NDS32_REG_SP_PRIV2__, ++ __NDS32_REG_SP_PRIV3__, ++ __NDS32_REG_BG_REGION__, ++ __NDS32_REG_SFCR__, ++ __NDS32_REG_SIGN__, ++ __NDS32_REG_ISIGN__, ++ __NDS32_REG_P_ISIGN__, ++ __NDS32_REG_IFC_LP__, ++ __NDS32_REG_ITB__ + }; + ++/* The cctl subtype for intrinsic. 
*/ ++enum nds32_cctl_valck ++{ ++ __NDS32_CCTL_L1D_VA_FILLCK__, ++ __NDS32_CCTL_L1D_VA_ULCK__, ++ __NDS32_CCTL_L1I_VA_FILLCK__, ++ __NDS32_CCTL_L1I_VA_ULCK__ ++}; ++ ++enum nds32_cctl_idxwbinv ++{ ++ __NDS32_CCTL_L1D_IX_WBINVAL__, ++ __NDS32_CCTL_L1D_IX_INVAL__, ++ __NDS32_CCTL_L1D_IX_WB__, ++ __NDS32_CCTL_L1I_IX_INVAL__ ++}; ++ ++enum nds32_cctl_vawbinv ++{ ++ __NDS32_CCTL_L1D_VA_INVAL__, ++ __NDS32_CCTL_L1D_VA_WB__, ++ __NDS32_CCTL_L1D_VA_WBINVAL__, ++ __NDS32_CCTL_L1I_VA_INVAL__ ++}; ++ ++enum nds32_cctl_idxread ++{ ++ __NDS32_CCTL_L1D_IX_RTAG__, ++ __NDS32_CCTL_L1D_IX_RWD__, ++ __NDS32_CCTL_L1I_IX_RTAG__, ++ __NDS32_CCTL_L1I_IX_RWD__ ++}; ++ ++enum nds32_cctl_idxwrite ++{ ++ __NDS32_CCTL_L1D_IX_WTAG__, ++ __NDS32_CCTL_L1D_IX_WWD__, ++ __NDS32_CCTL_L1I_IX_WTAG__, ++ __NDS32_CCTL_L1I_IX_WWD__ ++}; ++ ++enum nds32_dpref ++{ ++ __NDS32_DPREF_SRD__, ++ __NDS32_DPREF_MRD__, ++ __NDS32_DPREF_SWR__, ++ __NDS32_DPREF_MWR__, ++ __NDS32_DPREF_PTE__, ++ __NDS32_DPREF_CLWR__ ++}; ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* Define interrupt number for intrinsic function. 
*/ ++#define NDS32_INT_H0 0 ++#define NDS32_INT_H1 1 ++#define NDS32_INT_H2 2 ++#define NDS32_INT_H3 3 ++#define NDS32_INT_H4 4 ++#define NDS32_INT_H5 5 ++#define NDS32_INT_H6 6 ++#define NDS32_INT_H7 7 ++#define NDS32_INT_H8 8 ++#define NDS32_INT_H9 9 ++#define NDS32_INT_H10 10 ++#define NDS32_INT_H11 11 ++#define NDS32_INT_H12 12 ++#define NDS32_INT_H13 13 ++#define NDS32_INT_H14 14 ++#define NDS32_INT_H15 15 ++#define NDS32_INT_H16 16 ++#define NDS32_INT_H17 17 ++#define NDS32_INT_H18 18 ++#define NDS32_INT_H19 19 ++#define NDS32_INT_H20 20 ++#define NDS32_INT_H21 21 ++#define NDS32_INT_H22 22 ++#define NDS32_INT_H23 23 ++#define NDS32_INT_H24 24 ++#define NDS32_INT_H25 25 ++#define NDS32_INT_H26 26 ++#define NDS32_INT_H27 27 ++#define NDS32_INT_H28 28 ++#define NDS32_INT_H29 29 ++#define NDS32_INT_H30 30 ++#define NDS32_INT_H31 31 ++#define NDS32_INT_H32 32 ++#define NDS32_INT_H33 33 ++#define NDS32_INT_H34 34 ++#define NDS32_INT_H35 35 ++#define NDS32_INT_H36 36 ++#define NDS32_INT_H37 37 ++#define NDS32_INT_H38 38 ++#define NDS32_INT_H39 39 ++#define NDS32_INT_H40 40 ++#define NDS32_INT_H41 41 ++#define NDS32_INT_H42 42 ++#define NDS32_INT_H43 43 ++#define NDS32_INT_H44 44 ++#define NDS32_INT_H45 45 ++#define NDS32_INT_H46 46 ++#define NDS32_INT_H47 47 ++#define NDS32_INT_H48 48 ++#define NDS32_INT_H49 49 ++#define NDS32_INT_H50 50 ++#define NDS32_INT_H51 51 ++#define NDS32_INT_H52 52 ++#define NDS32_INT_H53 53 ++#define NDS32_INT_H54 54 ++#define NDS32_INT_H55 55 ++#define NDS32_INT_H56 56 ++#define NDS32_INT_H57 57 ++#define NDS32_INT_H58 58 ++#define NDS32_INT_H59 59 ++#define NDS32_INT_H60 60 ++#define NDS32_INT_H61 61 ++#define NDS32_INT_H62 62 ++#define NDS32_INT_H63 63 ++#define NDS32_INT_SWI 64 ++#define NDS32_INT_ALZ 65 ++#define NDS32_INT_IDIVZE 66 ++#define NDS32_INT_DSSIM 67 ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* Define intrinsic register name macro for compatibility. 
*/ ++#define NDS32_SR_CPU_VER __NDS32_REG_CPU_VER__ ++#define NDS32_SR_ICM_CFG __NDS32_REG_ICM_CFG__ ++#define NDS32_SR_DCM_CFG __NDS32_REG_DCM_CFG__ ++#define NDS32_SR_MMU_CFG __NDS32_REG_MMU_CFG__ ++#define NDS32_SR_MSC_CFG __NDS32_REG_MSC_CFG__ ++#define NDS32_SR_MSC_CFG2 __NDS32_REG_MSC_CFG2__ ++#define NDS32_SR_CORE_ID __NDS32_REG_CORE_ID__ ++#define NDS32_SR_FUCOP_EXIST __NDS32_REG_FUCOP_EXIST__ ++#define NDS32_SR_PSW __NDS32_REG_PSW__ ++#define NDS32_SR_IPSW __NDS32_REG_IPSW__ ++#define NDS32_SR_P_IPSW __NDS32_REG_P_IPSW__ ++#define NDS32_SR_IVB __NDS32_REG_IVB__ ++#define NDS32_SR_EVA __NDS32_REG_EVA__ ++#define NDS32_SR_P_EVA __NDS32_REG_P_EVA__ ++#define NDS32_SR_ITYPE __NDS32_REG_ITYPE__ ++#define NDS32_SR_P_ITYPE __NDS32_REG_P_ITYPE__ ++#define NDS32_SR_MERR __NDS32_REG_MERR__ ++#define NDS32_SR_IPC __NDS32_REG_IPC__ ++#define NDS32_SR_P_IPC __NDS32_REG_P_IPC__ ++#define NDS32_SR_OIPC __NDS32_REG_OIPC__ ++#define NDS32_SR_P_P0 __NDS32_REG_P_P0__ ++#define NDS32_SR_P_P1 __NDS32_REG_P_P1__ ++#define NDS32_SR_INT_MASK __NDS32_REG_INT_MASK__ ++#define NDS32_SR_INT_MASK2 __NDS32_REG_INT_MASK2__ ++#define NDS32_SR_INT_MASK3 __NDS32_REG_INT_MASK3__ ++#define NDS32_SR_INT_PEND __NDS32_REG_INT_PEND__ ++#define NDS32_SR_INT_PEND2 __NDS32_REG_INT_PEND2__ ++#define NDS32_SR_INT_PEND3 __NDS32_REG_INT_PEND3__ ++#define NDS32_SR_SP_USR __NDS32_REG_SP_USR__ ++#define NDS32_SR_SP_PRIV __NDS32_REG_SP_PRIV__ ++#define NDS32_SR_INT_PRI __NDS32_REG_INT_PRI__ ++#define NDS32_SR_INT_PRI2 __NDS32_REG_INT_PRI2__ ++#define NDS32_SR_INT_PRI3 __NDS32_REG_INT_PRI3__ ++#define NDS32_SR_INT_PRI4 __NDS32_REG_INT_PRI4__ ++#define NDS32_SR_INT_CTRL __NDS32_REG_INT_CTRL__ ++#define NDS32_SR_INT_TRIGGER __NDS32_REG_INT_TRIGGER__ ++#define NDS32_SR_INT_TRIGGER2 __NDS32_REG_INT_TRIGGER2__ ++#define NDS32_SR_INT_GPR_PUSH_DIS __NDS32_REG_INT_GPR_PUSH_DIS__ ++#define NDS32_SR_MMU_CTL __NDS32_REG_MMU_CTL__ ++#define NDS32_SR_L1_PPTB __NDS32_REG_L1_PPTB__ ++#define NDS32_SR_TLB_VPN 
__NDS32_REG_TLB_VPN__ ++#define NDS32_SR_TLB_DATA __NDS32_REG_TLB_DATA__ ++#define NDS32_SR_TLB_MISC __NDS32_REG_TLB_MISC__ ++#define NDS32_SR_VLPT_IDX __NDS32_REG_VLPT_IDX__ ++#define NDS32_SR_ILMB __NDS32_REG_ILMB__ ++#define NDS32_SR_DLMB __NDS32_REG_DLMB__ ++#define NDS32_SR_CACHE_CTL __NDS32_REG_CACHE_CTL__ ++#define NDS32_SR_HSMP_SADDR __NDS32_REG_HSMP_SADDR__ ++#define NDS32_SR_HSMP_EADDR __NDS32_REG_HSMP_EADDR__ ++#define NDS32_SR_SDZ_CTL __NDS32_REG_SDZ_CTL__ ++#define NDS32_SR_N12MISC_CTL __NDS32_REG_N12MISC_CTL__ ++#define NDS32_SR_MISC_CTL __NDS32_REG_MISC_CTL__ ++#define NDS32_SR_ECC_MISC __NDS32_REG_ECC_MISC__ ++#define NDS32_SR_BPC0 __NDS32_REG_BPC0__ ++#define NDS32_SR_BPC1 __NDS32_REG_BPC1__ ++#define NDS32_SR_BPC2 __NDS32_REG_BPC2__ ++#define NDS32_SR_BPC3 __NDS32_REG_BPC3__ ++#define NDS32_SR_BPC4 __NDS32_REG_BPC4__ ++#define NDS32_SR_BPC5 __NDS32_REG_BPC5__ ++#define NDS32_SR_BPC6 __NDS32_REG_BPC6__ ++#define NDS32_SR_BPC7 __NDS32_REG_BPC7__ ++#define NDS32_SR_BPA0 __NDS32_REG_BPA0__ ++#define NDS32_SR_BPA1 __NDS32_REG_BPA1__ ++#define NDS32_SR_BPA2 __NDS32_REG_BPA2__ ++#define NDS32_SR_BPA3 __NDS32_REG_BPA3__ ++#define NDS32_SR_BPA4 __NDS32_REG_BPA4__ ++#define NDS32_SR_BPA5 __NDS32_REG_BPA5__ ++#define NDS32_SR_BPA6 __NDS32_REG_BPA6__ ++#define NDS32_SR_BPA7 __NDS32_REG_BPA7__ ++#define NDS32_SR_BPAM0 __NDS32_REG_BPAM0__ ++#define NDS32_SR_BPAM1 __NDS32_REG_BPAM1__ ++#define NDS32_SR_BPAM2 __NDS32_REG_BPAM2__ ++#define NDS32_SR_BPAM3 __NDS32_REG_BPAM3__ ++#define NDS32_SR_BPAM4 __NDS32_REG_BPAM4__ ++#define NDS32_SR_BPAM5 __NDS32_REG_BPAM5__ ++#define NDS32_SR_BPAM6 __NDS32_REG_BPAM6__ ++#define NDS32_SR_BPAM7 __NDS32_REG_BPAM7__ ++#define NDS32_SR_BPV0 __NDS32_REG_BPV0__ ++#define NDS32_SR_BPV1 __NDS32_REG_BPV1__ ++#define NDS32_SR_BPV2 __NDS32_REG_BPV2__ ++#define NDS32_SR_BPV3 __NDS32_REG_BPV3__ ++#define NDS32_SR_BPV4 __NDS32_REG_BPV4__ ++#define NDS32_SR_BPV5 __NDS32_REG_BPV5__ ++#define NDS32_SR_BPV6 __NDS32_REG_BPV6__ ++#define 
NDS32_SR_BPV7 __NDS32_REG_BPV7__ ++#define NDS32_SR_BPCID0 __NDS32_REG_BPCID0__ ++#define NDS32_SR_BPCID1 __NDS32_REG_BPCID1__ ++#define NDS32_SR_BPCID2 __NDS32_REG_BPCID2__ ++#define NDS32_SR_BPCID3 __NDS32_REG_BPCID3__ ++#define NDS32_SR_BPCID4 __NDS32_REG_BPCID4__ ++#define NDS32_SR_BPCID5 __NDS32_REG_BPCID5__ ++#define NDS32_SR_BPCID6 __NDS32_REG_BPCID6__ ++#define NDS32_SR_BPCID7 __NDS32_REG_BPCID7__ ++#define NDS32_SR_EDM_CFG __NDS32_REG_EDM_CFG__ ++#define NDS32_SR_EDMSW __NDS32_REG_EDMSW__ ++#define NDS32_SR_EDM_CTL __NDS32_REG_EDM_CTL__ ++#define NDS32_SR_EDM_DTR __NDS32_REG_EDM_DTR__ ++#define NDS32_SR_BPMTC __NDS32_REG_BPMTC__ ++#define NDS32_SR_DIMBR __NDS32_REG_DIMBR__ ++#define NDS32_SR_TECR0 __NDS32_REG_TECR0__ ++#define NDS32_SR_TECR1 __NDS32_REG_TECR1__ ++#define NDS32_SR_PFMC0 __NDS32_REG_PFMC0__ ++#define NDS32_SR_PFMC1 __NDS32_REG_PFMC1__ ++#define NDS32_SR_PFMC2 __NDS32_REG_PFMC2__ ++#define NDS32_SR_PFM_CTL __NDS32_REG_PFM_CTL__ ++#define NDS32_SR_HSP_CTL __NDS32_REG_HSP_CTL__ ++#define NDS32_SR_SP_BOUND __NDS32_REG_SP_BOUND__ ++#define NDS32_SR_SP_BOUND_PRIV __NDS32_REG_SP_BOUND_PRIV__ ++#define NDS32_SR_SP_BASE __NDS32_REG_SP_BASE__ ++#define NDS32_SR_SP_BASE_PRIV __NDS32_REG_SP_BASE_PRIV__ ++#define NDS32_SR_FUCOP_CTL __NDS32_REG_FUCOP_CTL__ ++#define NDS32_SR_PRUSR_ACC_CTL __NDS32_REG_PRUSR_ACC_CTL__ ++#define NDS32_SR_DMA_CFG __NDS32_REG_DMA_CFG__ ++#define NDS32_SR_DMA_GCSW __NDS32_REG_DMA_GCSW__ ++#define NDS32_SR_DMA_CHNSEL __NDS32_REG_DMA_CHNSEL__ ++#define NDS32_SR_DMA_ACT __NDS32_REG_DMA_ACT__ ++#define NDS32_SR_DMA_SETUP __NDS32_REG_DMA_SETUP__ ++#define NDS32_SR_DMA_ISADDR __NDS32_REG_DMA_ISADDR__ ++#define NDS32_SR_DMA_ESADDR __NDS32_REG_DMA_ESADDR__ ++#define NDS32_SR_DMA_TCNT __NDS32_REG_DMA_TCNT__ ++#define NDS32_SR_DMA_STATUS __NDS32_REG_DMA_STATUS__ ++#define NDS32_SR_DMA_2DSET __NDS32_REG_DMA_2DSET__ ++#define NDS32_SR_DMA_2DSCTL __NDS32_REG_DMA_2DSCTL__ ++#define NDS32_SR_DMA_RCNT __NDS32_REG_DMA_RCNT__ ++#define 
NDS32_SR_DMA_HSTATUS __NDS32_REG_DMA_HSTATUS__ ++#define NDS32_SR_SP_USR1 __NDS32_REG_SP_USR1__ ++#define NDS32_SR_SP_USR2 __NDS32_REG_SP_USR2__ ++#define NDS32_SR_SP_USR3 __NDS32_REG_SP_USR3__ ++#define NDS32_SR_SP_PRIV1 __NDS32_REG_SP_PRIV1__ ++#define NDS32_SR_SP_PRIV2 __NDS32_REG_SP_PRIV2__ ++#define NDS32_SR_SP_PRIV3 __NDS32_REG_SP_PRIV3__ ++#define NDS32_SR_BG_REGION __NDS32_REG_BG_REGION__ ++#define NDS32_SR_SFCR __NDS32_REG_SFCR__ ++#define NDS32_SR_SIGN __NDS32_REG_SIGN__ ++#define NDS32_SR_ISIGN __NDS32_REG_ISIGN__ ++#define NDS32_SR_P_ISIGN __NDS32_REG_P_ISIGN__ ++ ++#define NDS32_USR_PC __NDS32_REG_PC__ ++#define NDS32_USR_DMA_CFG __NDS32_REG_DMA_CFG__ ++#define NDS32_USR_DMA_GCSW __NDS32_REG_DMA_GCSW__ ++#define NDS32_USR_DMA_CHNSEL __NDS32_REG_DMA_CHNSEL__ ++#define NDS32_USR_DMA_ACT __NDS32_REG_DMA_ACT__ ++#define NDS32_USR_DMA_SETUP __NDS32_REG_DMA_SETUP__ ++#define NDS32_USR_DMA_ISADDR __NDS32_REG_DMA_ISADDR__ ++#define NDS32_USR_DMA_ESADDR __NDS32_REG_DMA_ESADDR__ ++#define NDS32_USR_DMA_TCNT __NDS32_REG_DMA_TCNT__ ++#define NDS32_USR_DMA_STATUS __NDS32_REG_DMA_STATUS__ ++#define NDS32_USR_DMA_2DSET __NDS32_REG_DMA_2DSET__ ++#define NDS32_USR_DMA_2DSCTL __NDS32_REG_DMA_2DSCTL__ ++#define NDS32_USR_PFMC0 __NDS32_REG_PFMC0__ ++#define NDS32_USR_PFMC1 __NDS32_REG_PFMC1__ ++#define NDS32_USR_PFMC2 __NDS32_REG_PFMC2__ ++#define NDS32_USR_PFM_CTL __NDS32_REG_PFM_CTL__ ++#define NDS32_USR_IFC_LP __NDS32_REG_IFC_LP__ ++#define NDS32_USR_ITB __NDS32_REG_ITB__ ++ ++#define NDS32_CCTL_L1D_VA_FILLCK __NDS32_CCTL_L1D_VA_FILLCK__ ++#define NDS32_CCTL_L1D_VA_ULCK __NDS32_CCTL_L1D_VA_ULCK__ ++#define NDS32_CCTL_L1I_VA_FILLCK __NDS32_CCTL_L1I_VA_FILLCK__ ++#define NDS32_CCTL_L1I_VA_ULCK __NDS32_CCTL_L1I_VA_ULCK__ ++ ++#define NDS32_CCTL_L1D_IX_WBINVAL __NDS32_CCTL_L1D_IX_WBINVAL__ ++#define NDS32_CCTL_L1D_IX_INVAL __NDS32_CCTL_L1D_IX_INVAL__ ++#define NDS32_CCTL_L1D_IX_WB __NDS32_CCTL_L1D_IX_WB__ ++#define NDS32_CCTL_L1I_IX_INVAL __NDS32_CCTL_L1I_IX_INVAL__ ++ 
++#define NDS32_CCTL_L1D_VA_INVAL __NDS32_CCTL_L1D_VA_INVAL__ ++#define NDS32_CCTL_L1D_VA_WB __NDS32_CCTL_L1D_VA_WB__ ++#define NDS32_CCTL_L1D_VA_WBINVAL __NDS32_CCTL_L1D_VA_WBINVAL__ ++#define NDS32_CCTL_L1I_VA_INVAL __NDS32_CCTL_L1I_VA_INVAL__ ++ ++#define NDS32_CCTL_L1D_IX_RTAG __NDS32_CCTL_L1D_IX_RTAG__ ++#define NDS32_CCTL_L1D_IX_RWD __NDS32_CCTL_L1D_IX_RWD__ ++#define NDS32_CCTL_L1I_IX_RTAG __NDS32_CCTL_L1I_IX_RTAG__ ++#define NDS32_CCTL_L1I_IX_RWD __NDS32_CCTL_L1I_IX_RWD__ ++ ++#define NDS32_CCTL_L1D_IX_WTAG __NDS32_CCTL_L1D_IX_WTAG__ ++#define NDS32_CCTL_L1D_IX_WWD __NDS32_CCTL_L1D_IX_WWD__ ++#define NDS32_CCTL_L1I_IX_WTAG __NDS32_CCTL_L1I_IX_WTAG__ ++#define NDS32_CCTL_L1I_IX_WWD __NDS32_CCTL_L1I_IX_WWD__ ++ ++#define NDS32_DPREF_SRD __NDS32_DPREF_SRD__ ++#define NDS32_DPREF_MRD __NDS32_DPREF_MRD__ ++#define NDS32_DPREF_SWR __NDS32_DPREF_SWR__ ++#define NDS32_DPREF_MWR __NDS32_DPREF_MWR__ ++#define NDS32_DPREF_PTE __NDS32_DPREF_PTE__ ++#define NDS32_DPREF_CLWR __NDS32_DPREF_CLWR__ ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* Define user friendly macro. */ ++#define SIGNATURE_BEGIN __nds32__signature_begin () ++#define SIGNATURE_END __nds32__signature_end () ++ ++/* Map __nds32__xxx() to __builtin_xxx() functions for compatibility. 
*/ ++#define __nds32__llw(a) \ ++ (__builtin_nds32_llw ((a))) ++#define __nds32__lwup(a) \ ++ (__builtin_nds32_lwup ((a))) ++#define __nds32__lbup(a) \ ++ (__builtin_nds32_lbup ((a))) ++#define __nds32__scw(a, b) \ ++ (__builtin_nds32_scw ((a), (b))) ++#define __nds32__swup(a, b) \ ++ (__builtin_nds32_swup ((a), (b))) ++#define __nds32__sbup(a, b) \ ++ (__builtin_nds32_sbup ((a), (b))) ++ ++#define __nds32__mfsr(srname) \ ++ (__builtin_nds32_mfsr ((srname))) ++#define __nds32__mfusr(usrname) \ ++ (__builtin_nds32_mfusr ((usrname))) ++#define __nds32__mtsr(val, srname) \ ++ (__builtin_nds32_mtsr ((val), (srname))) ++#define __nds32__mtsr_isb(val, srname) \ ++ (__builtin_nds32_mtsr_isb ((val), (srname))) ++#define __nds32__mtsr_dsb(val, srname) \ ++ (__builtin_nds32_mtsr_dsb ((val), (srname))) ++#define __nds32__mtusr(val, usrname) \ ++ (__builtin_nds32_mtusr ((val), (usrname))) ++ ++#define __nds32__break(swid) \ ++ (__builtin_nds32_break(swid)) ++#define __nds32__cctlva_lck(subtype, va) \ ++ (__builtin_nds32_cctl_va_lck ((subtype), (va))) ++#define __nds32__cctlidx_wbinval(subtype, idx) \ ++ (__builtin_nds32_cctl_idx_wbinval ((subtype), (idx))) ++#define __nds32__cctlva_wbinval_alvl(subtype, va) \ ++ (__builtin_nds32_cctl_va_wbinval_la ((subtype), (va))) ++#define __nds32__cctlva_wbinval_one_lvl(subtype, va) \ ++ (__builtin_nds32_cctl_va_wbinval_l1 ((subtype), (va))) ++#define __nds32__cctlidx_read(subtype, idx) \ ++ (__builtin_nds32_cctl_idx_read ((subtype), (idx))) ++#define __nds32__cctlidx_write(subtype, b, idxw) \ ++ (__builtin_nds32_cctl_idx_write ((subtype), (b), (idxw))) ++#define __nds32__cctl_l1d_invalall() \ ++ (__builtin_nds32_cctl_l1d_invalall()) ++#define __nds32__cctl_l1d_wball_alvl() \ ++ (__builtin_nds32_cctl_l1d_wball_alvl()) ++#define __nds32__cctl_l1d_wball_one_lvl() \ ++ (__builtin_nds32_cctl_l1d_wball_one_lvl()) ++ ++#define __nds32__dsb() \ ++ (__builtin_nds32_dsb()) ++#define __nds32__isb() \ ++ (__builtin_nds32_isb()) ++#define 
__nds32__msync_store() \ ++ (__builtin_nds32_msync_store()) ++#define __nds32__msync_all() \ ++ (__builtin_nds32_msync_all()) ++#define __nds32__nop() \ ++ (__builtin_nds32_nop()) ++ ++#define __nds32__standby_wait_done() \ ++ (__builtin_nds32_standby_wait_done()) ++#define __nds32__standby_no_wake_grant() \ ++ (__builtin_nds32_standby_no_wake_grant()) ++#define __nds32__standby_wake_grant() \ ++ (__builtin_nds32_standby_wake_grant()) ++#define __nds32__schedule_barrier() \ ++ (__builtin_nds32_schedule_barrier()) ++#define __nds32__setend_big() \ ++ (__builtin_nds32_setend_big()) ++#define __nds32__setend_little() \ ++ (__builtin_nds32_setend_little()) ++#define __nds32__setgie_en() \ ++ (__builtin_nds32_setgie_en()) ++#define __nds32__setgie_dis() \ ++ (__builtin_nds32_setgie_dis()) ++ ++#define __nds32__jr_itoff(a) \ ++ (__builtin_nds32_jr_itoff ((a))) ++#define __nds32__jr_toff(a) \ ++ (__builtin_nds32_jr_toff ((a))) ++#define __nds32__jral_iton(a) \ ++ (__builtin_nds32_jral_iton ((a))) ++#define __nds32__jral_ton(a) \ ++ (__builtin_nds32_jral_ton ((a))) ++#define __nds32__ret_itoff(a) \ ++ (__builtin_nds32_ret_itoff ((a))) ++#define __nds32__ret_toff(a) \ ++ (__builtin_nds32_ret_toff ((a))) ++#define __nds32__svs(a, b) \ ++ (__builtin_nds32_svs ((a), (b))) ++#define __nds32__sva(a, b) \ ++ (__builtin_nds32_sva ((a), (b))) ++#define __nds32__dpref_qw(a, b, subtype) \ ++ (__builtin_nds32_dpref_qw ((a), (b), (subtype))) ++#define __nds32__dpref_hw(a, b, subtype) \ ++ (__builtin_nds32_dpref_hw ((a), (b), (subtype))) ++#define __nds32__dpref_w(a, b, subtype) \ ++ (__builtin_nds32_dpref_w ((a), (b), (subtype))) ++#define __nds32__dpref_dw(a, b, subtype) \ ++ (__builtin_nds32_dpref_dw ((a), (b), (subtype))) ++ ++#define __nds32__teqz(a, swid) \ ++ (__builtin_nds32_teqz ((a), (swid))) ++#define __nds32__tnez(a, swid) \ ++ ( __builtin_nds32_tnez ((a), (swid))) ++#define __nds32__trap(swid) \ ++ (__builtin_nds32_trap ((swid))) ++#define __nds32__isync(a) \ ++ 
(__builtin_nds32_isync ((a))) ++#define __nds32__rotr(val, ror) \ ++ (__builtin_nds32_rotr ((val), (ror))) ++#define __nds32__wsbh(a) \ ++ (__builtin_nds32_wsbh ((a))) ++#define __nds32__syscall(a) \ ++ (__builtin_nds32_syscall ((a))) ++#define __nds32__return_address() \ ++ (__builtin_nds32_return_address()) ++#define __nds32__get_current_sp() \ ++ (__builtin_nds32_get_current_sp()) ++#define __nds32__set_current_sp(a) \ ++ (__builtin_nds32_set_current_sp ((a))) ++#define __nds32__abs(a) \ ++ (__builtin_nds32_pe_abs ((a))) ++#define __nds32__ave(a, b) \ ++ (__builtin_nds32_pe_ave ((a), (b))) ++#define __nds32__bclr(a, pos) \ ++ (__builtin_nds32_pe_bclr ((a), (pos))) ++#define __nds32__bset(a, pos) \ ++ (__builtin_nds32_pe_bset ((a), (pos))) ++#define __nds32__btgl(a, pos) \ ++ (__builtin_nds32_pe_btgl ((a), (pos))) ++#define __nds32__btst(a, pos) \ ++ (__builtin_nds32_pe_btst ((a), (pos))) ++ ++#define __nds32__clip(a, imm) \ ++ (__builtin_nds32_pe_clip ((a), (imm))) ++#define __nds32__clips(a, imm) \ ++ (__builtin_nds32_pe_clips ((a), (imm))) ++#define __nds32__clz(a) \ ++ (__builtin_nds32_pe_clz ((a))) ++#define __nds32__clo(a) \ ++ (__builtin_nds32_pe_clo ((a))) ++#define __nds32__bse(r, a, b) \ ++ (__builtin_nds32_pe2_bse ((r), (a), (b))) ++#define __nds32__bsp(r, a, b) \ ++ (__builtin_nds32_pe2_bsp ((r), (a), (b))) ++#define __nds32__pbsad(a, b) \ ++ (__builtin_nds32_pe2_pbsad ((a), (b))) ++#define __nds32__pbsada(acc, a, b) \ ++ (__builtin_nds32_pe2_pbsada ((acc), (a), (b))) ++ ++#define __nds32__ffb(a, b) \ ++ (__builtin_nds32_se_ffb ((a), (b))) ++#define __nds32__ffmism(a, b) \ ++ (__builtin_nds32_se_ffmism ((a), (b))) ++#define __nds32__flmism(a, b) \ ++ (__builtin_nds32_se_flmism ((a), (b))) ++#define __nds32__fcpynsd(a, b) \ ++ (__builtin_nds32_fcpynsd ((a), (b))) ++#define __nds32__fcpynss(a, b) \ ++ (__builtin_nds32_fcpynss ((a), (b))) ++#define __nds32__fcpysd(a, b) \ ++ (__builtin_nds32_fcpysd ((a), (b))) ++#define __nds32__fcpyss(a, b) \ ++ 
(__builtin_nds32_fcpyss ((a), (b))) ++#define __nds32__fmfcsr() \ ++ (__builtin_nds32_fmfcsr()) ++#define __nds32__fmtcsr(fpcsr) \ ++ (__builtin_nds32_fmtcsr ((fpcsr))) ++#define __nds32__fmfcfg() \ ++ (__builtin_nds32_fmfcfg()) ++ ++#define __nds32__tlbop_trd(a) \ ++ (__builtin_nds32_tlbop_trd ((a))) ++#define __nds32__tlbop_twr(a) \ ++ (__builtin_nds32_tlbop_twr ((a))) ++#define __nds32__tlbop_rwr(a) \ ++ (__builtin_nds32_tlbop_rwr ((a))) ++#define __nds32__tlbop_rwlk(a) \ ++ (__builtin_nds32_tlbop_rwlk ((a))) ++#define __nds32__tlbop_unlk(a) \ ++ (__builtin_nds32_tlbop_unlk ((a))) ++#define __nds32__tlbop_pb(a) \ ++ (__builtin_nds32_tlbop_pb ((a))) ++#define __nds32__tlbop_inv(a) \ ++ (__builtin_nds32_tlbop_inv ((a))) ++#define __nds32__tlbop_flua() \ ++(__builtin_nds32_tlbop_flua()) ++ ++#define __nds32__kaddw(a, b) \ ++ (__builtin_nds32_kaddw ((a), (b))) ++#define __nds32__kaddh(a, b) \ ++ (__builtin_nds32_kaddh ((a), (b))) ++#define __nds32__ksubw(a, b) \ ++ (__builtin_nds32_ksubw ((a), (b))) ++#define __nds32__ksubh(a, b) \ ++ (__builtin_nds32_ksubh ((a), (b))) ++#define __nds32__kdmbb(a, b) \ ++ (__builtin_nds32_kdmbb ((a), (b))) ++#define __nds32__v_kdmbb(a, b) \ ++ (__builtin_nds32_v_kdmbb ((a), (b))) ++#define __nds32__kdmbt(a, b) \ ++ (__builtin_nds32_kdmbt ((a), (b))) ++#define __nds32__v_kdmbt(a, b) \ ++ (__builtin_nds32_v_kdmbt ((a), (b))) ++#define __nds32__kdmtb(a, b) \ ++ (__builtin_nds32_kdmtb ((a), (b))) ++#define __nds32__v_kdmtb(a, b) \ ++ (__builtin_nds32_v_kdmtb ((a), (b))) ++#define __nds32__kdmtt(a, b) \ ++ (__builtin_nds32_kdmtt ((a), (b))) ++#define __nds32__v_kdmtt(a, b) \ ++ (__builtin_nds32_v_kdmtt ((a), (b))) ++#define __nds32__khmbb(a, b) \ ++ (__builtin_nds32_khmbb ((a), (b))) ++#define __nds32__v_khmbb(a, b) \ ++ (__builtin_nds32_v_khmbb ((a), (b))) ++#define __nds32__khmbt(a, b) \ ++ (__builtin_nds32_khmbt ((a), (b))) ++#define __nds32__v_khmbt(a, b) \ ++ (__builtin_nds32_v_khmbt ((a), (b))) ++#define __nds32__khmtb(a, b) \ ++ 
(__builtin_nds32_khmtb ((a), (b))) ++#define __nds32__v_khmtb(a, b) \ ++ (__builtin_nds32_v_khmtb ((a), (b))) ++#define __nds32__khmtt(a, b) \ ++ (__builtin_nds32_khmtt ((a), (b))) ++#define __nds32__v_khmtt(a, b) \ ++ (__builtin_nds32_v_khmtt ((a), (b))) ++#define __nds32__kslraw(a, b) \ ++ (__builtin_nds32_kslraw ((a), (b))) ++#define __nds32__kslraw_u(a, b) \ ++ (__builtin_nds32_kslraw_u ((a), (b))) ++ ++#define __nds32__rdov() \ ++ (__builtin_nds32_rdov()) ++#define __nds32__clrov() \ ++ (__builtin_nds32_clrov()) ++#define __nds32__gie_dis() \ ++ (__builtin_nds32_gie_dis()) ++#define __nds32__gie_en() \ ++ (__builtin_nds32_gie_en()) ++#define __nds32__enable_int(a) \ ++ (__builtin_nds32_enable_int ((a))) ++#define __nds32__disable_int(a) \ ++ (__builtin_nds32_disable_int ((a))) ++#define __nds32__set_pending_swint() \ ++ (__builtin_nds32_set_pending_swint()) ++#define __nds32__clr_pending_swint() \ ++ (__builtin_nds32_clr_pending_swint()) ++#define __nds32__clr_pending_hwint(a) \ ++ (__builtin_nds32_clr_pending_hwint(a)) ++#define __nds32__get_all_pending_int() \ ++ (__builtin_nds32_get_all_pending_int()) ++#define __nds32__get_pending_int(a) \ ++ (__builtin_nds32_get_pending_int ((a))) ++#define __nds32__set_int_priority(a, b) \ ++ (__builtin_nds32_set_int_priority ((a), (b))) ++#define __nds32__get_int_priority(a) \ ++ (__builtin_nds32_get_int_priority ((a))) ++#define __nds32__set_trig_type_level(a) \ ++ (__builtin_nds32_set_trig_level(a)) ++#define __nds32__set_trig_type_edge(a) \ ++ (__builtin_nds32_set_trig_edge(a)) ++#define __nds32__get_trig_type(a) \ ++ (__builtin_nds32_get_trig_type ((a))) ++ ++#define __nds32__get_unaligned_hw(a) \ ++ (__builtin_nds32_unaligned_load_hw ((a))) ++#define __nds32__get_unaligned_w(a) \ ++ (__builtin_nds32_unaligned_load_w ((a))) ++#define __nds32__get_unaligned_dw(a) \ ++ (__builtin_nds32_unaligned_load_dw ((a))) ++#define __nds32__put_unaligned_hw(a, data) \ ++ (__builtin_nds32_unaligned_store_hw ((a), (data))) 
++#define __nds32__put_unaligned_w(a, data) \ ++ (__builtin_nds32_unaligned_store_w ((a), (data))) ++#define __nds32__put_unaligned_dw(a, data) \ ++ (__builtin_nds32_unaligned_store_dw ((a), (data))) ++ ++#define __nds32__signature_begin() \ ++ (__builtin_nds32_signature_begin ()) ++#define __nds32__signature_end() \ ++ (__builtin_nds32_signature_end ()) ++ ++#define __nds32__add16(a, b) \ ++ (__builtin_nds32_add16 ((a), (b))) ++#define __nds32__v_uadd16(a, b) \ ++ (__builtin_nds32_v_uadd16 ((a), (b))) ++#define __nds32__v_sadd16(a, b) \ ++ (__builtin_nds32_v_sadd16 ((a), (b))) ++#define __nds32__radd16(a, b) \ ++ (__builtin_nds32_radd16 ((a), (b))) ++#define __nds32__v_radd16(a, b) \ ++ (__builtin_nds32_v_radd16 ((a), (b))) ++#define __nds32__uradd16(a, b) \ ++ (__builtin_nds32_uradd16 ((a), (b))) ++#define __nds32__v_uradd16(a, b) \ ++ (__builtin_nds32_v_uradd16 ((a), (b))) ++#define __nds32__kadd16(a, b) \ ++ (__builtin_nds32_kadd16 ((a), (b))) ++#define __nds32__v_kadd16(a, b) \ ++ (__builtin_nds32_v_kadd16 ((a), (b))) ++#define __nds32__ukadd16(a, b) \ ++ (__builtin_nds32_ukadd16 ((a), (b))) ++#define __nds32__v_ukadd16(a, b) \ ++ (__builtin_nds32_v_ukadd16 ((a), (b))) ++#define __nds32__sub16(a, b) \ ++ (__builtin_nds32_sub16 ((a), (b))) ++#define __nds32__v_usub16(a, b) \ ++ (__builtin_nds32_v_usub16 ((a), (b))) ++#define __nds32__v_ssub16(a, b) \ ++ (__builtin_nds32_v_ssub16 ((a), (b))) ++#define __nds32__rsub16(a, b) \ ++ (__builtin_nds32_rsub16 ((a), (b))) ++#define __nds32__v_rsub16(a, b) \ ++ (__builtin_nds32_v_rsub16 ((a), (b))) ++#define __nds32__ursub16(a, b) \ ++ (__builtin_nds32_ursub16 ((a), (b))) ++#define __nds32__v_ursub16(a, b) \ ++ (__builtin_nds32_v_ursub16 ((a), (b))) ++#define __nds32__ksub16(a, b) \ ++ (__builtin_nds32_ksub16 ((a), (b))) ++#define __nds32__v_ksub16(a, b) \ ++ (__builtin_nds32_v_ksub16 ((a), (b))) ++#define __nds32__uksub16(a, b) \ ++ (__builtin_nds32_uksub16 ((a), (b))) ++#define __nds32__v_uksub16(a, b) \ ++ 
(__builtin_nds32_v_uksub16 ((a), (b))) ++#define __nds32__cras16(a, b) \ ++ (__builtin_nds32_cras16 ((a), (b))) ++#define __nds32__v_ucras16(a, b) \ ++ (__builtin_nds32_v_ucras16 ((a), (b))) ++#define __nds32__v_scras16(a, b) \ ++ (__builtin_nds32_v_scras16 ((a), (b))) ++#define __nds32__rcras16(a, b) \ ++ (__builtin_nds32_rcras16 ((a), (b))) ++#define __nds32__v_rcras16(a, b) \ ++ (__builtin_nds32_v_rcras16 ((a), (b))) ++#define __nds32__urcras16(a, b) \ ++ (__builtin_nds32_urcras16 ((a), (b))) ++#define __nds32__v_urcras16(a, b) \ ++ (__builtin_nds32_v_urcras16 ((a), (b))) ++#define __nds32__kcras16(a, b) \ ++ (__builtin_nds32_kcras16 ((a), (b))) ++#define __nds32__v_kcras16(a, b) \ ++ (__builtin_nds32_v_kcras16 ((a), (b))) ++#define __nds32__ukcras16(a, b) \ ++ (__builtin_nds32_ukcras16 ((a), (b))) ++#define __nds32__v_ukcras16(a, b) \ ++ (__builtin_nds32_v_ukcras16 ((a), (b))) ++#define __nds32__crsa16(a, b) \ ++ (__builtin_nds32_crsa16 ((a), (b))) ++#define __nds32__v_ucrsa16(a, b) \ ++ (__builtin_nds32_v_ucrsa16 ((a), (b))) ++#define __nds32__v_scrsa16(a, b) \ ++ (__builtin_nds32_v_scrsa16 ((a), (b))) ++#define __nds32__rcrsa16(a, b) \ ++ (__builtin_nds32_rcrsa16 ((a), (b))) ++#define __nds32__v_rcrsa16(a, b) \ ++ (__builtin_nds32_v_rcrsa16 ((a), (b))) ++#define __nds32__urcrsa16(a, b) \ ++ (__builtin_nds32_urcrsa16 ((a), (b))) ++#define __nds32__v_urcrsa16(a, b) \ ++ (__builtin_nds32_v_urcrsa16 ((a), (b))) ++#define __nds32__kcrsa16(a, b) \ ++ (__builtin_nds32_kcrsa16 ((a), (b))) ++#define __nds32__v_kcrsa16(a, b) \ ++ (__builtin_nds32_v_kcrsa16 ((a), (b))) ++#define __nds32__ukcrsa16(a, b) \ ++ (__builtin_nds32_ukcrsa16 ((a), (b))) ++#define __nds32__v_ukcrsa16(a, b) \ ++ (__builtin_nds32_v_ukcrsa16 ((a), (b))) ++ ++#define __nds32__add8(a, b) \ ++ (__builtin_nds32_add8 ((a), (b))) ++#define __nds32__v_uadd8(a, b) \ ++ (__builtin_nds32_v_uadd8 ((a), (b))) ++#define __nds32__v_sadd8(a, b) \ ++ (__builtin_nds32_v_sadd8 ((a), (b))) ++#define __nds32__radd8(a, 
b) \ ++ (__builtin_nds32_radd8 ((a), (b))) ++#define __nds32__v_radd8(a, b) \ ++ (__builtin_nds32_v_radd8 ((a), (b))) ++#define __nds32__uradd8(a, b) \ ++ (__builtin_nds32_uradd8 ((a), (b))) ++#define __nds32__v_uradd8(a, b) \ ++ (__builtin_nds32_v_uradd8 ((a), (b))) ++#define __nds32__kadd8(a, b) \ ++ (__builtin_nds32_kadd8 ((a), (b))) ++#define __nds32__v_kadd8(a, b) \ ++ (__builtin_nds32_v_kadd8 ((a), (b))) ++#define __nds32__ukadd8(a, b) \ ++ (__builtin_nds32_ukadd8 ((a), (b))) ++#define __nds32__v_ukadd8(a, b) \ ++ (__builtin_nds32_v_ukadd8 ((a), (b))) ++#define __nds32__sub8(a, b) \ ++ (__builtin_nds32_sub8 ((a), (b))) ++#define __nds32__v_usub8(a, b) \ ++ (__builtin_nds32_v_usub8 ((a), (b))) ++#define __nds32__v_ssub8(a, b) \ ++ (__builtin_nds32_v_ssub8 ((a), (b))) ++#define __nds32__rsub8(a, b) \ ++ (__builtin_nds32_rsub8 ((a), (b))) ++#define __nds32__v_rsub8(a, b) \ ++ (__builtin_nds32_v_rsub8 ((a), (b))) ++#define __nds32__ursub8(a, b) \ ++ (__builtin_nds32_ursub8 ((a), (b))) ++#define __nds32__v_ursub8(a, b) \ ++ (__builtin_nds32_v_ursub8 ((a), (b))) ++#define __nds32__ksub8(a, b) \ ++ (__builtin_nds32_ksub8 ((a), (b))) ++#define __nds32__v_ksub8(a, b) \ ++ (__builtin_nds32_v_ksub8 ((a), (b))) ++#define __nds32__uksub8(a, b) \ ++ (__builtin_nds32_uksub8 ((a), (b))) ++#define __nds32__v_uksub8(a, b) \ ++ (__builtin_nds32_v_uksub8 ((a), (b))) ++ ++#define __nds32__sra16(a, b) \ ++ (__builtin_nds32_sra16 ((a), (b))) ++#define __nds32__v_sra16(a, b) \ ++ (__builtin_nds32_v_sra16 ((a), (b))) ++#define __nds32__sra16_u(a, b) \ ++ (__builtin_nds32_sra16_u ((a), (b))) ++#define __nds32__v_sra16_u(a, b) \ ++ (__builtin_nds32_v_sra16_u ((a), (b))) ++#define __nds32__srl16(a, b) \ ++ (__builtin_nds32_srl16 ((a), (b))) ++#define __nds32__v_srl16(a, b) \ ++ (__builtin_nds32_v_srl16 ((a), (b))) ++#define __nds32__srl16_u(a, b) \ ++ (__builtin_nds32_srl16_u ((a), (b))) ++#define __nds32__v_srl16_u(a, b) \ ++ (__builtin_nds32_v_srl16_u ((a), (b))) ++#define 
__nds32__sll16(a, b) \ ++ (__builtin_nds32_sll16 ((a), (b))) ++#define __nds32__v_sll16(a, b) \ ++ (__builtin_nds32_v_sll16 ((a), (b))) ++#define __nds32__ksll16(a, b) \ ++ (__builtin_nds32_ksll16 ((a), (b))) ++#define __nds32__v_ksll16(a, b) \ ++ (__builtin_nds32_v_ksll16 ((a), (b))) ++#define __nds32__kslra16(a, b) \ ++ (__builtin_nds32_kslra16 ((a), (b))) ++#define __nds32__v_kslra16(a, b) \ ++ (__builtin_nds32_v_kslra16 ((a), (b))) ++#define __nds32__kslra16_u(a, b) \ ++ (__builtin_nds32_kslra16_u ((a), (b))) ++#define __nds32__v_kslra16_u(a, b) \ ++ (__builtin_nds32_v_kslra16_u ((a), (b))) ++ ++#define __nds32__cmpeq16(a, b) \ ++ (__builtin_nds32_cmpeq16 ((a), (b))) ++#define __nds32__v_scmpeq16(a, b) \ ++ (__builtin_nds32_v_scmpeq16 ((a), (b))) ++#define __nds32__v_ucmpeq16(a, b) \ ++ (__builtin_nds32_v_ucmpeq16 ((a), (b))) ++#define __nds32__scmplt16(a, b) \ ++ (__builtin_nds32_scmplt16 ((a), (b))) ++#define __nds32__v_scmplt16(a, b) \ ++ (__builtin_nds32_v_scmplt16 ((a), (b))) ++#define __nds32__scmple16(a, b) \ ++ (__builtin_nds32_scmple16 ((a), (b))) ++#define __nds32__v_scmple16(a, b) \ ++ (__builtin_nds32_v_scmple16 ((a), (b))) ++#define __nds32__ucmplt16(a, b) \ ++ (__builtin_nds32_ucmplt16 ((a), (b))) ++#define __nds32__v_ucmplt16(a, b) \ ++ (__builtin_nds32_v_ucmplt16 ((a), (b))) ++#define __nds32__ucmple16(a, b) \ ++ (__builtin_nds32_ucmple16 ((a), (b))) ++#define __nds32__v_ucmple16(a, b) \ ++ (__builtin_nds32_v_ucmple16 ((a), (b))) ++ ++#define __nds32__cmpeq8(a, b) \ ++ (__builtin_nds32_cmpeq8 ((a), (b))) ++#define __nds32__v_scmpeq8(a, b) \ ++ (__builtin_nds32_v_scmpeq8 ((a), (b))) ++#define __nds32__v_ucmpeq8(a, b) \ ++ (__builtin_nds32_v_ucmpeq8 ((a), (b))) ++#define __nds32__scmplt8(a, b) \ ++ (__builtin_nds32_scmplt8 ((a), (b))) ++#define __nds32__v_scmplt8(a, b) \ ++ (__builtin_nds32_v_scmplt8 ((a), (b))) ++#define __nds32__scmple8(a, b) \ ++ (__builtin_nds32_scmple8 ((a), (b))) ++#define __nds32__v_scmple8(a, b) \ ++ 
(__builtin_nds32_v_scmple8 ((a), (b))) ++#define __nds32__ucmplt8(a, b) \ ++ (__builtin_nds32_ucmplt8 ((a), (b))) ++#define __nds32__v_ucmplt8(a, b) \ ++ (__builtin_nds32_v_ucmplt8 ((a), (b))) ++#define __nds32__ucmple8(a, b) \ ++ (__builtin_nds32_ucmple8 ((a), (b))) ++#define __nds32__v_ucmple8(a, b) \ ++ (__builtin_nds32_v_ucmple8 ((a), (b))) ++ ++#define __nds32__smin16(a, b) \ ++ (__builtin_nds32_smin16 ((a), (b))) ++#define __nds32__v_smin16(a, b) \ ++ (__builtin_nds32_v_smin16 ((a), (b))) ++#define __nds32__umin16(a, b) \ ++ (__builtin_nds32_umin16 ((a), (b))) ++#define __nds32__v_umin16(a, b) \ ++ (__builtin_nds32_v_umin16 ((a), (b))) ++#define __nds32__smax16(a, b) \ ++ (__builtin_nds32_smax16 ((a), (b))) ++#define __nds32__v_smax16(a, b) \ ++ (__builtin_nds32_v_smax16 ((a), (b))) ++#define __nds32__umax16(a, b) \ ++ (__builtin_nds32_umax16 ((a), (b))) ++#define __nds32__v_umax16(a, b) \ ++ (__builtin_nds32_v_umax16 ((a), (b))) ++#define __nds32__sclip16(a, b) \ ++ (__builtin_nds32_sclip16 ((a), (b))) ++#define __nds32__v_sclip16(a, b) \ ++ (__builtin_nds32_v_sclip16 ((a), (b))) ++#define __nds32__uclip16(a, b) \ ++ (__builtin_nds32_uclip16 ((a), (b))) ++#define __nds32__v_uclip16(a, b) \ ++ (__builtin_nds32_v_uclip16 ((a), (b))) ++#define __nds32__khm16(a, b) \ ++ (__builtin_nds32_khm16 ((a), (b))) ++#define __nds32__v_khm16(a, b) \ ++ (__builtin_nds32_v_khm16 ((a), (b))) ++#define __nds32__khmx16(a, b) \ ++ (__builtin_nds32_khmx16 ((a), (b))) ++#define __nds32__v_khmx16(a, b) \ ++ (__builtin_nds32_v_khmx16 ((a), (b))) ++#define __nds32__kabs16(a) \ ++ (__builtin_nds32_kabs16 ((a))) ++#define __nds32__v_kabs16(a) \ ++ (__builtin_nds32_v_kabs16 ((a))) ++ ++#define __nds32__smin8(a, b) \ ++ (__builtin_nds32_smin8 ((a), (b))) ++#define __nds32__v_smin8(a, b) \ ++ (__builtin_nds32_v_smin8 ((a), (b))) ++#define __nds32__umin8(a, b) \ ++ (__builtin_nds32_umin8 ((a), (b))) ++#define __nds32__v_umin8(a, b) \ ++ (__builtin_nds32_v_umin8 ((a), (b))) ++#define 
__nds32__smax8(a, b) \ ++ (__builtin_nds32_smax8 ((a), (b))) ++#define __nds32__v_smax8(a, b) \ ++ (__builtin_nds32_v_smax8 ((a), (b))) ++#define __nds32__umax8(a, b) \ ++ (__builtin_nds32_umax8 ((a), (b))) ++#define __nds32__v_umax8(a, b) \ ++ (__builtin_nds32_v_umax8 ((a), (b))) ++#define __nds32__kabs8(a) \ ++ (__builtin_nds32_kabs8 ((a))) ++#define __nds32__v_kabs8(a) \ ++ (__builtin_nds32_v_kabs8 ((a))) ++ ++#define __nds32__sunpkd810(a) \ ++ (__builtin_nds32_sunpkd810 ((a))) ++#define __nds32__v_sunpkd810(a) \ ++ (__builtin_nds32_v_sunpkd810 ((a))) ++#define __nds32__sunpkd820(a) \ ++ (__builtin_nds32_sunpkd820 ((a))) ++#define __nds32__v_sunpkd820(a) \ ++ (__builtin_nds32_v_sunpkd820 ((a))) ++#define __nds32__sunpkd830(a) \ ++ (__builtin_nds32_sunpkd830 ((a))) ++#define __nds32__v_sunpkd830(a) \ ++ (__builtin_nds32_v_sunpkd830 ((a))) ++#define __nds32__sunpkd831(a) \ ++ (__builtin_nds32_sunpkd831 ((a))) ++#define __nds32__v_sunpkd831(a) \ ++ (__builtin_nds32_v_sunpkd831 ((a))) ++#define __nds32__zunpkd810(a) \ ++ (__builtin_nds32_zunpkd810 ((a))) ++#define __nds32__v_zunpkd810(a) \ ++ (__builtin_nds32_v_zunpkd810 ((a))) ++#define __nds32__zunpkd820(a) \ ++ (__builtin_nds32_zunpkd820 ((a))) ++#define __nds32__v_zunpkd820(a) \ ++ (__builtin_nds32_v_zunpkd820 ((a))) ++#define __nds32__zunpkd830(a) \ ++ (__builtin_nds32_zunpkd830 ((a))) ++#define __nds32__v_zunpkd830(a) \ ++ (__builtin_nds32_v_zunpkd830 ((a))) ++#define __nds32__zunpkd831(a) \ ++ (__builtin_nds32_zunpkd831 ((a))) ++#define __nds32__v_zunpkd831(a) \ ++ (__builtin_nds32_v_zunpkd831 ((a))) ++ ++#define __nds32__raddw(a, b) \ ++ (__builtin_nds32_raddw ((a), (b))) ++#define __nds32__uraddw(a, b) \ ++ (__builtin_nds32_uraddw ((a), (b))) ++#define __nds32__rsubw(a, b) \ ++ (__builtin_nds32_rsubw ((a), (b))) ++#define __nds32__ursubw(a, b) \ ++ (__builtin_nds32_ursubw ((a), (b))) ++ ++#define __nds32__sra_u(a, b) \ ++ (__builtin_nds32_sra_u ((a), (b))) ++#define __nds32__ksll(a, b) \ ++ 
(__builtin_nds32_ksll ((a), (b))) ++#define __nds32__pkbb16(a, b) \ ++ (__builtin_nds32_pkbb16 ((a), (b))) ++#define __nds32__v_pkbb16(a, b) \ ++ (__builtin_nds32_v_pkbb16 ((a), (b))) ++#define __nds32__pkbt16(a, b) \ ++ (__builtin_nds32_pkbt16 ((a), (b))) ++#define __nds32__v_pkbt16(a, b) \ ++ (__builtin_nds32_v_pkbt16 ((a), (b))) ++#define __nds32__pktb16(a, b) \ ++ (__builtin_nds32_pktb16 ((a), (b))) ++#define __nds32__v_pktb16(a, b) \ ++ (__builtin_nds32_v_pktb16 ((a), (b))) ++#define __nds32__pktt16(a, b) \ ++ (__builtin_nds32_pktt16 ((a), (b))) ++#define __nds32__v_pktt16(a, b) \ ++ (__builtin_nds32_v_pktt16 ((a), (b))) ++ ++#define __nds32__smmul(a, b) \ ++ (__builtin_nds32_smmul ((a), (b))) ++#define __nds32__smmul_u(a, b) \ ++ (__builtin_nds32_smmul_u ((a), (b))) ++#define __nds32__kmmac(r, a, b) \ ++ (__builtin_nds32_kmmac ((r), (a), (b))) ++#define __nds32__kmmac_u(r, a, b) \ ++ (__builtin_nds32_kmmac_u ((r), (a), (b))) ++#define __nds32__kmmsb(r, a, b) \ ++ (__builtin_nds32_kmmsb ((r), (a), (b))) ++#define __nds32__kmmsb_u(r, a, b) \ ++ (__builtin_nds32_kmmsb_u ((r), (a), (b))) ++#define __nds32__kwmmul(a, b) \ ++ (__builtin_nds32_kwmmul ((a), (b))) ++#define __nds32__kwmmul_u(a, b) \ ++ (__builtin_nds32_kwmmul_u ((a), (b))) ++ ++#define __nds32__smmwb(a, b) \ ++ (__builtin_nds32_smmwb ((a), (b))) ++#define __nds32__v_smmwb(a, b) \ ++ (__builtin_nds32_v_smmwb ((a), (b))) ++#define __nds32__smmwb_u(a, b) \ ++ (__builtin_nds32_smmwb_u ((a), (b))) ++#define __nds32__v_smmwb_u(a, b) \ ++ (__builtin_nds32_v_smmwb_u ((a), (b))) ++#define __nds32__smmwt(a, b) \ ++ (__builtin_nds32_smmwt ((a), (b))) ++#define __nds32__v_smmwt(a, b) \ ++ (__builtin_nds32_v_smmwt ((a), (b))) ++#define __nds32__smmwt_u(a, b) \ ++ (__builtin_nds32_smmwt_u ((a), (b))) ++#define __nds32__v_smmwt_u(a, b) \ ++ (__builtin_nds32_v_smmwt_u ((a), (b))) ++#define __nds32__kmmawb(r, a, b) \ ++ (__builtin_nds32_kmmawb ((r), (a), (b))) ++#define __nds32__v_kmmawb(r, a, b) \ ++ 
(__builtin_nds32_v_kmmawb ((r), (a), (b))) ++#define __nds32__kmmawb_u(r, a, b) \ ++ (__builtin_nds32_kmmawb_u ((r), (a), (b))) ++#define __nds32__v_kmmawb_u(r, a, b) \ ++ (__builtin_nds32_v_kmmawb_u ((r), (a), (b))) ++#define __nds32__kmmawt(r, a, b) \ ++ (__builtin_nds32_kmmawt ((r), (a), (b))) ++#define __nds32__v_kmmawt(r, a, b) \ ++ (__builtin_nds32_v_kmmawt ((r), (a), (b))) ++#define __nds32__kmmawt_u(r, a, b) \ ++ (__builtin_nds32_kmmawt_u ((r), (a), (b))) ++#define __nds32__v_kmmawt_u(r, a, b) \ ++ (__builtin_nds32_v_kmmawt_u ((r), (a), (b))) ++ ++#define __nds32__smbb(a, b) \ ++ (__builtin_nds32_smbb ((a), (b))) ++#define __nds32__v_smbb(a, b) \ ++ (__builtin_nds32_v_smbb ((a), (b))) ++#define __nds32__smbt(a, b) \ ++ (__builtin_nds32_smbt ((a), (b))) ++#define __nds32__v_smbt(a, b) \ ++ (__builtin_nds32_v_smbt ((a), (b))) ++#define __nds32__smtt(a, b) \ ++ (__builtin_nds32_smtt ((a), (b))) ++#define __nds32__v_smtt(a, b) \ ++ (__builtin_nds32_v_smtt ((a), (b))) ++#define __nds32__kmda(a, b) \ ++ (__builtin_nds32_kmda ((a), (b))) ++#define __nds32__v_kmda(a, b) \ ++ (__builtin_nds32_v_kmda ((a), (b))) ++#define __nds32__kmxda(a, b) \ ++ (__builtin_nds32_kmxda ((a), (b))) ++#define __nds32__v_kmxda(a, b) \ ++ (__builtin_nds32_v_kmxda ((a), (b))) ++#define __nds32__smds(a, b) \ ++ (__builtin_nds32_smds ((a), (b))) ++#define __nds32__v_smds(a, b) \ ++ (__builtin_nds32_v_smds ((a), (b))) ++#define __nds32__smdrs(a, b) \ ++ (__builtin_nds32_smdrs ((a), (b))) ++#define __nds32__v_smdrs(a, b) \ ++ (__builtin_nds32_v_smdrs ((a), (b))) ++#define __nds32__smxds(a, b) \ ++ (__builtin_nds32_smxds ((a), (b))) ++#define __nds32__v_smxds(a, b) \ ++ (__builtin_nds32_v_smxds ((a), (b))) ++#define __nds32__kmabb(r, a, b) \ ++ (__builtin_nds32_kmabb ((r), (a), (b))) ++#define __nds32__v_kmabb(r, a, b) \ ++ (__builtin_nds32_v_kmabb ((r), (a), (b))) ++#define __nds32__kmabt(r, a, b) \ ++ (__builtin_nds32_kmabt ((r), (a), (b))) ++#define __nds32__v_kmabt(r, a, b) \ ++ 
(__builtin_nds32_v_kmabt ((r), (a), (b))) ++#define __nds32__kmatt(r, a, b) \ ++ (__builtin_nds32_kmatt ((r), (a), (b))) ++#define __nds32__v_kmatt(r, a, b) \ ++ (__builtin_nds32_v_kmatt ((r), (a), (b))) ++#define __nds32__kmada(r, a, b) \ ++ (__builtin_nds32_kmada ((r), (a), (b))) ++#define __nds32__v_kmada(r, a, b) \ ++ (__builtin_nds32_v_kmada ((r), (a), (b))) ++#define __nds32__kmaxda(r, a, b) \ ++ (__builtin_nds32_kmaxda ((r), (a), (b))) ++#define __nds32__v_kmaxda(r, a, b) \ ++ (__builtin_nds32_v_kmaxda ((r), (a), (b))) ++#define __nds32__kmads(r, a, b) \ ++ (__builtin_nds32_kmads ((r), (a), (b))) ++#define __nds32__v_kmads(r, a, b) \ ++ (__builtin_nds32_v_kmads ((r), (a), (b))) ++#define __nds32__kmadrs(r, a, b) \ ++ (__builtin_nds32_kmadrs ((r), (a), (b))) ++#define __nds32__v_kmadrs(r, a, b) \ ++ (__builtin_nds32_v_kmadrs ((r), (a), (b))) ++#define __nds32__kmaxds(r, a, b) \ ++ (__builtin_nds32_kmaxds ((r), (a), (b))) ++#define __nds32__v_kmaxds(r, a, b) \ ++ (__builtin_nds32_v_kmaxds ((r), (a), (b))) ++#define __nds32__kmsda(r, a, b) \ ++ (__builtin_nds32_kmsda ((r), (a), (b))) ++#define __nds32__v_kmsda(r, a, b) \ ++ (__builtin_nds32_v_kmsda ((r), (a), (b))) ++#define __nds32__kmsxda(r, a, b) \ ++ (__builtin_nds32_kmsxda ((r), (a), (b))) ++#define __nds32__v_kmsxda(r, a, b) \ ++ (__builtin_nds32_v_kmsxda ((r), (a), (b))) ++ ++#define __nds32__smal(a, b) \ ++ (__builtin_nds32_smal ((a), (b))) ++#define __nds32__v_smal(a, b) \ ++ (__builtin_nds32_v_smal ((a), (b))) ++ ++#define __nds32__bitrev(a, b) \ ++ (__builtin_nds32_bitrev ((a), (b))) ++#define __nds32__wext(a, b) \ ++ (__builtin_nds32_wext ((a), (b))) ++#define __nds32__bpick(r, a, b) \ ++ (__builtin_nds32_bpick ((r), (a), (b))) ++#define __nds32__insb(r, a, b) \ ++ (__builtin_nds32_insb ((r), (a), (b))) ++ ++#define __nds32__sadd64(a, b) \ ++ (__builtin_nds32_sadd64 ((a), (b))) ++#define __nds32__uadd64(a, b) \ ++ (__builtin_nds32_uadd64 ((a), (b))) ++#define __nds32__radd64(a, b) \ ++ 
(__builtin_nds32_radd64 ((a), (b))) ++#define __nds32__uradd64(a, b) \ ++ (__builtin_nds32_uradd64 ((a), (b))) ++#define __nds32__kadd64(a, b) \ ++ (__builtin_nds32_kadd64 ((a), (b))) ++#define __nds32__ukadd64(a, b) \ ++ (__builtin_nds32_ukadd64 ((a), (b))) ++#define __nds32__ssub64(a, b) \ ++ (__builtin_nds32_ssub64 ((a), (b))) ++#define __nds32__usub64(a, b) \ ++ (__builtin_nds32_usub64 ((a), (b))) ++#define __nds32__rsub64(a, b) \ ++ (__builtin_nds32_rsub64 ((a), (b))) ++#define __nds32__ursub64(a, b) \ ++ (__builtin_nds32_ursub64 ((a), (b))) ++#define __nds32__ksub64(a, b) \ ++ (__builtin_nds32_ksub64 ((a), (b))) ++#define __nds32__uksub64(a, b) \ ++ (__builtin_nds32_uksub64 ((a), (b))) ++ ++#define __nds32__smar64(r, a, b) \ ++ (__builtin_nds32_smar64 ((r), (a), (b))) ++#define __nds32__smsr64(r, a, b) \ ++ (__builtin_nds32_smsr64 ((r), (a), (b))) ++#define __nds32__umar64(r, a, b) \ ++ (__builtin_nds32_umar64 ((r), (a), (b))) ++#define __nds32__umsr64(r, a, b) \ ++ (__builtin_nds32_umsr64 ((r), (a), (b))) ++#define __nds32__kmar64(r, a, b) \ ++ (__builtin_nds32_kmar64 ((r), (a), (b))) ++#define __nds32__kmsr64(r, a, b) \ ++ (__builtin_nds32_kmsr64 ((r), (a), (b))) ++#define __nds32__ukmar64(r, a, b) \ ++ (__builtin_nds32_ukmar64 ((r), (a), (b))) ++#define __nds32__ukmsr64(r, a, b) \ ++ (__builtin_nds32_ukmsr64 ((r), (a), (b))) ++ ++#define __nds32__smalbb(r, a, b) \ ++ (__builtin_nds32_smalbb ((r), (a), (b))) ++#define __nds32__v_smalbb(r, a, b) \ ++ (__builtin_nds32_v_smalbb ((r), (a), (b))) ++#define __nds32__smalbt(r, a, b) \ ++ (__builtin_nds32_smalbt ((r), (a), (b))) ++#define __nds32__v_smalbt(r, a, b) \ ++ (__builtin_nds32_v_smalbt ((r), (a), (b))) ++#define __nds32__smaltt(r, a, b) \ ++ (__builtin_nds32_smaltt ((r), (a), (b))) ++#define __nds32__v_smaltt(r, a, b) \ ++ (__builtin_nds32_v_smaltt ((r), (a), (b))) ++#define __nds32__smalda(r, a, b) \ ++ (__builtin_nds32_smalda ((r), (a), (b))) ++#define __nds32__v_smalda(r, a, b) \ ++ 
(__builtin_nds32_v_smalda ((r), (a), (b))) ++#define __nds32__smalxda(r, a, b) \ ++ (__builtin_nds32_smalxda ((r), (a), (b))) ++#define __nds32__v_smalxda(r, a, b) \ ++ (__builtin_nds32_v_smalxda ((r), (a), (b))) ++#define __nds32__smalds(r, a, b) \ ++ (__builtin_nds32_smalds ((r), (a), (b))) ++#define __nds32__v_smalds(r, a, b) \ ++ (__builtin_nds32_v_smalds ((r), (a), (b))) ++#define __nds32__smaldrs(r, a, b) \ ++ (__builtin_nds32_smaldrs ((r), (a), (b))) ++#define __nds32__v_smaldrs(r, a, b) \ ++ (__builtin_nds32_v_smaldrs ((r), (a), (b))) ++#define __nds32__smalxds(r, a, b) \ ++ (__builtin_nds32_smalxds ((r), (a), (b))) ++#define __nds32__v_smalxds(r, a, b) \ ++ (__builtin_nds32_v_smalxds ((r), (a), (b))) ++#define __nds32__smslda(r, a, b) \ ++ (__builtin_nds32_smslda ((r), (a), (b))) ++#define __nds32__v_smslda(r, a, b) \ ++ (__builtin_nds32_v_smslda ((r), (a), (b))) ++#define __nds32__smslxda(r, a, b) \ ++ (__builtin_nds32_smslxda ((r), (a), (b))) ++#define __nds32__v_smslxda(r, a, b) \ ++ (__builtin_nds32_v_smslxda ((r), (a), (b))) ++ ++#define __nds32__smul16(a, b) \ ++ (__builtin_nds32_smul16 ((a), (b))) ++#define __nds32__v_smul16(a, b) \ ++ (__builtin_nds32_v_smul16 ((a), (b))) ++#define __nds32__smulx16(a, b) \ ++ (__builtin_nds32_smulx16 ((a), (b))) ++#define __nds32__v_smulx16(a, b) \ ++ (__builtin_nds32_v_smulx16 ((a), (b))) ++#define __nds32__umul16(a, b) \ ++ (__builtin_nds32_umul16 ((a), (b))) ++#define __nds32__v_umul16(a, b) \ ++ (__builtin_nds32_v_umul16 ((a), (b))) ++#define __nds32__umulx16(a, b) \ ++ (__builtin_nds32_umulx16 ((a), (b))) ++#define __nds32__v_umulx16(a, b) \ ++ (__builtin_nds32_v_umulx16 ((a), (b))) ++ ++#define __nds32__uclip32(a, imm) \ ++ (__builtin_nds32_uclip32 ((a), (imm))) ++#define __nds32__sclip32(a, imm) \ ++ (__builtin_nds32_sclip32 ((a), (imm))) ++#define __nds32__kabs(a) \ ++ (__builtin_nds32_kabs ((a))) ++ ++#define __nds32__no_ext_zol() \ ++ (__builtin_nds32_no_ext_zol()) ++ ++#define 
__nds32__unaligned_feature() \ ++ (__builtin_nds32_unaligned_feature()) ++#define __nds32__enable_unaligned() \ ++ (__builtin_nds32_enable_unaligned()) ++#define __nds32__disable_unaligned() \ ++ (__builtin_nds32_disable_unaligned()) ++ ++#define __nds32__get_unaligned_u16x2(a) \ ++ (__builtin_nds32_get_unaligned_u16x2 ((a))) ++#define __nds32__get_unaligned_s16x2(a) \ ++ (__builtin_nds32_get_unaligned_s16x2 ((a))) ++#define __nds32__get_unaligned_u8x4(a) \ ++ (__builtin_nds32_get_unaligned_u8x4 ((a))) ++#define __nds32__get_unaligned_s8x4(a) \ ++ (__builtin_nds32_get_unaligned_s8x4 ((a))) ++ ++#define __nds32__put_unaligned_u16x2(a, data) \ ++ (__builtin_nds32_put_unaligned_u16x2 ((a), (data))) ++#define __nds32__put_unaligned_s16x2(a, data) \ ++ (__builtin_nds32_put_unaligned_s16x2 ((a), (data))) ++#define __nds32__put_unaligned_u8x4(a, data) \ ++ (__builtin_nds32_put_unaligned_u8x4 ((a), (data))) ++#define __nds32__put_unaligned_s8x4(a, data) \ ++ (__builtin_nds32_put_unaligned_s8x4 ((a), (data))) ++ ++#define NDS32ATTR_SIGNATURE __attribute__((signature)) ++ + #endif /* nds32_intrinsic.h */ +diff --git a/gcc/config/nds32/nds32_isr.h b/gcc/config/nds32/nds32_isr.h +new file mode 100644 +index 0000000..6fabd3e +--- /dev/null ++++ b/gcc/config/nds32/nds32_isr.h +@@ -0,0 +1,526 @@ ++/* Intrinsic definitions of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. 
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef _NDS32_ISR_H ++#define _NDS32_ISR_H ++ ++/* Attribute of an interrupt or exception handler: ++ ++ NDS32_READY_NESTED: This handler is interruptible if the user re-enables the GIE bit. ++ NDS32_NESTED : This handler is interruptible. This is not suitable ++ for an exception handler. ++ NDS32_NOT_NESTED : This handler is NOT interruptible. Users have to do ++ some work if nesting is wanted. ++ NDS32_CRITICAL : This handler is a critical ISR, which means it is small ++ and efficient. */ ++#define NDS32_READY_NESTED 0 ++#define NDS32_NESTED 1 ++#define NDS32_NOT_NESTED 2 ++#define NDS32_CRITICAL 3 ++ ++/* Attribute of an interrupt or exception handler: ++ ++ NDS32_SAVE_ALL_REGS : Save all registers in a table. ++ NDS32_SAVE_CALLER_REGS : Save partial registers. */ ++#define NDS32_SAVE_CALLER_REGS 0 ++#define NDS32_SAVE_ALL_REGS 1 ++ ++/* There are two versions of the register table for interrupt and exception handlers, ++ one for 16-register CPU the other for 32-register CPU. These structures are ++ used for context switching or system call handling. The address of this ++ data can be obtained from the input argument of the handler functions. ++ ++ For system call handling, r0 to r5 are used to pass arguments. If more ++ arguments are used they are put into the stack and its starting address is ++ in sp. Return value of system call can be put into r0 and r1 upon exit from ++ system call handler. System call ID is in a system register and it can be ++ fetched via intrinsic function. 
For more information please read ABI and ++ other related documents. ++ ++ For context switching, at least 2 values need to be saved in the kernel. One is ++ IPC and the other is the stack address of current task. Use intrinsic ++ function to get IPC and the input argument of the handler functions + 8 to ++ get stack address of current task. To do context switching, you replace ++ new_sp with the stack address of new task and replace IPC system register ++ with IPC of new task, then, just return from handler. The context switching ++ will happen. */ ++ ++/* Register table for exception handler; 32-register version. */ ++typedef struct ++{ ++ int r0; ++ int r1; ++ int r2; ++ int r3; ++ int r4; ++ int r5; ++ int r6; ++ int r7; ++ int r8; ++ int r9; ++ int r10; ++ int r11; ++ int r12; ++ int r13; ++ int r14; ++ int r15; ++ int r16; ++ int r17; ++ int r18; ++ int r19; ++ int r20; ++ int r21; ++ int r22; ++ int r23; ++ int r24; ++ int r25; ++ int r26; ++ int r27; ++ int fp; ++ int gp; ++ int lp; ++ int sp; ++} NDS32_GPR32; ++ ++/* Register table for exception handler; 16-register version. */ ++typedef struct ++{ ++ int r0; ++ int r1; ++ int r2; ++ int r3; ++ int r4; ++ int r5; ++ int r6; ++ int r7; ++ int r8; ++ int r9; ++ int r10; ++ int r15; ++ int fp; ++ int gp; ++ int lp; ++ int sp; ++} NDS32_GPR16; ++ ++ ++/* Use NDS32_REG32_TAB or NDS32_REG16_TAB in your program to ++ access register table. 
*/ ++typedef struct ++{ ++ union ++ { ++ int reg_a[32] ; ++ NDS32_GPR32 reg_s ; ++ } u ; ++} NDS32_REG32_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ int reg_a[16] ; ++ NDS32_GPR16 reg_s ; ++ } u ; ++} NDS32_REG16_TAB; ++ ++typedef struct ++{ ++ int d0lo; ++ int d0hi; ++ int d1lo; ++ int d1hi; ++} NDS32_DX_TAB; ++ ++typedef struct ++{ ++#ifdef __NDS32_EB__ ++ float fsr0; ++ float fsr1; ++ float fsr2; ++ float fsr3; ++ float fsr4; ++ float fsr5; ++ float fsr6; ++ float fsr7; ++#else ++ float fsr1; ++ float fsr0; ++ float fsr3; ++ float fsr2; ++ float fsr5; ++ float fsr4; ++ float fsr7; ++ float fsr6; ++#endif ++} NDS32_FSR8; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++} NDS32_DSR4; ++ ++typedef struct ++{ ++#ifdef __NDS32_EB__ ++ float fsr0; ++ float fsr1; ++ float fsr2; ++ float fsr3; ++ float fsr4; ++ float fsr5; ++ float fsr6; ++ float fsr7; ++ float fsr8; ++ float fsr9; ++ float fsr10; ++ float fsr11; ++ float fsr12; ++ float fsr13; ++ float fsr14; ++ float fsr15; ++#else ++ float fsr1; ++ float fsr0; ++ float fsr3; ++ float fsr2; ++ float fsr5; ++ float fsr4; ++ float fsr7; ++ float fsr6; ++ float fsr9; ++ float fsr8; ++ float fsr11; ++ float fsr10; ++ float fsr13; ++ float fsr12; ++ float fsr15; ++ float fsr14; ++#endif ++} NDS32_FSR16; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++ double dsr4; ++ double dsr5; ++ double dsr6; ++ double dsr7; ++} NDS32_DSR8; ++ ++typedef struct ++{ ++#ifdef __NDS32_EB__ ++ float fsr0; ++ float fsr1; ++ float fsr2; ++ float fsr3; ++ float fsr4; ++ float fsr5; ++ float fsr6; ++ float fsr7; ++ float fsr8; ++ float fsr9; ++ float fsr10; ++ float fsr11; ++ float fsr12; ++ float fsr13; ++ float fsr14; ++ float fsr15; ++ float fsr16; ++ float fsr17; ++ float fsr18; ++ float fsr19; ++ float fsr20; ++ float fsr21; ++ float fsr22; ++ float fsr23; ++ float fsr24; ++ float fsr25; ++ float fsr26; ++ float fsr27; ++ float fsr28; ++ float fsr29; ++ 
float fsr30; ++ float fsr31; ++#else ++ float fsr1; ++ float fsr0; ++ float fsr3; ++ float fsr2; ++ float fsr5; ++ float fsr4; ++ float fsr7; ++ float fsr6; ++ float fsr9; ++ float fsr8; ++ float fsr11; ++ float fsr10; ++ float fsr13; ++ float fsr12; ++ float fsr15; ++ float fsr14; ++ float fsr17; ++ float fsr16; ++ float fsr19; ++ float fsr18; ++ float fsr21; ++ float fsr20; ++ float fsr23; ++ float fsr22; ++ float fsr25; ++ float fsr24; ++ float fsr27; ++ float fsr26; ++ float fsr29; ++ float fsr28; ++ float fsr31; ++ float fsr30; ++#endif ++} NDS32_FSR32; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++ double dsr4; ++ double dsr5; ++ double dsr6; ++ double dsr7; ++ double dsr8; ++ double dsr9; ++ double dsr10; ++ double dsr11; ++ double dsr12; ++ double dsr13; ++ double dsr14; ++ double dsr15; ++} NDS32_DSR16; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++ double dsr4; ++ double dsr5; ++ double dsr6; ++ double dsr7; ++ double dsr8; ++ double dsr9; ++ double dsr10; ++ double dsr11; ++ double dsr12; ++ double dsr13; ++ double dsr14; ++ double dsr15; ++ double dsr16; ++ double dsr17; ++ double dsr18; ++ double dsr19; ++ double dsr20; ++ double dsr21; ++ double dsr22; ++ double dsr23; ++ double dsr24; ++ double dsr25; ++ double dsr26; ++ double dsr27; ++ double dsr28; ++ double dsr29; ++ double dsr30; ++ double dsr31; ++} NDS32_DSR32; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR8 fsr_s ; ++ NDS32_DSR4 dsr_s ; ++ } u ; ++} NDS32_FPU8_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR16 fsr_s ; ++ NDS32_DSR8 dsr_s ; ++ } u ; ++} NDS32_FPU16_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR32 fsr_s ; ++ NDS32_DSR16 dsr_s ; ++ } u ; ++} NDS32_FPU32_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR32 fsr_s ; ++ NDS32_DSR32 dsr_s ; ++ } u ; ++} NDS32_FPU64_TAB; ++ ++typedef struct ++{ ++ int ipc; ++ int ipsw; ++#if defined(NDS32_EXT_FPU_CONFIG_0) ++ NDS32_FPU8_TAB 
fpr; ++#elif defined(NDS32_EXT_FPU_CONFIG_1) ++ NDS32_FPU16_TAB fpr; ++#elif defined(NDS32_EXT_FPU_CONFIG_2) ++ NDS32_FPU32_TAB fpr; ++#elif defined(NDS32_EXT_FPU_CONFIG_3) ++ NDS32_FPU64_TAB fpr; ++#endif ++#if __NDS32_DX_REGS__ ++ NDS32_DX_TAB dxr; ++#endif ++#if __NDS32_EXT_IFC__ ++ int ifc_lp; ++ int filler; ++#endif ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS ++ NDS32_REG16_TAB gpr; ++#else ++ NDS32_REG32_TAB gpr; ++#endif ++} NDS32_CONTEXT; ++ ++/* Predefined Vector Definition. ++ ++ For IVIC Mode: 9 to 14 are for hardware interrupt ++ and 15 is for software interrupt. ++ For EVIC Mode: 9 to 72 are for hardware interrupt ++ and software interrupt can be routed to any one of them. ++ ++ You may want to define your hardware interrupts in the following way ++ for easy maintainance. ++ ++ IVIC mode: ++ #define MY_HW_IVIC_TIMER NDS32_VECTOR_INTERRUPT_HW0 + 1 ++ #define MY_HW_IVIC_USB NDS32_VECTOR_INTERRUPT_HW0 + 3 ++ EVIC mode: ++ #define MY_HW_EVIC_DMA NDS32_VECTOR_INTERRUPT_HW0 + 2 ++ #define MY_HW_EVIC_SWI NDS32_VECTOR_INTERRUPT_HW0 + 10 */ ++#define NDS32_VECTOR_RESET 0 ++#define NDS32_VECTOR_TLB_FILL 1 ++#define NDS32_VECTOR_PTE_NOT_PRESENT 2 ++#define NDS32_VECTOR_TLB_MISC 3 ++#define NDS32_VECTOR_TLB_VLPT_MISS 4 ++#define NDS32_VECTOR_MACHINE_ERROR 5 ++#define NDS32_VECTOR_DEBUG_RELATED 6 ++#define NDS32_VECTOR_GENERAL_EXCEPTION 7 ++#define NDS32_VECTOR_SYSCALL 8 ++#define NDS32_VECTOR_INTERRUPT_HW0 9 ++#define NDS32_VECTOR_INTERRUPT_HW1 10 ++#define NDS32_VECTOR_INTERRUPT_HW2 11 ++#define NDS32_VECTOR_INTERRUPT_HW3 12 ++#define NDS32_VECTOR_INTERRUPT_HW4 13 ++#define NDS32_VECTOR_INTERRUPT_HW5 14 ++#define NDS32_VECTOR_INTERRUPT_HW6 15 ++#define NDS32_VECTOR_SWI 15 /* THIS IS FOR IVIC MODE ONLY */ ++#define NDS32_VECTOR_INTERRUPT_HW7 16 ++#define NDS32_VECTOR_INTERRUPT_HW8 17 ++#define NDS32_VECTOR_INTERRUPT_HW9 18 ++#define NDS32_VECTOR_INTERRUPT_HW10 19 ++#define NDS32_VECTOR_INTERRUPT_HW11 20 ++#define NDS32_VECTOR_INTERRUPT_HW12 21 ++#define 
NDS32_VECTOR_INTERRUPT_HW13 22 ++#define NDS32_VECTOR_INTERRUPT_HW14 23 ++#define NDS32_VECTOR_INTERRUPT_HW15 24 ++#define NDS32_VECTOR_INTERRUPT_HW16 25 ++#define NDS32_VECTOR_INTERRUPT_HW17 26 ++#define NDS32_VECTOR_INTERRUPT_HW18 27 ++#define NDS32_VECTOR_INTERRUPT_HW19 28 ++#define NDS32_VECTOR_INTERRUPT_HW20 29 ++#define NDS32_VECTOR_INTERRUPT_HW21 30 ++#define NDS32_VECTOR_INTERRUPT_HW22 31 ++#define NDS32_VECTOR_INTERRUPT_HW23 32 ++#define NDS32_VECTOR_INTERRUPT_HW24 33 ++#define NDS32_VECTOR_INTERRUPT_HW25 34 ++#define NDS32_VECTOR_INTERRUPT_HW26 35 ++#define NDS32_VECTOR_INTERRUPT_HW27 36 ++#define NDS32_VECTOR_INTERRUPT_HW28 37 ++#define NDS32_VECTOR_INTERRUPT_HW29 38 ++#define NDS32_VECTOR_INTERRUPT_HW30 39 ++#define NDS32_VECTOR_INTERRUPT_HW31 40 ++#define NDS32_VECTOR_INTERRUPT_HW32 41 ++#define NDS32_VECTOR_INTERRUPT_HW33 42 ++#define NDS32_VECTOR_INTERRUPT_HW34 43 ++#define NDS32_VECTOR_INTERRUPT_HW35 44 ++#define NDS32_VECTOR_INTERRUPT_HW36 45 ++#define NDS32_VECTOR_INTERRUPT_HW37 46 ++#define NDS32_VECTOR_INTERRUPT_HW38 47 ++#define NDS32_VECTOR_INTERRUPT_HW39 48 ++#define NDS32_VECTOR_INTERRUPT_HW40 49 ++#define NDS32_VECTOR_INTERRUPT_HW41 50 ++#define NDS32_VECTOR_INTERRUPT_HW42 51 ++#define NDS32_VECTOR_INTERRUPT_HW43 52 ++#define NDS32_VECTOR_INTERRUPT_HW44 53 ++#define NDS32_VECTOR_INTERRUPT_HW45 54 ++#define NDS32_VECTOR_INTERRUPT_HW46 55 ++#define NDS32_VECTOR_INTERRUPT_HW47 56 ++#define NDS32_VECTOR_INTERRUPT_HW48 57 ++#define NDS32_VECTOR_INTERRUPT_HW49 58 ++#define NDS32_VECTOR_INTERRUPT_HW50 59 ++#define NDS32_VECTOR_INTERRUPT_HW51 60 ++#define NDS32_VECTOR_INTERRUPT_HW52 61 ++#define NDS32_VECTOR_INTERRUPT_HW53 62 ++#define NDS32_VECTOR_INTERRUPT_HW54 63 ++#define NDS32_VECTOR_INTERRUPT_HW55 64 ++#define NDS32_VECTOR_INTERRUPT_HW56 65 ++#define NDS32_VECTOR_INTERRUPT_HW57 66 ++#define NDS32_VECTOR_INTERRUPT_HW58 67 ++#define NDS32_VECTOR_INTERRUPT_HW59 68 ++#define NDS32_VECTOR_INTERRUPT_HW60 69 ++#define NDS32_VECTOR_INTERRUPT_HW61 70 
++#define NDS32_VECTOR_INTERRUPT_HW62 71 ++#define NDS32_VECTOR_INTERRUPT_HW63 72 ++ ++#define NDS32ATTR_RESET(option) __attribute__((reset(option))) ++#define NDS32ATTR_EXCEPT(type) __attribute__((exception(type))) ++#define NDS32ATTR_EXCEPTION(type) __attribute__((exception(type))) ++#define NDS32ATTR_INTERRUPT(type) __attribute__((interrupt(type))) ++#define NDS32ATTR_ISR(type) __attribute__((interrupt(type))) ++ ++#endif /* nds32_isr.h */ +diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md +index f7e2fa8..6cd854d 100644 +--- a/gcc/config/nds32/pipelines.md ++++ b/gcc/config/nds32/pipelines.md +@@ -18,12 +18,65 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; <http://www.gnu.org/licenses/>. + +-(define_automaton "nds32_machine") ++;; ------------------------------------------------------------------------ ++;; Include N7 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-n7.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include N8 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-n8.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include E8 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-e8.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include N9/N10 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-n9-3r2w.md") ++(include "nds32-n9-2r1w.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include N10 pipeline settings. 
++;; ------------------------------------------------------------------------ ++(include "nds32-n10.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include Graywolf pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-graywolf.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include N12/N13 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-n13.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include Panther pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-panther.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define simple pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_simple_machine") + +-(define_cpu_unit "general_unit" "nds32_machine") ++(define_cpu_unit "simple_unit" "nds32_simple_machine") + + (define_insn_reservation "simple_insn" 1 +- (eq_attr "type" "unknown,load,store,move,alu,compare,branch,call,misc") +- "general_unit") ++ (eq_attr "pipeline_model" "simple") ++ "simple_unit") + + ;; ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md +index 05a039d..71a3615 100644 +--- a/gcc/config/nds32/predicates.md ++++ b/gcc/config/nds32/predicates.md +@@ -24,25 +24,93 @@ + (define_predicate "nds32_greater_less_comparison_operator" + (match_code "gt,ge,lt,le")) + ++(define_predicate "nds32_float_comparison_operator" ++ (match_code "eq,ne,le,lt,ge,gt,ordered,unordered,ungt,unge,unlt,unle")) ++ ++(define_predicate "nds32_movecc_comparison_operator" ++ (match_code "eq,ne,le,leu,ge,geu")) ++ + (define_special_predicate 
"nds32_logical_binary_operator" + (match_code "and,ior,xor")) + ++(define_special_predicate "nds32_conditional_call_comparison_operator" ++ (match_code "lt,ge")) ++ ++(define_special_predicate "nds32_have_33_inst_operator" ++ (match_code "mult,and,ior,xor")) ++ + (define_predicate "nds32_symbolic_operand" +- (match_code "const,symbol_ref,label_ref")) ++ (and (match_code "const,symbol_ref,label_ref") ++ (match_test "!(TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (op))"))) ++ ++(define_predicate "nds32_nonunspec_symbolic_operand" ++ (and (match_code "const,symbol_ref,label_ref") ++ (match_test "!flag_pic && nds32_const_unspec_p (op) ++ && !(TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (op))"))) ++ ++(define_predicate "nds32_label_operand" ++ (match_code "label_ref")) + + (define_predicate "nds32_reg_constant_operand" +- (ior (match_operand 0 "register_operand") +- (match_operand 0 "const_int_operand"))) ++ (match_code "reg,const_int")) + + (define_predicate "nds32_rimm15s_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Is15 (op)")))) + ++(define_predicate "nds32_rimm11s_operand" ++ (ior (match_operand 0 "register_operand") ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Is11 (op)")))) ++ ++(define_predicate "nds32_imm_0_1_operand" ++ (and (match_operand 0 "const_int_operand") ++ (ior (match_test "satisfies_constraint_Iv00 (op)") ++ (match_test "satisfies_constraint_Iv01 (op)")))) ++ ++(define_predicate "nds32_imm_1_2_operand" ++ (and (match_operand 0 "const_int_operand") ++ (ior (match_test "satisfies_constraint_Iv01 (op)") ++ (match_test "satisfies_constraint_Iv02 (op)")))) ++ ++(define_predicate "nds32_imm_1_2_4_8_operand" ++ (and (match_operand 0 "const_int_operand") ++ (ior (ior (match_test "satisfies_constraint_Iv01 (op)") ++ (match_test "satisfies_constraint_Iv02 (op)")) ++ (ior (match_test 
"satisfies_constraint_Iv04 (op)") ++ (match_test "satisfies_constraint_Iv08 (op)"))))) ++ ++(define_predicate "nds32_imm2u_operand" ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Iu02 (op)"))) ++ ++(define_predicate "nds32_imm4u_operand" ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Iu04 (op)"))) ++ + (define_predicate "nds32_imm5u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu05 (op)"))) + ++(define_predicate "nds32_imm6u_operand" ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Iu06 (op)"))) ++ ++(define_predicate "nds32_rimm4u_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "nds32_imm4u_operand"))) ++ ++(define_predicate "nds32_rimm5u_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "nds32_imm5u_operand"))) ++ ++(define_predicate "nds32_rimm6u_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "nds32_imm6u_operand"))) ++ + (define_predicate "nds32_move_operand" + (and (match_operand 0 "general_operand") + (not (match_code "high,const,symbol_ref,label_ref"))) +@@ -57,12 +125,121 @@ + return true; + }) + ++(define_predicate "nds32_vmove_operand" ++ (and (match_operand 0 "general_operand") ++ (not (match_code "high,const,symbol_ref,label_ref"))) ++{ ++ /* If the constant op does NOT satisfy Is20 nor Ihig, ++ we can not perform move behavior by a single instruction. 
*/ ++ if (GET_CODE (op) == CONST_VECTOR ++ && !satisfies_constraint_CVs2 (op) ++ && !satisfies_constraint_CVhi (op)) ++ return false; ++ ++ return true; ++}) ++ ++(define_predicate "nds32_and_operand" ++ (match_code "reg,const_int") ++{ ++ return (REG_P (op) && GET_MODE (op) == mode) ++ || satisfies_constraint_Izeb (op) ++ || satisfies_constraint_Izeh (op) ++ || satisfies_constraint_Ixls (op) ++ || satisfies_constraint_Ix11 (op) ++ || satisfies_constraint_Ibms (op) ++ || satisfies_constraint_Ifex (op) ++ || satisfies_constraint_Iu15 (op) ++ || satisfies_constraint_Ii15 (op) ++ || satisfies_constraint_Ic15 (op); ++}) ++ ++(define_predicate "nds32_ior_operand" ++ (match_code "reg,const_int") ++{ ++ return (REG_P (op) && GET_MODE (op) == mode) ++ || satisfies_constraint_Iu15 (op) ++ || satisfies_constraint_Ie15 (op); ++}) ++ ++(define_predicate "nds32_xor_operand" ++ (match_code "reg,const_int") ++{ ++ return (REG_P (op) && GET_MODE (op) == mode) ++ || GET_CODE (op) == SUBREG ++ || satisfies_constraint_Iu15 (op) ++ || satisfies_constraint_It15 (op); ++}) ++ ++(define_predicate "nds32_general_register_operand" ++ (match_code "reg,subreg") ++{ ++ if (GET_CODE (op) == SUBREG) ++ op = SUBREG_REG (op); ++ ++ return (REG_P (op) ++ && (REGNO (op) >= FIRST_PSEUDO_REGISTER ++ || REGNO (op) <= NDS32_LAST_GPR_REGNUM)); ++}) ++ ++(define_predicate "nds32_fpu_register_operand" ++ (match_code "reg,subreg") ++{ ++ if (GET_CODE (op) == SUBREG) ++ op = SUBREG_REG (op); ++ ++ return (REG_P (op) ++ && NDS32_IS_FPR_REGNUM (REGNO (op))); ++}) ++ ++(define_predicate "fpu_reg_or_memory_operand" ++ (ior (match_operand 0 "nds32_fpu_register_operand") ++ (match_operand 0 "memory_operand"))) ++ ++(define_predicate "nds32_call_address_operand" ++ (ior (match_operand 0 "nds32_symbolic_operand") ++ (match_operand 0 "nds32_general_register_operand"))) ++ ++(define_predicate "nds32_insv_operand" ++ (match_code "const_int") ++{ ++ return INTVAL (op) == 0 ++ || INTVAL (op) == 8 ++ || INTVAL (op) == 16 
++ || INTVAL (op) == 24; ++}) ++ ++(define_predicate "nds32_lmw_smw_base_operand" ++ (and (match_code "mem") ++ (match_test "nds32_valid_smw_lwm_base_p (op)"))) ++ ++(define_predicate "float_even_register_operand" ++ (and (match_code "reg") ++ (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM") ++ (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM") ++ (match_test "(REGNO (op) & 1) == 0")))) ++ ++(define_predicate "float_odd_register_operand" ++ (and (match_code "reg") ++ (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM") ++ (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM") ++ (match_test "(REGNO (op) & 1) != 0")))) ++ + (define_special_predicate "nds32_load_multiple_operation" + (match_code "parallel") + { + /* To verify 'load' operation, pass 'true' for the second argument. + See the implementation in nds32.c for details. */ +- return nds32_valid_multiple_load_store (op, true); ++ return nds32_valid_multiple_load_store_p (op, true, false); ++}) ++ ++(define_special_predicate "nds32_load_multiple_and_update_address_operation" ++ (match_code "parallel") ++{ ++ /* To verify 'load' operation, pass 'true' for the second argument. ++ to verify 'update address' operation, pass 'true' for the third argument ++ See the implementation in nds32.c for details. */ ++ return nds32_valid_multiple_load_store_p (op, true, true); + }) + + (define_special_predicate "nds32_store_multiple_operation" +@@ -70,7 +247,16 @@ + { + /* To verify 'store' operation, pass 'false' for the second argument. + See the implementation in nds32.c for details. */ +- return nds32_valid_multiple_load_store (op, false); ++ return nds32_valid_multiple_load_store_p (op, false, false); ++}) ++ ++(define_special_predicate "nds32_store_multiple_and_update_address_operation" ++ (match_code "parallel") ++{ ++ /* To verify 'store' operation, pass 'false' for the second argument, ++ to verify 'update address' operation, pass 'true' for the third argument ++ See the implementation in nds32.c for details. 
*/ ++ return nds32_valid_multiple_load_store_p (op, false, true); + }) + + (define_special_predicate "nds32_stack_push_operation" +diff --git a/gcc/config/nds32/t-elf b/gcc/config/nds32/t-elf +new file mode 100644 +index 0000000..a63a310 +--- /dev/null ++++ b/gcc/config/nds32/t-elf +@@ -0,0 +1,42 @@ ++# The multilib settings of Andes NDS32 cpu for GNU compiler ++# Copyright (C) 2012-2016 Free Software Foundation, Inc. ++# Contributed by Andes Technology Corporation. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published ++# by the Free Software Foundation; either version 3, or (at your ++# option) any later version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the ++# driver program which options are defaults for this target and thus ++# do not need to be handled specially. 
++MULTILIB_OPTIONS += mcmodel=small/mcmodel=medium/mcmodel=large mvh ++ ++ifneq ($(filter graywolf,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += mcpu=graywolf ++endif ++ ++ifneq ($(filter dsp,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += mext-dsp ++endif ++ ++ifneq ($(filter zol,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += mext-zol ++endif ++ ++ifneq ($(filter v3m+,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += march=v3m+ ++endif ++ ++# ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/t-mlibs b/gcc/config/nds32/t-linux +similarity index 94% +rename from gcc/config/nds32/t-mlibs +rename to gcc/config/nds32/t-linux +index 5cb13f7..a4d8ab3 100644 +--- a/gcc/config/nds32/t-mlibs ++++ b/gcc/config/nds32/t-linux +@@ -21,6 +21,6 @@ + # We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the + # driver program which options are defaults for this target and thus + # do not need to be handled specially. +-MULTILIB_OPTIONS = mcmodel=small/mcmodel=medium/mcmodel=large ++MULTILIB_OPTIONS += + + # ------------------------------------------------------------------------ +diff --git a/gcc/config/nds32/t-nds32 b/gcc/config/nds32/t-nds32 +index cf3aea6..e34b844 100644 +--- a/gcc/config/nds32/t-nds32 ++++ b/gcc/config/nds32/t-nds32 +@@ -1,51 +1,294 @@ +-# General rules that all nds32/ targets must have. ++# Dependency rules rule of Andes NDS32 cpu for GNU compiler + # Copyright (C) 2012-2016 Free Software Foundation, Inc. + # Contributed by Andes Technology Corporation. + # + # This file is part of GCC. + # +-# GCC is free software; you can redistribute it and/or modify +-# it under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 3, or (at your option) +-# any later version. 
++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published ++# by the Free Software Foundation; either version 3, or (at your ++# option) any later version. + # +-# GCC is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. + # + # You should have received a copy of the GNU General Public License + # along with GCC; see the file COPYING3. If not see + # <http://www.gnu.org/licenses/>. + +-nds32-cost.o: $(srcdir)/config/nds32/nds32-cost.c +- $(COMPILE) $< +- $(POSTCOMPILE) + +-nds32-intrinsic.o: $(srcdir)/config/nds32/nds32-intrinsic.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++nds32-md-auxiliary.o: $(srcdir)/config/nds32/nds32-md-auxiliary.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-md-auxiliary.c + +-nds32-isr.o: $(srcdir)/config/nds32/nds32-isr.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++nds32-memory-manipulation.o: $(srcdir)/config/nds32/nds32-memory-manipulation.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ 
insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-memory-manipulation.c + +-nds32-md-auxiliary.o: $(srcdir)/config/nds32/nds32-md-auxiliary.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++nds32-predicates.o: $(srcdir)/config/nds32/nds32-predicates.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-predicates.c + +-nds32-pipelines-auxiliary.o: $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++nds32-intrinsic.o: $(srcdir)/config/nds32/nds32-intrinsic.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-intrinsic.c + +-nds32-predicates.o: $(srcdir)/config/nds32/nds32-predicates.c +- $(COMPILE) $< +- $(POSTCOMPILE) 
++nds32-pipelines-auxiliary.o: \ ++ $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c + +-nds32-memory-manipulation.o: $(srcdir)/config/nds32/nds32-memory-manipulation.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++nds32-isr.o: \ ++ $(srcdir)/config/nds32/nds32-isr.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-isr.c + +-nds32-fp-as-gp.o: $(srcdir)/config/nds32/nds32-fp-as-gp.c +- $(COMPILE) $< +- $(POSTCOMPILE) ++nds32-cost.o: \ ++ $(srcdir)/config/nds32/nds32-cost.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) 
$(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-cost.c ++ ++nds32-fp-as-gp.o: \ ++ $(srcdir)/config/nds32/nds32-fp-as-gp.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-fp-as-gp.c ++ ++nds32-load-store-opt.o: \ ++ $(srcdir)/config/nds32/nds32-load-store-opt.c \ ++ $(srcdir)/config/nds32/nds32-load-store-opt.h \ ++ $(srcdir)/config/nds32/nds32-reg-utils.h \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-load-store-opt.c ++ ++nds32-soft-fp-comm.o: \ ++ $(srcdir)/config/nds32/nds32-soft-fp-comm.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ 
$(srcdir)/config/nds32/nds32-soft-fp-comm.c ++ ++nds32-regrename.o: \ ++ $(srcdir)/config/nds32/nds32-regrename.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-regrename.c ++ ++nds32-gcse.o: \ ++ $(srcdir)/config/nds32/nds32-gcse.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-gcse.c ++ ++nds32-relax-opt.o: \ ++ $(srcdir)/config/nds32/nds32-relax-opt.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-relax-opt.c ++ ++nds32-cprop-acc.o: \ ++ $(srcdir)/config/nds32/nds32-cprop-acc.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h 
$(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-cprop-acc.c ++ ++nds32-sign-conversion.o: \ ++ $(srcdir)/config/nds32/nds32-sign-conversion.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(GIMPLE_H) $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-sign-conversion.c ++ ++nds32-scalbn-transform.o: \ ++ $(srcdir)/config/nds32/nds32-scalbn-transform.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(GIMPLE_H) $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-scalbn-transform.c ++ ++nds32-abi-compatible.o: \ ++ $(srcdir)/config/nds32/nds32-abi-compatible.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(GIMPLE_H) $(RTL_H) $(TREE_H) $(HASH_TABLE_H) 
$(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-abi-compatible.c ++ ++nds32-lmwsmw.o: \ ++ $(srcdir)/config/nds32/nds32-lmwsmw.c \ ++ $(srcdir)/config/nds32/nds32-load-store-opt.h \ ++ $(srcdir)/config/nds32/nds32-reg-utils.h \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-lmwsmw.c ++ ++nds32-reg-utils.o: \ ++ $(srcdir)/config/nds32/nds32-reg-utils.c \ ++ $(srcdir)/config/nds32/nds32-reg-utils.h \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-reg-utils.c ++ ++nds32-const-remater.o: \ ++ $(srcdir)/config/nds32/nds32-const-remater.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) 
$(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-const-remater.c ++ ++nds32-utils.o: \ ++ $(srcdir)/config/nds32/nds32-utils.c \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ ++ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ ++ insn-config.h conditions.h output.h dumpfile.h \ ++ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ ++ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ ++ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ ++ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/nds32/nds32-utils.c +diff --git a/gcc/configure b/gcc/configure +index 954673c..ca21885 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -27327,7 +27327,7 @@ esac + # version to the per-target configury. + case "$cpu_type" in + aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ +- | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ ++ | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ + | visium | xstormy16 | xtensa) + insn="nop" + ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 4c65d44..d7a5efc 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -4667,7 +4667,7 @@ esac + # version to the per-target configury. 
+ case "$cpu_type" in + aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ +- | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ ++ | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ + | visium | xstormy16 | xtensa) + insn="nop" + ;; +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index ee2715d..37fa3b5 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -13587,38 +13587,33 @@ builtin is exact. + + These built-in functions are available for the NDS32 target: + +-@deftypefn {Built-in Function} void __builtin_nds32_isync (int *@var{addr}) ++@table @code ++@item void __builtin_nds32_isync (int *@var{addr}) + Insert an ISYNC instruction into the instruction stream where + @var{addr} is an instruction address for serialization. +-@end deftypefn + +-@deftypefn {Built-in Function} void __builtin_nds32_isb (void) ++@item void __builtin_nds32_isb (void) + Insert an ISB instruction into the instruction stream. +-@end deftypefn + +-@deftypefn {Built-in Function} int __builtin_nds32_mfsr (int @var{sr}) ++@item int __builtin_nds32_mfsr (int @var{sr}) + Return the content of a system register which is mapped by @var{sr}. +-@end deftypefn + +-@deftypefn {Built-in Function} int __builtin_nds32_mfusr (int @var{usr}) ++@item int __builtin_nds32_mfusr (int @var{usr}) + Return the content of a user space register which is mapped by @var{usr}. +-@end deftypefn + +-@deftypefn {Built-in Function} void __builtin_nds32_mtsr (int @var{value}, int @var{sr}) ++@item void __builtin_nds32_mtsr (int @var{value}, int @var{sr}) + Move the @var{value} to a system register which is mapped by @var{sr}. +-@end deftypefn + +-@deftypefn {Built-in Function} void __builtin_nds32_mtusr (int @var{value}, int @var{usr}) ++@item void __builtin_nds32_mtusr (int @var{value}, int @var{usr}) + Move the @var{value} to a user space register which is mapped by @var{usr}. 
+-@end deftypefn + +-@deftypefn {Built-in Function} void __builtin_nds32_setgie_en (void) ++@item void __builtin_nds32_setgie_en (void) + Enable global interrupt. +-@end deftypefn + +-@deftypefn {Built-in Function} void __builtin_nds32_setgie_dis (void) ++@item void __builtin_nds32_setgie_dis (void) + Disable global interrupt. +-@end deftypefn ++ ++@end table + + @node picoChip Built-in Functions + @subsection picoChip Built-in Functions +diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi +index b60b53a..fc23722 100644 +--- a/gcc/doc/install.texi ++++ b/gcc/doc/install.texi +@@ -2109,7 +2109,7 @@ supported since version 4.7.2 and is the default in 4.8.0 and newer. + + @item --with-nds32-lib=@var{library} + Specifies that @var{library} setting is used for building @file{libgcc.a}. +-Currently, the valid @var{library} is @samp{newlib} or @samp{mculib}. ++Currently, the valid @var{library} are 'newlib' or 'mculib'. + This option is only supported for the NDS32 target. + + @item --with-build-time-tools=@var{dir} +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 2ed9285..75e0042 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -904,13 +904,19 @@ Objective-C and Objective-C++ Dialects}. 
+ -mreduced-regs -mfull-regs @gol + -mcmov -mno-cmov @gol + -mperf-ext -mno-perf-ext @gol ++-mperf2-ext -mno-perf2-ext @gol ++-mstring-ext -mno-string-ext @gol + -mv3push -mno-v3push @gol + -m16bit -mno-16bit @gol ++-mgp-direct -mno-gp-direct @gol + -misr-vector-size=@var{num} @gol + -mcache-block-size=@var{num} @gol + -march=@var{arch} @gol +--mcmodel=@var{code-model} @gol +--mctor-dtor -mrelax} ++-mcpu=@var{cpu} @gol ++-mmemory-model=@var{cpu} @gol ++-mconfig-register-ports=@var{ports} @gol ++-mforce-fp-as-gp -mforbid-fp-as-gp @gol ++-mex9 -mctor-dtor -mrelax} + + @emph{Nios II Options} + @gccoptlist{-G @var{num} -mgpopt=@var{option} -mgpopt -mno-gpopt @gol +@@ -5006,7 +5012,7 @@ example, warn if an unsigned variable is compared against zero with + @opindex Wbad-function-cast + @opindex Wno-bad-function-cast + Warn when a function call is cast to a non-matching type. +-For example, warn if a call to a function returning an integer type ++For example, warn if a call to a function returning an integer type + is cast to a pointer type. + + @item -Wc90-c99-compat @r{(C and Objective-C only)} +@@ -19089,6 +19095,22 @@ Generate performance extension instructions. + @opindex mno-perf-ext + Do not generate performance extension instructions. + ++@item -mperf2-ext ++@opindex mperf2-ext ++Generate performance extension version 2 instructions. ++ ++@item -mno-perf2-ext ++@opindex mno-perf2-ext ++Do not generate performance extension version 2 instructions. ++ ++@item -mstring-ext ++@opindex mstring-ext ++Generate string extension instructions. ++ ++@item -mno-string-ext ++@opindex mno-string-ext ++Do not generate string extension instructions. ++ + @item -mv3push + @opindex mv3push + Generate v3 push25/pop25 instructions. +@@ -19105,6 +19127,14 @@ Generate 16-bit instructions. + @opindex mno-16-bit + Do not generate 16-bit instructions. + ++@item -mgp-direct ++@opindex mgp-direct ++Generate GP base instructions directly. 
++@item -mno-gp-direct ++@opindex mno-gp-direct ++Do not generate GP base instructions directly. ++ + @item -misr-vector-size=@var{num} + @opindex misr-vector-size + Specify the size of each interrupt vector, which must be 4 or 16. +@@ -19118,20 +19148,33 @@ which must be a power of 2 between 4 and 512. + @opindex march + Specify the name of the target architecture. + +-@item -mcmodel=@var{code-model} +-@opindex mcmodel +-Set the code model to one of +-@table @asis +-@item @samp{small} +-All the data and read-only data segments must be within 512KB addressing space. +-The text segment must be within 16MB addressing space. +-@item @samp{medium} +-The data segment must be within 512KB while the read-only data segment can be +-within 4GB addressing space. The text segment should be still within 16MB +-addressing space. +-@item @samp{large} +-All the text and data segments can be within 4GB addressing space. +-@end table ++@item -mcpu=@var{cpu} ++@opindex mcpu ++Specify the cpu for pipeline model. ++ ++@item -mmemory-model=@var{cpu} ++@opindex mmemory-model ++Specify fast or slow memory model. ++ ++@item -mconfig-register-ports=@var{ports} ++@opindex mconfig-register-ports ++Specify how many read/write ports for n9/n10 cores. ++The value should be 3r2w or 2r1w. ++ ++@item -mforce-fp-as-gp ++@opindex mforce-fp-as-gp ++Prevent $fp being allocated during register allocation so that compiler ++is able to force performing fp-as-gp optimization. ++ ++@item -mforbid-fp-as-gp ++@opindex mforbid-fp-as-gp ++Forbid using $fp to access static and global variables. ++This option strictly forbids fp-as-gp optimization ++regardless of @option{-mforce-fp-as-gp}. ++ ++@item -mex9 ++@opindex mex9 ++Use special directives to guide linker doing ex9 optimization. + + @item -mctor-dtor + @opindex mctor-dtor +@@ -19159,55 +19202,15 @@ Put global and static objects less than or equal to @var{num} bytes + into the small data or BSS sections instead of the normal data or BSS + sections.
The default value of @var{num} is 8. + +-@item -mgpopt=@var{option} + @item -mgpopt + @itemx -mno-gpopt + @opindex mgpopt + @opindex mno-gpopt +-Generate (do not generate) GP-relative accesses. The following +-@var{option} names are recognized: +- +-@table @samp +- +-@item none +-Do not generate GP-relative accesses. +- +-@item local +-Generate GP-relative accesses for small data objects that are not +-external, weak, or uninitialized common symbols. +-Also use GP-relative addressing for objects that +-have been explicitly placed in a small data section via a @code{section} +-attribute. +- +-@item global +-As for @samp{local}, but also generate GP-relative accesses for +-small data objects that are external, weak, or common. If you use this option, +-you must ensure that all parts of your program (including libraries) are +-compiled with the same @option{-G} setting. +- +-@item data +-Generate GP-relative accesses for all data objects in the program. If you +-use this option, the entire data and BSS segments +-of your program must fit in 64K of memory and you must use an appropriate +-linker script to allocate them within the addressable range of the +-global pointer. +- +-@item all +-Generate GP-relative addresses for function pointers as well as data +-pointers. If you use this option, the entire text, data, and BSS segments +-of your program must fit in 64K of memory and you must use an appropriate +-linker script to allocate them within the addressable range of the +-global pointer. +- +-@end table +- +-@option{-mgpopt} is equivalent to @option{-mgpopt=local}, and +-@option{-mno-gpopt} is equivalent to @option{-mgpopt=none}. +- +-The default is @option{-mgpopt} except when @option{-fpic} or +-@option{-fPIC} is specified to generate position-independent code. +-Note that the Nios II ABI does not permit GP-relative accesses from +-shared libraries. ++Generate (do not generate) GP-relative accesses for objects in the ++small data or BSS sections. 
The default is @option{-mgpopt} except ++when @option{-fpic} or @option{-fPIC} is specified to generate ++position-independent code. Note that the Nios II ABI does not permit ++GP-relative accesses from shared libraries. + + You may need to specify @option{-mno-gpopt} explicitly when building + programs that include large amounts of small data, including large +diff --git a/gcc/gcc.c b/gcc/gcc.c +index 0f042b0..5c43f33 100644 +--- a/gcc/gcc.c ++++ b/gcc/gcc.c +@@ -1288,7 +1288,7 @@ static const struct compiler default_compilers[] = + {".zip", "#Java", 0, 0, 0}, {".jar", "#Java", 0, 0, 0}, + {".go", "#Go", 0, 1, 0}, + /* Next come the entries for C. */ +- {".c", "@c", 0, 0, 1}, ++ {".c", "@nds32_c", 0, 0, 1}, + {"@c", + /* cc1 has an integrated ISO C preprocessor. We should invoke the + external preprocessor if -save-temps is given. */ +@@ -1303,6 +1303,38 @@ static const struct compiler default_compilers[] = + %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\ + cc1 %(cpp_unique_options) %(cc1_options)}}}\ + %{!fsyntax-only:%(invoke_as)}}}}", 0, 0, 1}, ++ {"@nds32_c", ++ /* cc1 has an integrated ISO C preprocessor. We should invoke the ++ external preprocessor if -save-temps is given. 
*/ ++ "%{E|M|MM:%(trad_capable_cpp) %(cpp_options) %(cpp_debug_options)}\ ++ %{mace:\ ++ %{!E:%{!M:%{!MM:\ ++ %{traditional:\ ++%eGNU C no longer supports -traditional without -E}\ ++ %{save-temps*|traditional-cpp|no-integrated-cpp:%(trad_capable_cpp) \ ++ %(cpp_options) -o %{save-temps*:%b.i} %{!save-temps*:%g.i} \n\ ++ cs2 %{mace-s2s*} %{save-temps*:%b.i} %{!save-temps*:%g.i} \ ++ -o %{save-temps*:%b.ace.i} %{!save-temps*:%g.ace.i} --\n\ ++ cc1 -fpreprocessed %{save-temps*:%b.ace.i} %{!save-temps*:%g.ace.i} \ ++ %(cc1_options)}\ ++ %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\ ++ %(trad_capable_cpp) %(cpp_options) -o %u.i\n}}}\ ++ %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\ ++ cs2 %{mace-s2s*} %U.i -o %u.ace.i --\n}}}\ ++ %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\ ++ cc1 -fpreprocessed %U.ace.i %(cc1_options)}}}\ ++ %{!fsyntax-only:%(invoke_as)}}}}}\ ++ %{!mace:\ ++ %{!E:%{!M:%{!MM:\ ++ %{traditional:\ ++%eGNU C no longer supports -traditional without -E}\ ++ %{save-temps*|traditional-cpp|no-integrated-cpp:%(trad_capable_cpp) \ ++ %(cpp_options) -o %{save-temps*:%b.i} %{!save-temps*:%g.i} \n\ ++ cc1 -fpreprocessed %{save-temps*:%b.i} %{!save-temps*:%g.i} \ ++ %(cc1_options)}\ ++ %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\ ++ cc1 %(cpp_unique_options) %(cc1_options)}}}\ ++ %{!fsyntax-only:%(invoke_as)}}}}}", 0, 0, 1}, + {"-", + "%{!E:%e-E or -x required when input is from standard input}\ + %(trad_capable_cpp) %(cpp_options) %(cpp_debug_options)", 0, 0, 0}, +diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c +index 4d26e2f..60f934c 100644 +--- a/gcc/loop-unroll.c ++++ b/gcc/loop-unroll.c +@@ -1132,7 +1132,9 @@ decide_unroll_stupid (struct loop *loop, int flags) + of mispredicts. + TODO: this heuristic needs tunning; call inside the loop body + is also relatively good reason to not unroll. 
*/ +- if (num_loop_branches (loop) > 1) ++ unsigned branch_count = PARAM_VALUE (PARAM_MAX_LOOP_UNROLL_BRANCH); ++ ++ if (num_loop_branches (loop) > branch_count) + { + if (dump_file) + fprintf (dump_file, ";; Not unrolling, contains branches\n"); +diff --git a/gcc/opt-read.awk b/gcc/opt-read.awk +index b304ccb..2e6e8df 100644 +--- a/gcc/opt-read.awk ++++ b/gcc/opt-read.awk +@@ -99,6 +99,7 @@ BEGIN { + val_flags = "0" + val_flags = val_flags \ + test_flag("Canonical", props, "| CL_ENUM_CANONICAL") \ ++ test_flag("Undocumented", props, "| CL_UNDOCUMENTED") \ + test_flag("DriverOnly", props, "| CL_ENUM_DRIVER_ONLY") + enum_data[enum_name] = enum_data[enum_name] \ + " { " quote string quote ", " value ", " val_flags \ +diff --git a/gcc/opts.c b/gcc/opts.c +index 0f9431a..da75332 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -1271,6 +1271,10 @@ print_filtered_help (unsigned int include_flags, + { + unsigned int len = strlen (cl_enums[i].values[j].arg); + ++ /* Skip undocumented enum values. */ ++ if (cl_enums[i].values[j].flags & CL_UNDOCUMENTED) ++ continue; ++ + if (pos > 4 && pos + 1 + len <= columns) + { + printf (" %s", cl_enums[i].values[j].arg); +diff --git a/gcc/params.def b/gcc/params.def +index dbff305..44847b3 100644 +--- a/gcc/params.def ++++ b/gcc/params.def +@@ -297,6 +297,11 @@ DEFPARAM(PARAM_MAX_UNROLL_TIMES, + "max-unroll-times", + "The maximum number of unrollings of a single loop.", + 8, 0, 0) ++/* Maximum number of branches a loop may contain and still be unrolled. */ ++DEFPARAM (PARAM_MAX_LOOP_UNROLL_BRANCH, ++ "max-unroll-loop-branch", ++ "Maximum number of loop branch count", ++ 1, 1, 20) + /* The maximum number of insns of a peeled loop.
*/ + DEFPARAM(PARAM_MAX_PEELED_INSNS, + "max-peeled-insns", +diff --git a/gcc/testsuite/g++.dg/init/array15.C b/gcc/testsuite/g++.dg/init/array15.C +index 17160d0..280fe69 100644 +--- a/gcc/testsuite/g++.dg/init/array15.C ++++ b/gcc/testsuite/g++.dg/init/array15.C +@@ -1,4 +1,6 @@ + // { dg-do run } ++// { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } ++// { dg-options "-mcmodel=large" { target nds32*-*-elf* } } + + // Copyright (C) 2004 Free Software Foundation, Inc. + // Contributed by Nathan Sidwell 8 Dec 2004 <nathan@codesourcery.com> +diff --git a/gcc/testsuite/g++.dg/init/array16.C b/gcc/testsuite/g++.dg/init/array16.C +index 188d1a8..83c0d47 100644 +--- a/gcc/testsuite/g++.dg/init/array16.C ++++ b/gcc/testsuite/g++.dg/init/array16.C +@@ -2,6 +2,7 @@ + // have "compile" for some targets and "run" for others. + // { dg-do run { target { ! mmix-*-* } } } + // { dg-options "-mstructure-size-boundary=8" { target arm*-*-* } } ++// { dg-skip-if "" { nds32_gp_direct } } + + // Copyright (C) 2004 Free Software Foundation, Inc. 
+ // Contributed by Nathan Sidwell 8 Dec 2004 <nathan@codesourcery.com> +diff --git a/gcc/testsuite/g++.dg/torture/type-generic-1.C b/gcc/testsuite/g++.dg/torture/type-generic-1.C +index 4d82592..5ae789c 100644 +--- a/gcc/testsuite/g++.dg/torture/type-generic-1.C ++++ b/gcc/testsuite/g++.dg/torture/type-generic-1.C +@@ -4,6 +4,7 @@ + /* { dg-do run } */ + /* { dg-add-options ieee } */ + /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */ ++/* { dg-skip-if "No Denormalized support" { nds32_ext_fpu } } */ + + #include "../../gcc.dg/tg-tests.h" + +diff --git a/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c b/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c +index 228c5d9..d2d3e51 100644 +--- a/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c ++++ b/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c +@@ -1,4 +1,5 @@ + /* { dg-skip-if "too complex for avr" { avr-*-* } { "*" } { "" } } */ ++/* { dg-skip-if "lto may cause internal compiler error on cygwin with gcc-4.9" { nds32*-*-* } { "*" } { "" } } */ + /* { dg-skip-if "ptxas times out" { nvptx-*-* } { "*" } { "" } } */ + /* { dg-timeout-factor 4.0 } */ + #define LIM1(x) x##0, x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9, +diff --git a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c +index 4eeb8c7..6cd02bc 100644 +--- a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c ++++ b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c +@@ -1,4 +1,5 @@ + /* { dg-skip-if "requires frame pointers" { *-*-* } "-fomit-frame-pointer" "" } */ ++/* { dg-additional-options "-malways-save-lp" { target nds32*-*-* } } */ + /* { dg-require-effective-target return_address } */ + + extern void exit (int); +diff --git a/gcc/testsuite/gcc.c-torture/execute/920501-8.x b/gcc/testsuite/gcc.c-torture/execute/920501-8.x +new file mode 100644 +index 0000000..96f05bc +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/920501-8.x +@@ -0,0 +1,11 @@ ++# Please see Andes Bugzilla
#11005 for the details. ++if { [istarget "nds32*-*-*"] } { ++ # The nds32 mculib toolchains require ++ # "-u_printf_float" and "-u_scanf_float" options ++ # to fully support printf and scanf functionality. ++ # These options are supposed to be harmless to newlib toolchain. ++ set additional_flags "-u_printf_float -u_scanf_float" ++} ++ ++return 0 ++ +diff --git a/gcc/testsuite/gcc.c-torture/execute/930513-1.x b/gcc/testsuite/gcc.c-torture/execute/930513-1.x +new file mode 100644 +index 0000000..96f05bc +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/930513-1.x +@@ -0,0 +1,11 @@ ++# Please see Andes Bugzilla #11005 for the details. ++if { [istarget "nds32*-*-*"] } { ++ # The nds32 mculib toolchains require ++ # "-u_printf_float" and "-u_scanf_float" options ++ # to fully support printf and scanf functionality. ++ # These options are supposed to be harmless to newlib toolchain. ++ set additional_flags "-u_printf_float -u_scanf_float" ++} ++ ++return 0 ++ +diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp b/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp +index 009984e..19cfcca 100644 +--- a/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp ++++ b/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp +@@ -30,6 +30,10 @@ load_lib c-torture.exp + # Disable tests on machines with no hardware support for IEEE arithmetic. + if { [istarget "vax-*-*"] || [ istarget "powerpc-*-*spe"] || [istarget "pdp11-*-*"] } { return } + ++# Since we cannot use dg-skip-if or dg-require-effective-target for individual ++# test case under ieee category, we disable all ieee tests on nds32 fpu toolchains. 
++if { [istarget "nds32*-*-*"] && [check_effective_target_nds32_ext_fpu] } { return } ++ + if $tracelevel then { + strace $tracelevel + } +diff --git a/gcc/testsuite/gcc.c-torture/execute/pr60822.c b/gcc/testsuite/gcc.c-torture/execute/pr60822.c +index dcd2447..a305df3 100644 +--- a/gcc/testsuite/gcc.c-torture/execute/pr60822.c ++++ b/gcc/testsuite/gcc.c-torture/execute/pr60822.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target int32plus } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + struct X { + char fill0[800000]; + int a; +diff --git a/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x b/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x +new file mode 100644 +index 0000000..96f05bc +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x +@@ -0,0 +1,11 @@ ++# Please see Andes Bugzilla #11005 for the details. ++if { [istarget "nds32*-*-*"] } { ++ # The nds32 mculib toolchains require ++ # "-u_printf_float" and "-u_scanf_float" options ++ # to fully support printf and scanf functionality. ++ # These options are supposed to be harmless to newlib toolchain. ++ set additional_flags "-u_printf_float -u_scanf_float" ++} ++ ++return 0 ++ +diff --git a/gcc/testsuite/gcc.dg/constructor-1.c b/gcc/testsuite/gcc.dg/constructor-1.c +index 73e9fc3..827987e 100644 +--- a/gcc/testsuite/gcc.dg/constructor-1.c ++++ b/gcc/testsuite/gcc.dg/constructor-1.c +@@ -1,6 +1,7 @@ + /* { dg-do run } */ + /* { dg-options "-O2" } */ + /* { dg-skip-if "" { ! 
global_constructor } { "*" } { "" } } */ ++/* { dg-options "-O2 -mctor-dtor" { target { nds32*-*-* } } } */ + + /* The ipa-split pass pulls the body of the if(!x) block + into a separate function to make foo a better inlining +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-0.c b/gcc/testsuite/gcc.dg/graphite/interchange-0.c +index d56be46..b83535c 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-0.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-0.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-1.c b/gcc/testsuite/gcc.dg/graphite/interchange-1.c +index b65d486..2d77f0e 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-1.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-1.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + /* Formerly known as ltrans-1.c */ + +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-10.c b/gcc/testsuite/gcc.dg/graphite/interchange-10.c +index a955644..2021de2 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-10.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-10.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-11.c b/gcc/testsuite/gcc.dg/graphite/interchange-11.c +index 6102822..5abb316 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-11.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-11.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { 
dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-15.c b/gcc/testsuite/gcc.dg/graphite/interchange-15.c +index 7410f29..1f71f06 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-15.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-15.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-2.c b/gcc/testsuite/gcc.dg/graphite/interchange-2.c +index 936ee00..0041649 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-2.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-2.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + /* Formerly known as ltrans-2.c */ + +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-3.c b/gcc/testsuite/gcc.dg/graphite/interchange-3.c +index 4aec824..6635529 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-3.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-3.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + /* Formerly known as ltrans-3.c */ + +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-4.c b/gcc/testsuite/gcc.dg/graphite/interchange-4.c +index 463ecb5..359f0ac 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-4.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-4.c +@@ -1,4 +1,6 @@ + 
/* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + /* Formerly known as ltrans-4.c */ + +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-5.c b/gcc/testsuite/gcc.dg/graphite/interchange-5.c +index e5aaa64..892257e 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-5.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-5.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + /* Formerly known as ltrans-5.c */ + +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c +index c6543ec..51c6ee5 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/graphite/pr46185.c b/gcc/testsuite/gcc.dg/graphite/pr46185.c +index 36d46a4..738c9a8 100644 +--- a/gcc/testsuite/gcc.dg/graphite/pr46185.c ++++ b/gcc/testsuite/gcc.dg/graphite/pr46185.c +@@ -1,5 +1,7 @@ + /* { dg-do run } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ + /* { dg-options "-O2 -floop-interchange -ffast-math -fno-ipa-cp" } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c +index fe2669f..dd77aa3 100644 +--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c ++++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-15.c +@@ -1,4 
+1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c +index 211c9ab..c7defb4 100644 +--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c ++++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-mvt.c +@@ -1,4 +1,6 @@ + /* { dg-require-effective-target size32plus } */ ++/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ ++/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ + + #define DEBUG 0 + #if DEBUG +diff --git a/gcc/testsuite/gcc.dg/initpri1.c b/gcc/testsuite/gcc.dg/initpri1.c +index 794ea2b..10b3a24 100644 +--- a/gcc/testsuite/gcc.dg/initpri1.c ++++ b/gcc/testsuite/gcc.dg/initpri1.c +@@ -1,4 +1,5 @@ + /* { dg-do run { target init_priority } } */ ++/* { dg-options "-mctor-dtor" { target { nds32*-*-* } } } */ + + extern void abort (); + +diff --git a/gcc/testsuite/gcc.dg/initpri2.c b/gcc/testsuite/gcc.dg/initpri2.c +index fa9fda0..1418411 100644 +--- a/gcc/testsuite/gcc.dg/initpri2.c ++++ b/gcc/testsuite/gcc.dg/initpri2.c +@@ -1,4 +1,5 @@ + /* { dg-do compile { target init_priority } } */ ++/* { dg-options "-mctor-dtor" { target { nds32*-*-* } } } */ + + /* Priorities must be in the range [0, 65535]. 
*/ + void c1() +diff --git a/gcc/testsuite/gcc.dg/initpri3.c b/gcc/testsuite/gcc.dg/initpri3.c +index 1633da0..e1b8cf6 100644 +--- a/gcc/testsuite/gcc.dg/initpri3.c ++++ b/gcc/testsuite/gcc.dg/initpri3.c +@@ -1,6 +1,7 @@ + /* { dg-do run { target init_priority } } */ + /* { dg-require-effective-target lto } */ + /* { dg-options "-flto -O3" } */ ++/* { dg-options "-flto -O3 -mctor-dtor" { target { nds32*-*-* } } } */ + + extern void abort (); + +diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c +index 4db904b..2290d8b 100644 +--- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c ++++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c +@@ -1,5 +1,6 @@ + /* { dg-do run } */ + /* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details" } */ ++/* { dg-additional-options "-u_printf_float -u_scanf_float" { target nds32*-*-* } } */ + + struct bovid + { +diff --git a/gcc/testsuite/gcc.dg/lower-subreg-1.c b/gcc/testsuite/gcc.dg/lower-subreg-1.c +index 47057fe..25439b1 100644 +--- a/gcc/testsuite/gcc.dg/lower-subreg-1.c ++++ b/gcc/testsuite/gcc.dg/lower-subreg-1.c +@@ -1,4 +1,4 @@ +-/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ ++/* { dg-do compile { target { ! 
{ mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* nds32*-*-* } } } } } */ + /* { dg-options "-O -fdump-rtl-subreg1" } */ + /* { dg-additional-options "-mno-stv" { target ia32 } } */ + /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */ +diff --git a/gcc/testsuite/gcc.dg/pr28796-2.c b/gcc/testsuite/gcc.dg/pr28796-2.c +index f56a5d4..fff71bc 100644 +--- a/gcc/testsuite/gcc.dg/pr28796-2.c ++++ b/gcc/testsuite/gcc.dg/pr28796-2.c +@@ -2,6 +2,7 @@ + /* { dg-options "-O2 -funsafe-math-optimizations -fno-finite-math-only -DUNSAFE" } */ + /* { dg-add-options ieee } */ + /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */ ++/* { dg-skip-if "No Denormmalized support" { nds32_ext_fpu } } */ + + #include "tg-tests.h" + +diff --git a/gcc/testsuite/gcc.dg/sibcall-10.c b/gcc/testsuite/gcc.dg/sibcall-10.c +index d98b43a..bb0e24c 100644 +--- a/gcc/testsuite/gcc.dg/sibcall-10.c ++++ b/gcc/testsuite/gcc.dg/sibcall-10.c +@@ -5,7 +5,7 @@ + Copyright (C) 2002 Free Software Foundation Inc. + Contributed by Hans-Peter Nilsson <hp@bitrange.com> */ + +-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ ++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ + /* -mlongcall disables sibcall patterns. */ + /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ + /* { dg-options "-O2 -foptimize-sibling-calls" } */ +diff --git a/gcc/testsuite/gcc.dg/sibcall-3.c b/gcc/testsuite/gcc.dg/sibcall-3.c +index eafe8dd..f188a18 100644 +--- a/gcc/testsuite/gcc.dg/sibcall-3.c ++++ b/gcc/testsuite/gcc.dg/sibcall-3.c +@@ -5,7 +5,7 @@ + Copyright (C) 2002 Free Software Foundation Inc. 
+ Contributed by Hans-Peter Nilsson <hp@bitrange.com> */ + +-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ ++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ + /* -mlongcall disables sibcall patterns. */ + /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ + /* { dg-options "-O2 -foptimize-sibling-calls" } */ +diff --git a/gcc/testsuite/gcc.dg/sibcall-4.c b/gcc/testsuite/gcc.dg/sibcall-4.c +index 1e039c6..a8c844a 100644 +--- a/gcc/testsuite/gcc.dg/sibcall-4.c ++++ b/gcc/testsuite/gcc.dg/sibcall-4.c +@@ -5,7 +5,7 @@ + Copyright (C) 2002 Free Software Foundation Inc. + Contributed by Hans-Peter Nilsson <hp@bitrange.com> */ + +-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ ++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ + /* -mlongcall disables sibcall patterns. */ + /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ + /* { dg-options "-O2 -foptimize-sibling-calls" } */ +diff --git a/gcc/testsuite/gcc.dg/sibcall-9.c b/gcc/testsuite/gcc.dg/sibcall-9.c +index 34e7053..71c3251 100644 +--- a/gcc/testsuite/gcc.dg/sibcall-9.c ++++ b/gcc/testsuite/gcc.dg/sibcall-9.c +@@ -5,7 +5,7 @@ + Copyright (C) 2002 Free Software Foundation Inc. 
+ Contributed by Hans-Peter Nilsson <hp@bitrange.com> */ + +-/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* nvptx-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ ++/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nvptx-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ + /* -mlongcall disables sibcall patterns. */ + /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ + /* { dg-options "-O2 -foptimize-sibling-calls" } */ +diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c +index 7864c6a..c768ca2 100644 +--- a/gcc/testsuite/gcc.dg/stack-usage-1.c ++++ b/gcc/testsuite/gcc.dg/stack-usage-1.c +@@ -2,6 +2,7 @@ + /* { dg-options "-fstack-usage" } */ + /* nvptx doesn't have a reg allocator, and hence no stack usage data. */ + /* { dg-skip-if "" { nvptx-*-* } { "*" } { "" } } */ ++/* { dg-options "-fstack-usage -fno-omit-frame-pointer" { target { nds32*-*-* } } } */ + + /* This is aimed at testing basic support for -fstack-usage in the back-ends. + See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c). 
+diff --git a/gcc/testsuite/gcc.dg/torture/type-generic-1.c b/gcc/testsuite/gcc.dg/torture/type-generic-1.c +index 3897818..6815e8b 100644 +--- a/gcc/testsuite/gcc.dg/torture/type-generic-1.c ++++ b/gcc/testsuite/gcc.dg/torture/type-generic-1.c +@@ -3,6 +3,7 @@ + + /* { dg-do run } */ + /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */ ++/* { dg-skip-if "No Denormmalized support" { nds32_ext_fpu } } */ + /* { dg-options "-DUNSAFE" { target tic6x*-*-* visium-*-* } } */ + /* { dg-add-options ieee } */ + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +index 1a4bfe6..78c948a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +@@ -25,4 +25,4 @@ foo () + but the loop reads only one element at a time, and DOM cannot resolve these. + The same happens on powerpc depending on the SIMD support available. */ + +-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* powerpc64*-*-* } || { sparc*-*-* && lp64 } } } } } */ ++/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* powerpc64*-*-* nds32*-*-*} || { sparc*-*-* && lp64 } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c +index f70b311..8a1081c 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c +@@ -33,6 +33,6 @@ bitmap_single_bit_set_p (const_bitmap a) + } + + /* Verify that VRP simplified an "if" statement. 
*/ +-/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp1"} } */ ++/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp1" { xfail *-*-* } } } */ + + +diff --git a/gcc/testsuite/gcc.target/nds32/basic-main.c b/gcc/testsuite/gcc.target/nds32/basic-main.c +index 6fdbc35..7341fb5 100644 +--- a/gcc/testsuite/gcc.target/nds32/basic-main.c ++++ b/gcc/testsuite/gcc.target/nds32/basic-main.c +@@ -1,9 +1,10 @@ + /* This is a basic main function test program. */ + +-/* { dg-do run } */ +-/* { dg-options "-O0" } */ ++/* { dg-do run } */ ++/* { dg-options "-O0" } */ + +-int main(void) ++int ++main (void) + { + return 0; + } +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c +new file mode 100644 +index 0000000..8cadcfd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c +@@ -0,0 +1,20 @@ ++/* This is a test program for abs instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int a = -4; ++ int abs = __nds32__abs (a); ++ ++ if (abs != 4) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c +new file mode 100644 +index 0000000..d2c87db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c +@@ -0,0 +1,21 @@ ++/* This is a test program for ave instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int a = 4; ++ int b = 2; ++ int ave = __nds32__ave (a, b); ++ ++ if (ave != 3) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c +new file mode 100644 +index 0000000..0e6c1e0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c +@@ -0,0 +1,20 @@ ++/* This is a test program for bclr instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int a = 1; ++ int c = __nds32__bclr (a, 0); ++ ++ if (c != 0) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c +new file mode 100644 +index 0000000..1bd8513 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c +@@ -0,0 +1,20 @@ ++/* This is a test program for bset instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int c = 0; ++ c = __nds32__bset (c, 0); ++ ++ if (c != 1) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c +new file mode 100644 +index 0000000..a1dbc00 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c +@@ -0,0 +1,20 @@ ++/* This is a test program for btgl instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int a = 1; ++ int c = __nds32__btgl (1, 0); ++ ++ if (c != 0) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c +new file mode 100644 +index 0000000..c001f94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c +@@ -0,0 +1,20 @@ ++/* This is a test program for btst instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int c = 1; ++ c = __nds32__btst (c, 0); ++ ++ if (c != 1) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c +new file mode 100644 +index 0000000..d63b298 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c +@@ -0,0 +1,20 @@ ++/* This is a test program for clip instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int c = 33; ++ c = __nds32__clip (c, 5); ++ ++ if (c != 31) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c +new file mode 100644 +index 0000000..3e3f663 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c +@@ -0,0 +1,20 @@ ++/* This is a test program for clips instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int a = -33; ++ int c = __nds32__clips (a, 5); ++ ++ if (c != -32) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c +new file mode 100644 +index 0000000..d672a33 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c +@@ -0,0 +1,20 @@ ++/* This is a test program for clo instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int c = 0xFFFF0000; ++ c = __nds32__clo (c); ++ ++ if (c != 16) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c +new file mode 100644 +index 0000000..17e6318 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c +@@ -0,0 +1,20 @@ ++/* This is a test program for clz instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ int c = 0x0000FFFF; ++ c = __nds32__clz (c); ++ ++ if (c != 16) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c +new file mode 100644 +index 0000000..c769fea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c +@@ -0,0 +1,28 @@ ++/* This is a test program for bse instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf2 } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0xF0F0F0F0; ++ unsigned int b = 0x00000300; ++ unsigned int r = 0; ++ ++ unsigned int verify_b = 0x00000300; ++ unsigned int verify_r = 0; ++ ++ __nds32__bse (&r, a, &b); ++ a = 0xF0F0F0F0; ++ asm volatile ("bse %0, %2, %1": "+&r" (verify_r), "+&r" (verify_b) : "r" (a)); ++ ++ if ((verify_b == b) && (verify_r == r)) ++ exit (0); ++ else ++ abort (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c +new file mode 100644 +index 0000000..d798719 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c +@@ -0,0 +1,26 @@ ++/* This is a test program for bsp instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf2 } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x0000000F; ++ unsigned int b = 0x00000300; ++ unsigned int r = 0; ++ unsigned int verify_b = 0x00000300; ++ unsigned int verify_r = 0; ++ ++ __nds32__bsp (&r, a, &b); ++ asm volatile ("bsp %0, %2, %1": "+&r" (verify_r), "+&r" (verify_b) : "r" (a)); ++ ++ if ((verify_b == b) && (verify_r == r)) ++ exit (0); ++ else ++ abort (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c +new file mode 100644 +index 0000000..bc4fe42 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c +@@ -0,0 +1,21 @@ ++/* This is a test program for pbsad instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf2 } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x09070605; ++ unsigned int b = 0x04020301; ++ unsigned int r = __nds32__pbsad (a, b); ++ ++ if (r != 17) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c +new file mode 100644 +index 0000000..6ed1b08 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c +@@ -0,0 +1,23 @@ ++/* This is a test program for pbsada instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_perf2 } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x09070605; ++ unsigned int b = 0x04020301; ++ unsigned int r = 1; ++ ++ r = __nds32__pbsada(r, a, b); ++ ++ if (r != 18) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-add16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add16.c +new file mode 100644 +index 0000000..0eec324 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add16.c +@@ -0,0 +1,49 @@ ++/* This is a test program for add16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int add16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__add16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_uadd16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_uadd16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sadd16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_sadd16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = add16 (0x0001f000, 0x00011000); ++ uint16x2_t v_ua = v_uadd16 ((uint16x2_t) {0xf000, 0xf000}, ++ (uint16x2_t) {0x1000, 0x2000}); ++ int16x2_t v_sa = v_sadd16 ((int16x2_t) {0xf777, 0xf111}, ++ (int16x2_t) {0x1000, 0x2000}); ++ ++ if (a != 0x00020000) ++ abort (); ++ else if (v_ua[0] != 0x0000 ++ || v_ua[1] != 0x1000) ++ abort (); ++ else if (v_sa[0] != 0x0777 ++ || v_sa[1] != 0x1111) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-add64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add64.c +new file mode 100644 +index 0000000..b761b7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add64.c +@@ -0,0 +1,36 @@ ++/* This is a test program for add64 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long sadd64 (long long ra, long long rb) ++{ ++ return __nds32__sadd64 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++unsigned long long uadd64 (unsigned long long ra, unsigned long long rb) ++{ ++ return __nds32__uadd64 (ra, rb); ++} ++ ++int ++main () ++{ ++ long long sa = sadd64 (0x1122334400000000ll, 0x55667788ll); ++ unsigned long long ua = uadd64 (0xffff00000000ull, 0x55667788ull); ++ ++ if (sa != 0x1122334455667788ll) ++ abort (); ++ else if (ua != 0xffff55667788ull) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-add8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add8.c +new file mode 100644 +index 0000000..77e686c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-add8.c +@@ -0,0 +1,53 @@ ++/* This is a test program for add8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int add8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__add8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_uadd8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_uadd8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int8x4_t v_sadd8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_sadd8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = add8 (0x11223344, 0x55667788); ++ uint8x4_t v_ua = v_uadd8 ((uint8x4_t) {0xff, 0xee, 0xdd, 0xcc}, ++ (uint8x4_t) {0x1, 0xee, 0xdd, 0xcc}); ++ int8x4_t v_sa = v_sadd8 ((int8x4_t) {0x80, 0x7f, 0xbb, 0xaa}, ++ (int8x4_t) {0x80, 0x7f, 0xbb, 0xaa}); ++ ++ if (a != 0x6688aacc) ++ abort (); ++ else if (v_ua[0] != 0 ++ || v_ua[1] != 0xdc ++ || v_ua[2] != 0xba ++ || v_ua[3] != 0x98) ++ abort (); ++ else if (v_sa[0] != 0 ++ || v_sa[1] != (char) 0xfe ++ || v_sa[2] != 0x76 ++ || v_sa[3] != 0x54) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-bitrev.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bitrev.c +new file mode 100644 +index 0000000..2c8c297 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bitrev.c +@@ -0,0 +1,27 @@ ++/* This is a test program for bitrev instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int bitrev (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__bitrev (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = bitrev (0xd, 1); ++ ++ if (a != 0x2) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-bpick.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bpick.c +new file mode 100644 +index 0000000..78893cb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-bpick.c +@@ -0,0 +1,27 @@ ++/* This is a test program for bpick instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int bpick (unsigned int ra, unsigned int rb, unsigned int rc) ++{ ++ return __nds32__bpick (ra, rb, rc); ++} ++ ++int ++main () ++{ ++ unsigned int a = bpick (0x11223344, 0x11332244, 0); ++ ++ if (a != 0x11332244) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq16.c +new file mode 100644 +index 0000000..c37abf4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq16.c +@@ -0,0 +1,49 @@ ++/* This is a test program for cmpeq16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int cmpeq16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__cmpeq16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_scmpeq16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_scmpeq16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_ucmpeq16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_ucmpeq16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = cmpeq16 (0xffff0000, 0xffff0001); ++ uint16x2_t v_sa = v_scmpeq16 ((int16x2_t) {0x7fff, 0x8000}, ++ (int16x2_t) {0x8000, 0x8000}); ++ uint16x2_t v_ua = v_ucmpeq16 ((uint16x2_t) {0x7fff, 0x8000}, ++ (uint16x2_t) {0x8000, 0x8000}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (v_sa[0] != 0 ++ || v_sa[1] != 0xffff) ++ abort (); ++ else if (v_ua[0] != 0 ++ || v_ua[1] != 0xffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq8.c +new file mode 100644 +index 0000000..a692dac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cmpeq8.c +@@ -0,0 +1,53 @@ ++/* This is a test program for cmpeq8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int cmpeq8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__cmpeq8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_scmpeq8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_scmpeq8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_ucmpeq8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_ucmpeq8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = cmpeq8 (0xffff0000, 0xffff0101); ++ uint8x4_t v_sa = v_scmpeq8 ((int8x4_t) { 0x7f, 0x7f, 0x01, 0x01}, ++ (int8x4_t) { 0x7f, 0x7f, 0x00, 0x00}); ++ uint8x4_t v_ua = v_ucmpeq8 ((uint8x4_t) { 0x7f, 0x7f, 0x01, 0x01}, ++ (uint8x4_t) { 0x7f, 0x7f, 0x00, 0x00}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (v_sa[0] != 0xff ++ || v_sa[1] != 0xff ++ || v_sa[2] != 0 ++ || v_sa[3] != 0) ++ abort (); ++ else if (v_ua[0] != 0xff ++ || v_ua[1] != 0xff ++ || v_ua[2] != 0 ++ || v_ua[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-cras16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cras16.c +new file mode 100644 +index 0000000..7d6da46 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-cras16.c +@@ -0,0 +1,58 @@ ++/* This is a test program for cras16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int cras16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__cras16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_ucras16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_ucras16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_scras16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_scras16 (ra, rb); ++} ++ ++int ++main () ++{ ++ ++#ifdef __NDS32_EL__ ++ uint16x2_t v_ua_p = {1, 0}; ++ int16x2_t v_sa_p = {0x1000, 0x111}; ++#else ++ uint16x2_t v_ua_p = {0x2469, 0xe000}; ++ int16x2_t v_sa_p = {0x3000, 0xe111}; ++#endif ++ ++ unsigned int a = cras16 (0x0001f000, 0x0001f000); ++ uint16x2_t v_ua = v_ucras16 ((uint16x2_t) {0x1235, 0xf000}, ++ (uint16x2_t) {0x1000, 0x1234}); ++ int16x2_t v_sa = v_scras16 ((int16x2_t) {0x2000, 0xf111}, ++ (int16x2_t) {0x1000, 0x1000}); ++ ++ if (a != 0xf001efff) ++ abort (); ++ else if (v_ua[0] != v_ua_p[0] ++ || v_ua[1] != v_ua_p[1]) ++ abort (); ++ else if (v_sa[0] != v_sa_p[0] ++ || v_sa[1] != v_sa_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-crsa16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-crsa16.c +new file mode 100644 +index 0000000..de99c3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-crsa16.c +@@ -0,0 +1,57 @@ ++/* This is a test program for crsa16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int crsa16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__crsa16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_ucrsa16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_ucrsa16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_scrsa16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_scrsa16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t v_ua_p = {0x2469, 0xe000}; ++ int16x2_t v_sa_p = {0x3000, 0x110}; ++#else ++ uint16x2_t v_ua_p = {1, 0}; ++ int16x2_t v_sa_p = {0x1000, 0x112}; ++#endif ++ ++ unsigned int a = crsa16 (0x0001f000, 0x0001f000); ++ uint16x2_t v_ua = v_ucrsa16 ((uint16x2_t) {0x1235, 0xf000}, ++ (uint16x2_t) {0x1000, 0x1234}); ++ int16x2_t v_sa = v_scrsa16 ((int16x2_t) {0x2000, 0x0111}, ++ (int16x2_t) {0x0001, 0x1000}); ++ ++ if (a != 0x1001f001) ++ abort (); ++ else if (v_ua[0] != v_ua_p[0] ++ || v_ua[1] != v_ua_p[1]) ++ abort (); ++ else if (v_sa[0] != v_sa_p[0] ++ || v_sa[1] != v_sa_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-insb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-insb.c +new file mode 100644 +index 0000000..ebd0348 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-insb.c +@@ -0,0 +1,27 @@ ++/* This is a test program for insb instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int insb (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__insb (ra, rb, 1); ++} ++ ++int ++main () ++{ ++ unsigned int a = insb (0x11220044, 0x33); ++ ++ if (a != 0x11223344) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbb16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbb16.c +new file mode 100644 +index 0000000..23d92e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbb16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for pkbb16 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int pkbb16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__pkbb16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_pkbb16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_pkbb16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t va_p = {0xcccc, 0xaaaa}; ++#else ++ uint16x2_t va_p = {0xbbbb, 0xdddd}; ++#endif ++ ++ unsigned int a = pkbb16 (0x11223344, 0x55667788); ++ uint16x2_t va = v_pkbb16 ((uint16x2_t) {0xaaaa, 0xbbbb}, ++ (uint16x2_t) {0xcccc, 0xdddd}); ++ ++ if (a != 0x33447788) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbt16.c +new file mode 100644 +index 0000000..6c34420 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pkbt16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for pkbt16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int pkbt16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__pkbt16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_pkbt16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_pkbt16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t va_p = {0xdddd, 0xaaaa}; ++#else ++ uint16x2_t va_p = {0xbbbb, 0xcccc}; ++#endif ++ ++ unsigned int a = pkbt16 (0x11223344, 0x55667788); ++ uint16x2_t va = v_pkbt16 ((uint16x2_t) {0xaaaa, 0xbbbb}, ++ (uint16x2_t) {0xcccc, 0xdddd}); ++ ++ if (a != 0x33445566) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktb16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktb16.c +new file mode 100644 +index 0000000..0aab5df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktb16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for pktb16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int pktb16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__pktb16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_pktb16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_pktb16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t va_p = {0xcccc, 0xbbbb}; ++#else ++ uint16x2_t va_p = {0xaaaa, 0xdddd}; ++#endif ++ ++ unsigned int a = pktb16 (0x11223344, 0x55667788); ++ uint16x2_t va = v_pktb16 ((uint16x2_t) {0xaaaa, 0xbbbb}, ++ (uint16x2_t) {0xcccc, 0xdddd}); ++ ++ if (a != 0x11227788) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktt16.c +new file mode 100644 +index 0000000..745cde5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-pktt16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for pktt16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int pktt16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__pktt16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_pktt16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_pktt16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t va_p = {0xdddd, 0xbbbb}; ++#else ++ uint16x2_t va_p = {0xaaaa, 0xcccc}; ++#endif ++ ++ unsigned int a = pktt16 (0x11223344, 0x55667788); ++ uint16x2_t va = v_pktt16 ((uint16x2_t) {0xaaaa, 0xbbbb}, ++ (uint16x2_t) {0xcccc, 0xdddd}); ++ ++ if (a != 0x11225566) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd16.c +new file mode 100644 +index 0000000..5271b41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd16.c +@@ -0,0 +1,38 @@ ++/* This is a test program for radd16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int radd16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__radd16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_radd16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_radd16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = radd16 (0x7fff7fff, 0x7fff7fff); ++ int16x2_t va = v_radd16 ((int16x2_t) {0x8000, 0x4000}, ++ (int16x2_t) {0x8000, 0x8000}); ++ ++ if (a != 0x7fff7fff) ++ abort (); ++ else if (va[0] != (short) 0x8000 ++ || va[1] != (short) 0xe000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd64.c +new file mode 100644 +index 0000000..3e82ff5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for radd64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long radd64 (long long ra, long long rb) ++{ ++ return __nds32__radd64 (ra, rb); ++} ++ ++int ++main () ++{ ++ long long a = radd64 (0xf000000000000000ll, 0xf000000000000000ll); ++ ++ if (a != 0xf000000000000000ll) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd8.c +new file mode 100644 +index 0000000..10735a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-radd8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for radd8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int radd8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__radd8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int8x4_t v_radd8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_radd8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = radd8 (0x11223344, 0x55667788); ++ int8x4_t va = v_radd8 ((int8x4_t) {0x7f, 0x80, 0x80, 0xaa}, ++ (int8x4_t) {0x7f, 0x80, 0x40, 0xaa}); ++ ++ if (a != 0x334455e6) ++ abort (); ++ else if (va[0] != 0x7f ++ || va[1] != (char) 0x80 ++ || va[2] != (char) 0xe0 ++ || va[3] != (char) 0xaa) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-raddw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-raddw.c +new file mode 100644 +index 0000000..190a477 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-raddw.c +@@ -0,0 +1,27 @@ ++/* This is a test program for raddw instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int raddw (int ra, int rb) ++{ ++ return __nds32__raddw (ra, rb); ++} ++ ++int ++main () ++{ ++ int a = raddw (0x80000000, 0x80000000); ++ ++ if (a != 0x80000000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcras16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcras16.c +new file mode 100644 +index 0000000..2a2288a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcras16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for rcras16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int rcras16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__rcras16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_rcras16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_rcras16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0x7fff, 0x8000}; ++#else ++ int16x2_t va_p = {0xffff, 0}; ++#endif ++ ++ unsigned int a = rcras16 (0x0fff0000, 0x00000fff); ++ int16x2_t va = v_rcras16 ((int16x2_t) {0x7fff, 0x8000}, ++ (int16x2_t) {0x8000, 0x8000}); ++ ++ if (a != 0x0fff0000) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcrsa16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcrsa16.c +new file mode 100644 +index 0000000..ebcc0f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rcrsa16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for rcrsa16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int rcrsa16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__rcrsa16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_rcrsa16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_rcrsa16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0x8000, 0x8000}; ++#else ++ int16x2_t va_p = {0, 0xffff}; ++#endif ++ ++ unsigned int a = rcrsa16 (0x7fff7fff, 0x7fff8000); ++ int16x2_t va = v_rcrsa16 ((int16x2_t) {0x8000, 0x8000}, ++ (int16x2_t) {0x7fff, 0x8000}); ++ ++ if (a != 0x7fff7fff) ++ abort (); ++ else if (va[0] != va_p [0] ++ || va[1] != va_p [1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub16.c +new file mode 100644 +index 0000000..f9fcc86 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub16.c +@@ -0,0 +1,38 @@ ++/* This is a test program for rsub16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int rsub16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__rsub16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_rsub16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_rsub16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = rsub16 (0x7fff7fff, 0x80008000); ++ int16x2_t va = v_rsub16 ((int16x2_t) {0x8000, 0x8000}, ++ (int16x2_t) {0x7fff, 0x4000}); ++ ++ if (a != 0x7fff7fff) ++ abort (); ++ else if (va[0] != (short) 0x8000 ++ || va[1] != (short) 0xa000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub64.c +new file mode 100644 +index 0000000..227eba7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for rsub64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long rsub64 (long long ra, long long rb) ++{ ++ return __nds32__rsub64 (ra, rb); ++} ++ ++int ++main () ++{ ++ long long a = rsub64 (0xe, 0xf); ++ ++ if (a != 0xffffffffffffffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub8.c +new file mode 100644 +index 0000000..0f1dddc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsub8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for rsub8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int rsub8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__rsub8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int8x4_t v_rsub8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_rsub8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = rsub8 (0x55667788, 0x11223344); ++ int8x4_t va = v_rsub8 ((int8x4_t) {0x7f, 0x80, 0x80, 0xaa}, ++ (int8x4_t) {0x80, 0x7f, 0x40, 0xaa}); ++ ++ if (a != 0x222222a2) ++ abort (); ++ else if (va[0] != 0x7f ++ || va[1] != (char) 0x80 ++ || va[2] != (char) 0xa0 ++ || va[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsubw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsubw.c +new file mode 100644 +index 0000000..b70a229 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-rsubw.c +@@ -0,0 +1,27 @@ ++/* This is a test program for rsubw instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int rsubw (int ra, int rb) ++{ ++ return __nds32__rsubw (ra, rb); ++} ++ ++int ++main () ++{ ++ int a = rsubw (0x80000000, 0x7fffffff); ++ ++ if (a != 0x80000000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple16.c +new file mode 100644 +index 0000000..95251d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for scmple16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int scmple16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__scmple16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_scmple16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_scmple16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = scmple16 (0xfffe0001, 0xffff0000); ++ uint16x2_t va = v_scmple16 ((int16x2_t) {0x7fff, 0x7ffe}, ++ (int16x2_t) {0x7ffe, 0x7fff}); ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0xffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple8.c +new file mode 100644 +index 0000000..6c0033d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmple8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for scmple8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int scmple8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__scmple8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_scmple8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_scmple8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = scmple8 (0xfefe0101, 0xffff0000); ++ uint8x4_t va = v_scmple8 ((int8x4_t) {0x7e, 0x7e, 0x01, 0x01}, ++ (int8x4_t) {0x7f, 0x7f, 0x00, 0x00}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0xff ++ || va[1] != 0xff ++ || va[2] != 0 ++ || va[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt16.c +new file mode 100644 +index 0000000..5797711 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt16.c +@@ -0,0 +1,38 @@ ++/* This is a test program for scmplt16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int scmplt16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__scmplt16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_scmplt16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_scmplt16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = scmplt16 (0xfffe0001, 0xffff0000); ++ uint16x2_t va = v_scmplt16 ((int16x2_t) {0x7fff, 0x7ffe}, ++ (int16x2_t) {0x7ffe, 0x7fff}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0xffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt8.c +new file mode 100644 +index 0000000..3e52006 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-scmplt8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for scmplt8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int scmplt8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__scmplt8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_scmplt8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_scmplt8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = scmplt8 (0xfefe0101, 0xffff0000); ++ uint8x4_t va = v_scmplt8 ((int8x4_t) {0x7e, 0x7e, 0x01, 0x01}, ++ (int8x4_t) {0x7f, 0x7f, 0x00, 0x00}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0xff ++ || va[1] != 0xff ++ || va[2] != 0 ++ || va[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sll16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sll16.c +new file mode 100644 +index 0000000..5ab9506 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sll16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for sll16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sll16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__sll16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_sll16 (uint16x2_t ra, unsigned int rb) ++{ ++ return __nds32__v_sll16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = sll16 (0x0f00f000, 4); ++ uint16x2_t va = v_sll16 ((uint16x2_t) {0x7fff, 0x8000}, 4); ++ ++ if (a != 0xf0000000) ++ abort (); ++ else if (va[0] != 0xfff0 ++ || va[1] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smal.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smal.c +new file mode 100644 +index 0000000..f7e54b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smal.c +@@ -0,0 +1,36 @@ ++/* This is a test program for smal instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smal (long long ra, unsigned int rb) ++{ ++ return __nds32__smal (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smal (long long ra, int16x2_t rb) ++{ ++ return __nds32__v_smal (ra, rb); ++} ++ ++int ++main () ++{ ++ long long a = smal (0xfffff0000ll, 0x0001ffff); ++ long long va = v_smal (0xffffff0000ll, ++ (int16x2_t) {0x0002, 0xffff}); ++ if (a != 0xffffeffffll) ++ abort (); ++ else if (va != 0xfffffefffell) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbb.c +new file mode 100644 +index 0000000..c39a889 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbb.c +@@ -0,0 +1,45 @@ ++/* This is a test program for smalbb instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smalbb (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smalbb (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smalbb (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smalbb (t, a, b); ++} ++ ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ long long a_p = 0x12345679075ca9d3ll; ++ long long va_p = 0x12345679075ca9d3ll; ++#else ++ long long a_p = 0x12345679075ca9d3ll; ++ long long va_p = 0x12345678ffffffffll; ++#endif ++ ++ long long a = smalbb (0x12345678ffffffffll,0x00006789, 0x00001234); ++ long long va = v_smalbb (0x12345678ffffffffll, (int16x2_t) {0x6789, 0}, ++ (int16x2_t) {0x1234, 0}); ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbt.c +new file mode 100644 +index 0000000..06577fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalbt.c +@@ -0,0 +1,45 @@ ++/* This is a test program for smalbt instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smalbt (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smalbt (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smalbt (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smalbt (t, a, b); ++} ++ ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ long long a_p = 0x12345679075ca9d3ll; ++ long long va_p = 0x12345679075ca9d3ll; ++#else ++ long long a_p = 0x12345679075ca9d3ll; ++ long long va_p = 0x12345678ffffffffll; ++#endif ++ ++ long long a = smalbt (0x12345678ffffffffll, 0x00006789, 0x12340000); ++ long long va = v_smalbt (0x12345678ffffffffll, (int16x2_t) {0x6789, 0}, ++ (int16x2_t) {0, 0x1234}); ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalda.c +new file mode 100644 +index 0000000..33b4b3f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalda.c +@@ -0,0 +1,38 @@ ++/* This is a test program for smalda instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smalda (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smalda (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smalda (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smalda (t, a, b); ++} ++ ++ ++int ++main () ++{ ++ long long a = smalda (0x12345678ffffffffll, 0x67890000, 0x12340000); ++ long long va = v_smalda (0x12345678ffffffffll, (int16x2_t) {0, 0x6789}, ++ (int16x2_t) {0, 0x1234}); ++ ++ if (a != 0x12345679075CA9D3ll) ++ abort (); ++ else if (va != 0x12345679075CA9D3ll) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaldrs.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaldrs.c +new file mode 100644 +index 0000000..48255b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaldrs.c +@@ -0,0 +1,46 @@ ++/* This is a test program for smaldrs instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smaldrs (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smaldrs (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smaldrs (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smaldrs (t, a, b); ++} ++ ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ long long a_p = 0x12345678ffffaaaall; ++ long long va_p = 0x12345678ffffaaaall; ++#else ++ long long a_p = 0x12345678ffffaaaall; ++ long long va_p = 0x1234567900005554ll; ++#endif ++ ++ long long a = smaldrs (0x12345678ffffffffll, 0x67890001, 0x00011234); ++ long long va = v_smaldrs (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x6789}, ++ (int16x2_t) {0x1234, 0x0001}); ++ ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalds.c +new file mode 100644 +index 0000000..5a89ea6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalds.c +@@ -0,0 +1,46 @@ ++/* This is a test program for smalds instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smalds (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smalds (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smalds (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smalds (t, a, b); ++} ++ ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ long long a_p = 0x12345678ffffaaaall; ++ long long va_p = 0x12345678ffffaaaall; ++#else ++ long long a_p = 0x12345678ffffaaaall; ++ long long va_p = 0x1234567900005554ll; ++#endif ++ ++ long long a = smalds (0x12345678ffffffffll, 0x12340001, 0x00016789); ++ long long va = v_smalds (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x1234}, ++ (int16x2_t) {0x6789, 0x0001}); ++ ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaltt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaltt.c +new file mode 100644 +index 0000000..709607a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smaltt.c +@@ -0,0 +1,46 @@ ++/* This is a test program for smaltt instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smaltt (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smaltt (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smaltt (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smaltt (t, a, b); ++} ++ ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ long long a_p = 0x12345679075ca9d3ll; ++ long long va_p = 0x12345679075ca9d3ll; ++#else ++ long long a_p = 0x12345679075ca9d3ll; ++ long long va_p = 0x12345678ffffffffll; ++#endif ++ ++ long long a = smaltt (0x12345678ffffffffll, 0x67890000, 0x12340000); ++ long long va = v_smaltt (0x12345678ffffffffll, (int16x2_t) {0, 0x6789}, ++ (int16x2_t) {0, 0x1234}); ++ ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxda.c +new file mode 100644 +index 0000000..0f90250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxda.c +@@ -0,0 +1,38 @@ ++/* This is a test program for smalxda instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smalxda (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smalxda (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smalxda (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smalxda (t, a, b); ++} ++ ++ ++int ++main () ++{ ++ long long a = smalxda (0x12345678ffffffffll, 0x67890000, 0x00001234); ++ long long va = v_smalxda (0x12345678ffffffffll, (int16x2_t) {0, 0x6789}, ++ (int16x2_t) {0x1234, 0}); ++ ++ if (a != 0x12345679075CA9D3) ++ abort (); ++ else if (va != 0x12345679075CA9D3) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxds.c +new file mode 100644 +index 0000000..ee2e098 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smalxds.c +@@ -0,0 +1,46 @@ ++/* This is a test program for smalxds instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smalxds (long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__smalxds (t, a, b); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smalxds (long long t, int16x2_t a, int16x2_t b) ++{ ++ return __nds32__v_smalxds (t, a, b); ++} ++ ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ long long a_p = 0x12345678ffffaaaall; ++ long long va_p = 0x12345678ffffaaaall; ++#else ++ long long a_p = 0x12345678ffffaaaall; ++ long long va_p = 0x1234567900005554ll; ++#endif ++ ++ long long a = smalxds (0x12345678ffffffffll, 0x12340001, 0x67890001); ++ long long va = v_smalxds (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x1234}, ++ (int16x2_t) {0x0001, 0x6789}); ++ ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smar64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smar64.c +new file mode 100644 +index 0000000..59c6f1f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smar64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for smar64 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smar64 (long long t, int a, int b) ++{ ++ return __nds32__smar64 (t, a, b); ++} ++ ++int ++main () ++{ ++ long long a = smar64 (0xf000000000000000ll, 0x12345678, 0x23); ++ ++ if (a != 0xf00000027d27d268ll) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax16.c +new file mode 100644 +index 0000000..72bf957 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for smax16 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int smax16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smax16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_smax16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smax16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = smax16 (0xfffe0001, 0xffff0000); ++ int16x2_t va = v_smax16 ((int16x2_t) {0x7fff, 0}, ++ (int16x2_t) {0x7ffe, 1}); ++ if (a != 0xffff0001) ++ abort (); ++ else if (va[0] != 0x7fff ++ || va[1] != 1) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax8.c +new file mode 100644 +index 0000000..128bf19 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smax8.c +@@ -0,0 +1,41 @@ ++/* This is a test program for smax8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int smax8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smax8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int8x4_t v_smax8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_smax8 (ra, rb); ++} ++ ++ ++int ++main () ++{ ++ unsigned int a = smax8 (0xffff0000, 0xfefe0001); ++ int8x4_t va = v_smax8 ((int8x4_t) {0x7f, 0x7f, 0x01, 0x01}, ++ (int8x4_t) {0x7e, 0x7e, 0x00, 0x00}); ++ ++ if (a != 0xffff0001) ++ abort (); ++ else if (va[0] != 0x7f ++ || va[1] != 0x7f ++ || va[2] != 1 ++ || va[3] != 1) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbb.c +new file mode 100644 +index 0000000..25759bd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbb.c +@@ -0,0 +1,44 @@ ++/* This is a test program for smbb instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smbb (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smbb (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smbb (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smbb (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 1; ++#else ++ int va_p = 2; ++#endif ++ ++ int a = smbb (0x80000002, 0x80000001); ++ ++ int va = v_smbb ((int16x2_t) {0xffff, 0x0002}, ++ (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 2) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbt.c +new file mode 100644 +index 0000000..7ed2c22 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smbt.c +@@ -0,0 +1,44 @@ ++/* This is a test program for smbt instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smbt (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smbt (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smbt (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smbt (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 0xffffffff; ++#else ++ int va_p = 0xfffffffe; ++#endif ++ ++ int a = smbt (0x80000002, 0x80000001); ++ ++ int va = v_smbt ((int16x2_t) {0xffff, 0x0002}, ++ (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smdrs.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smdrs.c +new file mode 100644 +index 0000000..4224b04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smdrs.c +@@ -0,0 +1,43 @@ ++/* This is a test program for smdrs instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smdrs (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smdrs (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smdrs (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smdrs (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 0xffffffff; ++#else ++ int va_p = 1; ++#endif ++ ++ int a = smdrs (0x80000002, 0x80000001); ++ int va = v_smdrs ((int16x2_t) {0xffff, 0x0002}, ++ (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0xc0000002) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smds.c +new file mode 100644 +index 0000000..9875efb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smds.c +@@ -0,0 +1,43 @@ ++/* This is a test program for smds instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smds (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smds (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smds (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smds (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 1; ++#else ++ int va_p = 0xffffffff; ++#endif ++ ++ int a = smds (0x80000002, 0x80000001); ++ int va = v_smds ((int16x2_t) {0xffff, 0x0002}, ++ (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0x3ffffffe) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smin16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smin16.c +new file mode 100644 +index 0000000..60deb4b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smin16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for smin16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int smin16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smin16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_smin16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smin16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = smin16 (0xfffe0001, 0xffff0000); ++ int16x2_t v_sa = v_smin16 ((int16x2_t) {0x7fff, 0}, ++ (int16x2_t) {0x7ffe, 1}); ++ if (a != 0xfffe0000) ++ abort (); ++ else if (v_sa[0] != 0x7ffe ++ || v_sa[1] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmul.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmul.c +new file mode 100644 +index 0000000..5735efa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmul.c +@@ -0,0 +1,27 @@ ++/* This is a test program for smmul instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smmul (int ra, int rb) ++{ ++ return __nds32__smmul (ra, rb); ++} ++ ++int ++main () ++{ ++ int a = smmul (0x80000000, 0x80000000); ++ ++ if (a != 0x40000000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmulu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmulu.c +new file mode 100644 +index 0000000..fbe0b15 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmulu.c +@@ -0,0 +1,27 @@ ++/* This is a test program for smmul.u instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smmul_u (int ra, int rb) ++{ ++ return __nds32__smmul_u (ra, rb); ++} ++ ++int ++main () ++{ ++ int a = smmul_u (0x80000002, 0x80000001); ++ ++ if (a != 0x3fffffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwb.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwb.c +new file mode 100644 +index 0000000..9160b9a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwb.c +@@ -0,0 +1,43 @@ ++/* This is a test program for smmwb instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smmwb (int ra, unsigned int rb) ++{ ++ return __nds32__smmwb (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smmwb (int ra, int16x2_t rb) ++{ ++ return __nds32__v_smmwb (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 0; ++#else ++ int va_p = 0xffffffff; ++#endif ++ ++ int a = smmwb (0x80000002, 0x80000001); ++ ++ int va = v_smmwb (0xffff0002, (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0xffff8000) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwbu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwbu.c +new file mode 100644 +index 0000000..46ebed2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwbu.c +@@ -0,0 +1,43 @@ ++/* This is a test program for smmwb.u instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smmwb_u (int ra, unsigned int rb) ++{ ++ return __nds32__smmwb_u (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smmwb_u (int ra, int16x2_t rb) ++{ ++ return __nds32__v_smmwb_u (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 1; ++#else ++ int va_p = 0xffffffff; ++#endif ++ ++ int a = smmwb_u (0x80000002, 0x80000001); ++ ++ int va = v_smmwb_u (0xffff0002, (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0xffff8000) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwt.c +new file mode 100644 +index 0000000..45d4792 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwt.c +@@ -0,0 +1,43 @@ ++/* This is a test program for smmwt instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smmwt (int ra, unsigned int rb) ++{ ++ return __nds32__smmwt (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smmwt (int ra, int16x2_t rb) ++{ ++ return __nds32__v_smmwt (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 0xffffffff; ++#else ++ int va_p = 0; ++#endif ++ ++ int a = smmwt (0x80000002, 0x80000001); ++ ++ int va = v_smmwt (0xffff0002, (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0x3fffffff) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwtu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwtu.c +new file mode 100644 +index 0000000..3b4b487 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smmwtu.c +@@ -0,0 +1,43 @@ ++/* This is a test program for smmwt.u instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smmwt_u (int ra, unsigned int rb) ++{ ++ return __nds32__smmwt_u (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smmwt_u (int ra, int16x2_t rb) ++{ ++ return __nds32__v_smmwt_u (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 0xffffffff; ++#else ++ int va_p = 1; ++#endif ++ ++ int a = smmwt_u (0x80000002, 0x80000001); ++ ++ int va = v_smmwt_u (0xffff0002, (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0x3fffffff) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslda.c +new file mode 100644 +index 0000000..be2ac27 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslda.c +@@ -0,0 +1,37 @@ ++/* This is a test program for smslda instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smslda (long long rt, unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smslda (rt, ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smslda (long long rt, int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smslda (rt, ra, rb); ++} ++ ++int ++main () ++{ ++ long long a = smslda (0xff0000000000ll, 0xffffffff, 0x2); ++ long long va = v_smslda (0x100000000ll, ++ (int16x2_t) {0xf000, 0}, (int16x2_t) {0, 3}); ++ ++ if (a != 0xff0000000002ll) ++ abort (); ++ else if (va != 0x100000000ll) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslxda.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslxda.c +new file mode 100644 +index 0000000..f276a2e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smslxda.c +@@ -0,0 +1,37 @@ ++/* This is a test program for smslxda instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smslxda (long long rt, unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smslxda (rt, ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++long long v_smslxda (long long rt, int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smslxda (rt, ra, rb); ++} ++ ++int ++main () ++{ ++ long long a = smslxda (0xff0000000000ll, 0xffffffff, 0x2); ++ long long va = v_smslxda (0x100000000ll, ++ (int16x2_t) {0xf000, 0}, (int16x2_t) {0, 3}); ++ ++ if (a != 0xff0000000002ll) ++ abort (); ++ else if (va != 0x100003000ll) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smsr64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smsr64.c +new file mode 100644 +index 0000000..64a84e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smsr64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for smsr64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long smsr64 (long long t, int a, int b) ++{ ++ return __nds32__smsr64 (t, a, b); ++} ++ ++int ++main () ++{ ++ long long a = smsr64 (0x5000000300000000ll, 0x12345678, 0x23); ++ ++ if (a != 0x5000000082D82D98ll) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smtt.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smtt.c +new file mode 100644 +index 0000000..bfb30f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smtt.c +@@ -0,0 +1,44 @@ ++/* This is a test program for smtt instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smtt (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smtt (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smtt (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smtt (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int va_p = 2; ++#else ++ int va_p = 1; ++#endif ++ ++ int a = smtt (0x80000002, 0x80000001); ++ ++ int va = v_smtt ((int16x2_t) {0xffff, 0x0002}, ++ (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != 0x40000000) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smul16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smul16.c +new file mode 100644 +index 0000000..bb3fad4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smul16.c +@@ -0,0 +1,38 @@ ++/* This is a test program for smul16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long smul16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smul16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int32x2_t v_smul16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smul16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned long long a = smul16 (0xffff0000, 0x0001ffff); ++ int32x2_t va = v_smul16 ((int16x2_t) {0xffff, 0}, ++ (int16x2_t) {0x0001, 0xffff}); ++ ++ if (a != 0xffffffff00000000) ++ abort (); ++ else if (va[0] != 0xffffffff ++ || va[1] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smulx16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smulx16.c +new file mode 100644 +index 0000000..0e65a2a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smulx16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for smulx16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long smulx16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smulx16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int32x2_t v_smulx16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smulx16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned long long a = smulx16 (0xffff0000, 0xffff0001); ++ int32x2_t va = v_smulx16 ((int16x2_t) {0xffff, 0xffff}, ++ (int16x2_t) {1, 0}); ++ if (a != 0xffffffff00000000) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0xffffffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-smxds.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smxds.c +new file mode 100644 +index 0000000..e429aa3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-smxds.c +@@ -0,0 +1,45 @@ ++/* This is a test program for smxds instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int smxds (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__smxds (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int v_smxds (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_smxds (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int a_p = 0x8000; ++ int va_p = 0xffffffff; ++#else ++ int a_p = 0x8000; ++ int va_p = 1; ++#endif ++ ++ int a = smxds (0x80000002, 0x80000001); ++ int va = v_smxds ((int16x2_t) {0xffff, 0x0002}, ++ (int16x2_t) {0xffff, 0x0001}); ++ ++ if (a != a_p) ++ abort (); ++ else if (va != va_p) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16.c +new file mode 100644 +index 0000000..7d85032 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for sra16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sra16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__sra16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sra16 (int16x2_t ra, unsigned int rb) ++{ ++ return __nds32__v_sra16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = sra16 (0x0ffff000, 4); ++ int16x2_t va = v_sra16 ((int16x2_t) {0x7fff, 0x8000}, 4); ++ ++ if (a != 0x00ffff00) ++ abort (); ++ else if (va[0] != 0x7ff ++ || va[1] != (short) 0xf800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16u.c +new file mode 100644 +index 0000000..5bc127c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sra16u.c +@@ -0,0 +1,37 @@ ++/* This is a test program for sra16.u instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sra16u (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__sra16_u (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sra16u (int16x2_t ra, unsigned int rb) ++{ ++ return __nds32__v_sra16_u (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = sra16u (0x0ffff000, 4); ++ int16x2_t va = v_sra16u ((int16x2_t) {0x7fff, 0x8000}, 4); ++ ++ if (a != 0x100ff00) ++ abort (); ++ else if (va[0] != 0x800 ++ || va[1] != (short) 0xf800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16.c +new file mode 100644 +index 0000000..f3c6e16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16.c +@@ -0,0 +1,39 @@ ++/* This is a test program for srai16 
instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int srai16 (unsigned int ra) ++{ ++ return __nds32__sra16 (ra, 4); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_srai16 (int16x2_t ra) ++{ ++ return __nds32__v_sra16 (ra, 4); ++} ++ ++int ++main () ++{ ++ unsigned int a = srai16 (0x0ffff000); ++ ++ int16x2_t aa; ++ int16x2_t va = v_srai16 ((int16x2_t) {0x7fff, 0x8000}); ++ ++ if (a != 0x00ffff00) ++ abort (); ++ else if (va[0] != 0x7ff ++ || va[1] != (short) 0xf800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16u.c +new file mode 100644 +index 0000000..380bd2e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srai16u.c +@@ -0,0 +1,37 @@ ++/* This is a test program for srai16.u instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int srai16u (unsigned int ra) ++{ ++ return __nds32__sra16_u (ra, 4); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_srai16u (int16x2_t ra) ++{ ++ return __nds32__v_sra16_u (ra, 4); ++} ++ ++int ++main () ++{ ++ unsigned int a = srai16u (0x0ffff000); ++ int16x2_t va = v_srai16u ((int16x2_t) {0x7fff, 0x8000}); ++ ++ if (a != 0x100ff00) ++ abort (); ++ else if (va[0] != 0x800 ++ || va[1] != (short) 0xf800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sraiu.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sraiu.c +new file mode 100644 +index 0000000..4090762 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sraiu.c +@@ -0,0 +1,27 @@ ++/* This is a test program for srai.u instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int sraiu (int ra) ++{ ++ return __nds32__sra_u (ra, 8); ++} ++ ++int ++main () ++{ ++ int a = sraiu (0xf00ff); ++ ++ if (a != 0xf01) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srau.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srau.c +new file mode 100644 +index 0000000..e3a3137 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srau.c +@@ -0,0 +1,27 @@ ++/* This is a test program for sra.u instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++int srau (int ra, unsigned int rb) ++{ ++ return __nds32__sra_u (ra, rb); ++} ++ ++int ++main () ++{ ++ int a = srau (0xf00ff, 8); ++ ++ if (a != 0xf01) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16.c +new file mode 100644 +index 0000000..8aa9c59 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for srl16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int srl16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__srl16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_srl16 (uint16x2_t ra, unsigned int rb) ++{ ++ return __nds32__v_srl16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = srl16 (0x0f00f000, 4); ++ uint16x2_t va = v_srl16 ((uint16x2_t) {0x7fff, 0x8000}, 4); ++ ++ if (a != 0xf00f00) ++ abort (); ++ else if (va[0] != 0x7ff ++ || va[1] != 0x0800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16u.c +new file mode 100644 +index 0000000..3f4ac5b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srl16u.c +@@ -0,0 +1,37 @@ ++/* This is a test program for srl16.u instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int srl16_u (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__srl16_u (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_srl16_u (uint16x2_t ra, unsigned int rb) ++{ ++ return __nds32__v_srl16_u (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = srl16_u (0x0f00f000, 4); ++ uint16x2_t va = v_srl16_u ((uint16x2_t) {0x7fff, 0x8000}, 4); ++ ++ if (a != 0xf00f00) ++ abort (); ++ else if (va[0] != 0x800 ++ || va[1] != 0x800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16.c +new file mode 100644 +index 0000000..200bf8c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for srli16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int srli16 (unsigned int ra) ++{ ++ return __nds32__srl16 (ra, 4); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_srli16 (uint16x2_t ra) ++{ ++ return __nds32__v_srl16 (ra, 4); ++} ++ ++int ++main () ++{ ++ unsigned int a = srli16 (0x0f00f000); ++ uint16x2_t va = v_srli16 ((uint16x2_t) {0x7fff, 0x8000}); ++ ++ if (a != 0xf00f00) ++ abort (); ++ else if (va[0] != 0x7ff ++ || va[1] != 0x0800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16u.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16u.c +new file mode 100644 +index 0000000..808319b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-srli16u.c +@@ -0,0 +1,37 @@ ++/* This is a test program for sril16.u instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int srli16_u (unsigned int ra) ++{ ++ return __nds32__srl16_u (ra, 4); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_srli16_u (uint16x2_t ra) ++{ ++ return __nds32__v_srl16_u (ra, 4); ++} ++ ++int ++main () ++{ ++ unsigned int a = srli16_u (0x0f00f000); ++ uint16x2_t va = v_srli16_u ((uint16x2_t) {0x7fff, 0x8000}); ++ ++ if (a != 0xf00f00) ++ abort (); ++ else if (va[0] != 0x800 ++ || va[1] != 0x800) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub16.c +new file mode 100644 +index 0000000..eff5f92 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub16.c +@@ -0,0 +1,49 @@ ++/* This is a test program for sub16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sub16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__sub16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_usub16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_usub16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_ssub16 (int16x2_t ra, int16x2_t rb) ++{ ++ return __nds32__v_ssub16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = sub16 (0x00010000, 0x00010001); ++ uint16x2_t v_ua = v_usub16 ((uint16x2_t) {0x1000, 0x0001}, ++ (uint16x2_t) {0xf000, 0x0000}); ++ int16x2_t v_sa = v_ssub16 ((int16x2_t) {0x7777, 0x2111}, ++ (int16x2_t) {0x1000, 0x2000}); ++ ++ if (a != 0x0000ffff) ++ abort (); ++ else if (v_ua[0] != 0x2000 ++ || v_ua[1] != 0x0001) ++ abort (); ++ else if (v_sa[0] != 0x6777 ++ || v_sa[1] != 0x0111) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub64.c +new file mode 100644 +index 0000000..efdd879 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub64.c +@@ -0,0 +1,36 @@ ++/* This is a test program for sub64 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++long long ssub64 (long long ra, long long rb) ++{ ++ return __nds32__ssub64 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++unsigned long long usub64 (unsigned long long ra, unsigned long long rb) ++{ ++ return __nds32__usub64 (ra, rb); ++} ++ ++int ++main () ++{ ++ long long sa = ssub64 (0x100000000ll, 0xffffffffll); ++ unsigned long long ua = usub64 (0xf00000000ull, 0x1111ull); ++ ++ if (sa != 1ll) ++ abort (); ++ else if (ua != 0xeffffeeefull) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub8.c +new file mode 100644 +index 0000000..b21f8a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sub8.c +@@ -0,0 +1,53 @@ ++/* This is a test program for sub8 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sub8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__sub8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_usub8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_usub8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++int8x4_t v_ssub8 (int8x4_t ra, int8x4_t rb) ++{ ++ return __nds32__v_ssub8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = sub8 (0x55667788, 0x11223344); ++ uint8x4_t v_ua = v_usub8 ((uint8x4_t) {0xff, 0xee, 0xee, 0xcc}, ++ (uint8x4_t) {0x1, 0xee, 0xdd, 0xdd}); ++ int8x4_t v_sa = v_ssub8 ((int8x4_t) {0x81, 0x0, 0xdd, 0xaa}, ++ (int8x4_t) {0x80, 0x1, 0xcc, 0xaa}); ++ ++ if (a != 0x44444444) ++ abort (); ++ else if (v_ua[0] != 0xfe ++ || v_ua[1] != 0 ++ || v_ua[2] != 0x11 ++ || v_ua[3] != 0xef) ++ abort (); ++ else if (v_sa[0] != 1 ++ || v_sa[1] != (char) 0xff ++ || v_sa[2] 
!= 0x11 ++ || v_sa[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd810.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd810.c +new file mode 100644 +index 0000000..29fff3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd810.c +@@ -0,0 +1,43 @@ ++/* This is a test program for sunpkd810 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sunpkd810 (unsigned int a) ++{ ++ return __nds32__sunpkd810 (a); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sunpkd810 (int8x4_t a) ++{ ++ return __nds32__v_sunpkd810 (a); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0xfff8, 0x56}; ++#else ++ int16x2_t va_p = {0, 0}; ++#endif ++ ++ unsigned int a = sunpkd810 (0x000056f8); ++ int16x2_t va = v_sunpkd810 ((int8x4_t) {0xf8, 0x56, 0, 0}); ++ ++ if (a != 0x0056fff8) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd820.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd820.c +new file mode 100644 +index 0000000..43f969a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd820.c +@@ -0,0 +1,43 @@ ++/* This is a test program for sunpkd820 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sunpkd820 (unsigned int a) ++{ ++ return __nds32__sunpkd820 (a); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sunpkd820 (int8x4_t a) ++{ ++ return __nds32__v_sunpkd820 (a); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0xfff8, 0x34}; ++#else ++ int16x2_t va_p = {0, 0}; ++#endif ++ ++ unsigned int a = sunpkd820 (0x003400f8); ++ int16x2_t va = v_sunpkd820 ((int8x4_t) {0xf8, 0, 0x34, 0}); ++ ++ if (a != 0x0034fff8) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd830.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd830.c +new file mode 100644 +index 0000000..76540b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd830.c +@@ -0,0 +1,37 @@ ++/* This is a test program for sunpkd830 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sunpkd830 (unsigned int a) ++{ ++ return __nds32__sunpkd830 (a); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sunpkd830 (int8x4_t a) ++{ ++ return __nds32__v_sunpkd830 (a); ++} ++ ++int ++main () ++{ ++ unsigned int a = sunpkd830 (0x120000f8); ++ int16x2_t va = v_sunpkd830 ((int8x4_t) {0xf8, 0x00, 0, 0x12}); ++ ++ if (a != 0x0012fff8) ++ abort (); ++ else if (va[0] != (short) 0xfff8 ++ || va[1] != 0x0012) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd831.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd831.c +new file mode 100644 +index 0000000..05149e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-sunpkd831.c +@@ -0,0 +1,43 @@ ++/* This is a test program for sunpkd831 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int sunpkd831 (unsigned int a) ++{ ++ return __nds32__sunpkd831 (a); ++} ++ ++static __attribute__ ((noinline)) ++int16x2_t v_sunpkd831 (int8x4_t a) ++{ ++ return __nds32__v_sunpkd831 (a); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0xfff8, 0x12}; ++#else ++ int16x2_t va_p = {0, 0}; ++#endif ++ ++ unsigned int a = sunpkd831 (0x1200f800); ++ int16x2_t va = v_sunpkd831 ((int8x4_t) {0, 0xf8, 0, 0x12}); ++ ++ if (a != 0x0012fff8) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple16.c +new file mode 100644 +index 0000000..17b5344 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple16.c +@@ 
-0,0 +1,37 @@ ++/* This is a test program for ucmple16 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ucmple16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__ucmple16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_ucmple16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_ucmple16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ucmple16 (0xfffe0001, 0xffff0000); ++ uint16x2_t va = v_ucmple16 ((uint16x2_t) {0x7fff, 0x7ffe}, ++ (uint16x2_t) {0x7ffe, 0x7fff}); ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0xffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple8.c +new file mode 100644 +index 0000000..561b500 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmple8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for ucmple8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ucmple8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__ucmple8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_ucmple8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_ucmple8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ucmple8 (0xfefe0101, 0xffff0000); ++ uint8x4_t va = v_ucmple8 ((uint8x4_t) {0x7e, 0x7e, 0x01, 0x01}, ++ (uint8x4_t) {0x7f, 0x7f, 0x00, 0x00}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0xff ++ || va[1] != 0xff ++ || va[2] != 0 ++ || va[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt16.c +new file mode 100644 +index 0000000..820ce1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for ucmplt16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ucmplt16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__ucmplt16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_ucmplt16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_ucmplt16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ucmplt16 (0xfffe0001, 0xffff0000); ++ uint16x2_t va = v_ucmplt16 ((uint16x2_t) {0x7fff, 0x7ffe}, ++ (uint16x2_t) {0x7ffe, 0x7fff}); ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0xffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt8.c +new file mode 100644 +index 0000000..8001586 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ucmplt8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for ucmplt8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ucmplt8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__ucmplt8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_ucmplt8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_ucmplt8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ucmplt8 (0xfefe0101, 0xffff0000); ++ uint8x4_t va = v_ucmplt8 ((uint8x4_t) {0x7e, 0x7e, 0x01, 0x01}, ++ (uint8x4_t) {0x7f, 0x7f, 0x00, 0x00}); ++ ++ if (a != 0xffff0000) ++ abort (); ++ else if (va[0] != 0xff ++ || va[1] != 0xff ++ || va[2] != 0 ++ || va[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umar64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umar64.c +new file mode 100644 +index 0000000..ac32ae1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umar64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for umar64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long umar64 (unsigned long long t,unsigned int a,unsigned int b) ++{ ++ return __nds32__umar64 (t, a, b); ++} ++ ++int ++main () ++{ ++ unsigned long long a = umar64 (0xf000000000000000ull, 0x12345678, 0x23); ++ ++ if (a != 0xf00000027d27d268ull) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax16.c +new file mode 100644 +index 0000000..99a43d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for umax16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int umax16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__umax16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_umax16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_umax16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = umax16 (0xfffe0001, 0xffff0000); ++ uint16x2_t va = v_umax16 ((uint16x2_t) {0xffff, 0}, ++ (uint16x2_t) {0xfffe, 1}); ++ if (a != 0xffff0001) ++ abort (); ++ else if (va[0] != 0xffff ++ || va[1] != 1) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax8.c +new file mode 100644 +index 0000000..23904b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umax8.c +@@ -0,0 +1,41 @@ ++/* This is a test program for umax8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int umax8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__umax8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_umax8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_umax8 (ra, rb); ++} ++ ++ ++int ++main () ++{ ++ unsigned int a = umax8 (0xffff0000, 0xfffe0001); ++ uint8x4_t va = v_umax8 ((uint8x4_t) {0xff, 0xff, 0x01, 0x01}, ++ (uint8x4_t) {0xfe, 0xfe, 0x00, 0x00}); ++ ++ if (a != 0xffff0001) ++ abort (); ++ else if (va[0] != 0xff ++ || va[1] != 0xff ++ || va[2] != 1 ++ || va[3] != 1) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umin16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umin16.c +new file mode 100644 +index 0000000..eec7058 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umin16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for umin16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int umin16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__umin16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_umin16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_umin16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = umin16 (0xfffe0001, 0xffff0000); ++ uint16x2_t va = v_umin16 ((uint16x2_t) {0x7fff, 0}, ++ (uint16x2_t) {0x7ffe, 1}); ++ if (a != 0xfffe0000) ++ abort (); ++ else if (va[0] != 0x7ffe ++ || va[1] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umsr64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umsr64.c +new file mode 100644 +index 0000000..3fb20bf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umsr64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for umsr64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long umsr64 (unsigned long long t, unsigned int a, unsigned int b) ++{ ++ return __nds32__umsr64 (t, a, b); ++} ++ ++int ++main () ++{ ++ unsigned long long a = umsr64 (0x5000000300000000ull, 0x12345678, 0x23); ++ ++ if (a != 0x5000000082D82D98ull) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umul16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umul16.c +new file mode 100644 +index 0000000..ddfb6be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umul16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for umul16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long umul16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__umul16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint32x2_t v_umul16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_umul16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned long long a = umul16 (0xffff0000, 0x0001ffff); ++ uint32x2_t va = v_umul16 ((uint16x2_t) {0xffff, 0}, ++ (uint16x2_t) {0x0001, 0xffff}); ++ if (a != 0xffff00000000) ++ abort (); ++ else if (va[0] != 0xffff ++ || va[1] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-umulx16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umulx16.c +new file mode 100644 +index 0000000..c57d304 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-umulx16.c +@@ -0,0 +1,37 @@ ++/* This is a test program for umulx16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long umulx16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__umulx16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint32x2_t v_umulx16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_umulx16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned long long a = umulx16 (0xffff0000, 0xffff0001); ++ uint32x2_t va = v_umulx16 ((uint16x2_t) {0xffff, 0xffff}, ++ (uint16x2_t) {1, 0}); ++ if (a != 0xffff00000000) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0xffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd16.c +new file mode 100644 +index 0000000..82c7be7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd16.c +@@ -0,0 +1,38 @@ ++/* This is a test program for uradd16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int uradd16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__uradd16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_uradd16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_uradd16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = uradd16 (0x7fff7fff, 0x7fff7fff); ++ uint16x2_t va = v_uradd16 ((uint16x2_t) {0x8000, 0x4000}, ++ (uint16x2_t) {0x8000, 0x8000}); ++ ++ if (a != 0x7fff7fff) ++ abort (); ++ else if (va[0] != 0x8000 ++ || va[1] != 0x6000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd64.c +new file mode 100644 +index 0000000..51ee961 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for uradd64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long uradd64 (unsigned long long ra, unsigned long long rb) ++{ ++ return __nds32__uradd64 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned long long a = uradd64 (0xf000000000000000ull, 0xf000000000000000ull); ++ ++ if (a != 0xf000000000000000ull) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd8.c +new file mode 100644 +index 0000000..d4f91d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uradd8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for uradd8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int uradd8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__uradd8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_uradd8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_uradd8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = uradd8 (0x11223344, 0x55667788); ++ uint8x4_t va = v_uradd8 ((uint8x4_t) {0x7f, 0x80, 0x40, 0xaa}, ++ (uint8x4_t) {0x7f, 0x80, 0x80, 0xaa}); ++ ++ if (a != 0x33445566) ++ abort (); ++ else if (va[0] != 0x7f ++ || va[1] != 0x80 ++ || va[2] != 0x60 ++ || va[3] != 0xaa) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-uraddw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uraddw.c +new file mode 100644 +index 0000000..9fc76b0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-uraddw.c +@@ -0,0 +1,27 @@ ++/* This is a test program for uraddw instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int uraddw (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__uraddw (ra, rb); ++} ++ ++unsigned int ++main () ++{ ++ unsigned int a = uraddw (0x80000000, 0x80000000); ++ ++ if (a != 0x80000000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcras16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcras16.c +new file mode 100644 +index 0000000..1330374 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcras16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for urcras16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int urcras16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__urcras16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_urcras16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_urcras16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t va_p = {0xffff, 0x8000}; ++#else ++ uint16x2_t va_p = {0x7fff, 0}; ++#endif ++ ++ unsigned int a = urcras16 (0x7fff7fff, 0x80007fff); ++ uint16x2_t va = v_urcras16 ((uint16x2_t) {0x7fff, 0x8000}, ++ (uint16x2_t) {0x8000, 0x8000}); ++ ++ if (a != 0x7fffffff) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcrsa16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcrsa16.c +new file mode 100644 +index 0000000..806fa7a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-urcrsa16.c +@@ -0,0 +1,44 @@ ++/* This is a test program for urcrsa16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int urcrsa16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__urcrsa16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_urcrsa16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_urcrsa16 (ra, rb); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ uint16x2_t va_p = {0x8000, 0xffff}; ++#else ++ uint16x2_t va_p = {0, 0x7fff}; ++#endif ++ ++ unsigned int a = urcrsa16 (0x7fff7fff, 0x7fff8000); ++ uint16x2_t va = v_urcrsa16 ((uint16x2_t) {0x8000, 0x7fff}, ++ (uint16x2_t) {0x8000, 0x8000}); ++ ++ if (a != 0xffff7fff) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub16.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub16.c +new file mode 100644 +index 0000000..9e87234 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub16.c +@@ -0,0 +1,38 @@ ++/* This is a test program for ursub16 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ursub16 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__ursub16 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_ursub16 (uint16x2_t ra, uint16x2_t rb) ++{ ++ return __nds32__v_ursub16 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ursub16 (0x7fff7fff, 0x80008000); ++ uint16x2_t va = v_ursub16 ((uint16x2_t) {0x8000, 0x8000}, ++ (uint16x2_t) {0x7fff, 0x4000}); ++ ++ if (a != 0xffffffff) ++ abort (); ++ else if (va[0] != 0 ++ || va[1] != 0x2000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub64.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub64.c +new file mode 100644 +index 0000000..e1f7b15 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub64.c +@@ -0,0 +1,27 @@ ++/* This is a test program for ursub64 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned long long ursub64 (unsigned long long ra, unsigned long long rb) ++{ ++ return __nds32__ursub64 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned long long a = ursub64 (0xeull, 0xfull); ++ ++ if (a != 0xffffffffffffffffull) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub8.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub8.c +new file mode 100644 +index 0000000..f5e3ff6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursub8.c +@@ -0,0 +1,40 @@ ++/* This is a test program for ursub8 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ursub8 (unsigned int ra, unsigned int rb) ++{ ++ return __nds32__ursub8 (ra, rb); ++} ++ ++static __attribute__ ((noinline)) ++uint8x4_t v_ursub8 (uint8x4_t ra, uint8x4_t rb) ++{ ++ return __nds32__v_ursub8 (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ursub8 (0x55667788, 0x11223344); ++ uint8x4_t va = v_ursub8 ((uint8x4_t) {0x7f, 0x80, 0x80, 0xaa}, ++ (uint8x4_t) {0x80, 0x7f, 0x40, 0xaa}); ++ ++ if (a != 0x22222222) ++ abort (); ++ else if (va[0] != 0xff ++ || va[1] != 0 ++ || va[2] != 0x20 ++ || va[3] != 0) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursubw.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursubw.c +new file mode 100644 +index 0000000..b12afb0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-ursubw.c +@@ -0,0 +1,27 @@ ++/* This is a test program for ursubw instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int ursubw (unsigned int ra,unsigned int rb) ++{ ++ return __nds32__ursubw (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = ursubw (0x80000000, 0x40000000); ++ ++ if (a != 0x20000000) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-wext.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wext.c +new file mode 100644 +index 0000000..d86fb8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wext.c +@@ -0,0 +1,27 @@ ++/* This is a test program for wext instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int wext (long long ra, unsigned int rb) ++{ ++ return __nds32__wext (ra, rb); ++} ++ ++int ++main () ++{ ++ unsigned int a = wext (0x1234ffff0000ll, 16); ++ ++ if (a != 0x1234ffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-wexti.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wexti.c +new file mode 100644 +index 0000000..8f09423 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-wexti.c +@@ -0,0 +1,27 @@ ++/* This is a test program for wexti instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int wexti (long long ra) ++{ ++ return __nds32__wext (ra, 16); ++} ++ ++int ++main () ++{ ++ unsigned int a = wexti (0x1234ffff0000ll); ++ ++ if (a != 0x1234ffff) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd810.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd810.c +new file mode 100644 +index 0000000..7b3aebb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd810.c +@@ -0,0 +1,43 @@ ++/* This is a test program for zunpkd810 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int zunpkd810 (unsigned int a) ++{ ++ return __nds32__zunpkd810 (a); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_zunpkd810 (uint8x4_t a) ++{ ++ return __nds32__v_zunpkd810 (a); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0xf8, 0x56}; ++#else ++ int16x2_t va_p = {0, 0}; ++#endif ++ ++ unsigned int a = zunpkd810 (0x000056f8); ++ uint16x2_t va = v_zunpkd810 ((uint8x4_t) {0xf8, 0x56, 0, 0}); ++ ++ if (a != 0x005600f8) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd820.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd820.c +new file mode 100644 +index 0000000..dc37a3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd820.c +@@ -0,0 +1,43 @@ ++/* This is a test program for zunpkd820 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int zunpkd820 (unsigned int a) ++{ ++ return __nds32__zunpkd820 (a); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_zunpkd820 (uint8x4_t a) ++{ ++ return __nds32__v_zunpkd820 (a); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0xf8, 0x34}; ++#else ++ int16x2_t va_p = {0, 0}; ++#endif ++ ++ unsigned int a = zunpkd820 (0x003400f8); ++ uint16x2_t va = v_zunpkd820 ((uint8x4_t) {0xf8, 0, 0x34, 0}); ++ ++ if (a != 0x003400f8) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd830.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd830.c +new file mode 100644 +index 0000000..8f5a224 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd830.c +@@ -0,0 +1,37 @@ ++/* This is a test program for zunpkd830 instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int zunpkd830 (unsigned int a) ++{ ++ return __nds32__zunpkd830 (a); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_zunpkd830 (uint8x4_t a) ++{ ++ return __nds32__v_zunpkd830 (a); ++} ++ ++int ++main () ++{ ++ unsigned int a = zunpkd830 (0x120000f8); ++ uint16x2_t va = v_zunpkd830 ((uint8x4_t) { 0xf8, 0x00, 0, 0x12}); ++ ++ if (a != 0x001200f8) ++ abort (); ++ else if (va[0] != 0x00f8 ++ || va[1] != 0x0012) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd831.c b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd831.c +new file mode 100644 +index 0000000..6878cd3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-dsp-zunpkd831.c +@@ -0,0 +1,43 @@ ++/* This is a test program for zunpkd831 instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++static __attribute__ ((noinline)) ++unsigned int zunpkd831 (unsigned int a) ++{ ++ return __nds32__zunpkd831 (a); ++} ++ ++static __attribute__ ((noinline)) ++uint16x2_t v_zunpkd831 (uint8x4_t a) ++{ ++ return __nds32__v_zunpkd831 (a); ++} ++ ++int ++main () ++{ ++#ifdef __NDS32_EL__ ++ int16x2_t va_p = {0xf8, 0x12}; ++#else ++ int16x2_t va_p = {0, 0}; ++#endif ++ ++ unsigned int a = zunpkd831 (0x1200f800); ++ uint16x2_t va = v_zunpkd831 ((uint8x4_t) {0, 0xf8, 0, 0x12}); ++ ++ if (a != 0x001200f8) ++ abort (); ++ else if (va[0] != va_p[0] ++ || va[1] != va_p[1]) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c +new file mode 100644 +index 0000000..4ee7e5e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c +@@ -0,0 +1,21 
@@ ++/* This is a test program for fcpysd instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_fpu_dp } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ double da = -1.5; ++ double db = 1.3; ++ double dr = __nds32__fcpysd (da, db); ++ ++ if (dr != 1.5) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c +new file mode 100644 +index 0000000..804410b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c +@@ -0,0 +1,21 @@ ++/* This is a test program for fcpynsd instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_fpu_dp } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ double da = -1.5; ++ double db = -1.3; ++ double dr = __nds32__fcpynsd (da, db); ++ ++ if (dr != 1.5) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c +new file mode 100644 +index 0000000..0d86734 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c +@@ -0,0 +1,21 @@ ++/* This is a test program for fcpynss instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_fpu_sp } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ float a = -1.5; ++ float b = -1.3; ++ float r = __nds32__fcpynss (a, b); ++ ++ if (r != 1.5) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c +new file mode 100644 +index 0000000..4bccf57 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c +@@ -0,0 +1,21 @@ ++/* This is a test program for fcpyss instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_fpu_sp } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ float a = -1.5; ++ float b = 1.3; ++ float r = __nds32__fcpyss (a, b); ++ ++ if (r != 1.5) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c +new file mode 100644 +index 0000000..83e65ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c +@@ -0,0 +1,23 @@ ++/* This is a test program for fmfcfg instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_fpu } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int intrinsic_fmfcfg = -1; ++ unsigned int inline_assemble_fmfcfg = -2; ++ ++ intrinsic_fmfcfg = __nds32__fmfcfg (); ++ __asm volatile ("fmfcfg %0" : "=r" (inline_assemble_fmfcfg)); ++ ++ if (intrinsic_fmfcfg == inline_assemble_fmfcfg) ++ exit (0); ++ else ++ abort (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c +new file mode 100644 +index 0000000..787b430 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c +@@ -0,0 +1,35 @@ ++/* This is a test program for fmtcsr/fmfcsr instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_fpu } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int fpcsr; ++ unsigned int real_fpcsr; ++ ++ /* Keep real fpcsr value. */ ++ real_fpcsr = __nds32__fmfcsr (); ++ ++ /* write fpcsr */ ++ fpcsr = 3; ++ __nds32__fmtcsr (fpcsr); ++ ++ /* read fpcsr */ ++ fpcsr = 0; ++ fpcsr = __nds32__fmfcsr (); ++ fpcsr = fpcsr & 0x00001fff; ++ ++ /* Recover fpcsr value. 
*/ ++ __nds32__fmtcsr (real_fpcsr); ++ ++ if (fpcsr == 3) ++ exit (0); ++ else ++ abort (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c b/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c +new file mode 100644 +index 0000000..80b4921 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c +@@ -0,0 +1,22 @@ ++/* Verify the return address with builtin function. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int main() ++{ ++ unsigned int intrinsic_lp = -1; ++ unsigned int inline_assemble_lp = -2; ++ ++ intrinsic_lp = __nds32__return_address (); ++ ++ __asm volatile ("mov55 %0, $lp" : "=r" (inline_assemble_lp)); ++ ++ if (intrinsic_lp != inline_assemble_lp) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isb.c b/gcc/testsuite/gcc.target/nds32/builtin-isb.c +deleted file mode 100644 +index e65061b..0000000 +--- a/gcc/testsuite/gcc.target/nds32/builtin-isb.c ++++ /dev/null +@@ -1,11 +0,0 @@ +-/* Verify that we generate isb instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tisb" } } */ +- +-void +-test (void) +-{ +- __builtin_nds32_isb (); +-} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isync.c b/gcc/testsuite/gcc.target/nds32/builtin-isync.c +deleted file mode 100644 +index 3160e4a..0000000 +--- a/gcc/testsuite/gcc.target/nds32/builtin-isync.c ++++ /dev/null +@@ -1,12 +0,0 @@ +-/* Verify that we generate isync instruction with builtin function. 
*/ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tisync" } } */ +- +-void +-test (void) +-{ +- int *addr = (int *) 0x53000000; +- __builtin_nds32_isync (addr); +-} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c b/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c +deleted file mode 100644 +index db4c558..0000000 +--- a/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* Verify that we generate mfsr/mtsr instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tmfsr" } } */ +-/* { dg-final { scan-assembler "\\tmtsr" } } */ +- +-#include <nds32_intrinsic.h> +- +-void +-test (void) +-{ +- int ipsw_value; +- +- ipsw_value = __builtin_nds32_mfsr (__NDS32_REG_IPSW__); +- __builtin_nds32_mtsr (ipsw_value, __NDS32_REG_IPSW__); +-} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c b/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c +deleted file mode 100644 +index 3cfaab9..0000000 +--- a/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* Verify that we generate mfusr/mtusr instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tmfusr" } } */ +-/* { dg-final { scan-assembler "\\tmtusr" } } */ +- +-#include <nds32_intrinsic.h> +- +-void +-test (void) +-{ +- int itype_value; +- +- itype_value = __builtin_nds32_mfusr (__NDS32_REG_ITYPE__); +- __builtin_nds32_mtusr (itype_value, __NDS32_REG_ITYPE__); +-} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-rotr.c b/gcc/testsuite/gcc.target/nds32/builtin-rotr.c +new file mode 100644 +index 0000000..a295cb2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-rotr.c +@@ -0,0 +1,19 @@ ++/* This is a test program for rotr instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 1; ++ a = __nds32__rotr (a, 30); ++ ++ if (a != 4) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c +deleted file mode 100644 +index 2dceed9..0000000 +--- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c ++++ /dev/null +@@ -1,11 +0,0 @@ +-/* Verify that we generate setgie.d instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tsetgie.d" } } */ +- +-void +-test (void) +-{ +- __builtin_nds32_setgie_dis (); +-} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c +deleted file mode 100644 +index 8928870..0000000 +--- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c ++++ /dev/null +@@ -1,11 +0,0 @@ +-/* Verify that we generate setgie.e instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tsetgie.e" } } */ +- +-void +-test (void) +-{ +- __builtin_nds32_setgie_en (); +-} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c +new file mode 100644 +index 0000000..b353909 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c +@@ -0,0 +1,43 @@ ++/* This is a test program for checking gie with ++ mtsr/mfsr instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int psw; ++ unsigned int gie; ++ unsigned int pfm_ctl; ++ unsigned int real_psw; ++ ++ /* Keep PSW value. 
*/ ++ real_psw = __nds32__mfsr (NDS32_SR_PSW); ++ ++ __nds32__setgie_en (); ++ __nds32__dsb(); /* This is needed for waiting pipeline. */ ++ psw = __nds32__mfsr (NDS32_SR_PSW); ++ ++ gie = psw & 0x00000001; ++ ++ if (gie != 1) ++ abort (); ++ ++ psw = psw & 0xFFFFFFFE; ++ __nds32__mtsr (psw, NDS32_SR_PSW); ++ __nds32__dsb(); /* This is needed for waiting pipeline. */ ++ psw = __nds32__mfsr (NDS32_SR_PSW); ++ gie = psw & 0x00000001; ++ ++ /* Recover PSW value. */ ++ __nds32__mtsr (real_psw, NDS32_SR_PSW); ++ ++ if (gie != 0) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-sp.c b/gcc/testsuite/gcc.target/nds32/builtin-sp.c +new file mode 100644 +index 0000000..2e5499d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-sp.c +@@ -0,0 +1,33 @@ ++/* This is a test program for sp intrinsic usage. ++ Because we want to use frame pointer to access local variable, ++ we need to use -fno-omit-frame-pointer to make sure the existence ++ of frame pointer. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0 -fno-omit-frame-pointer" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int old_sp, new_sp; ++ ++ old_sp = __nds32__get_current_sp (); ++ new_sp = old_sp - 4; ++ __nds32__set_current_sp (new_sp); ++ new_sp = __nds32__get_current_sp (); ++ ++ if (new_sp != (old_sp - 4)) ++ abort (); ++ ++ new_sp = new_sp + 4; ++ __nds32__set_current_sp (new_sp); ++ new_sp = __nds32__get_current_sp (); ++ ++ if (new_sp != old_sp) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c b/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c +new file mode 100644 +index 0000000..cf02434 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c +@@ -0,0 +1,28 @@ ++/* This is a test program for ffb instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_string } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x1b2a3d4c; ++ unsigned int b = 0x0000003d; ++ int r; ++ ++ r = __nds32__ffb (a, b); ++ ++#ifdef __NDS32_EL__ ++ if (r != -3) ++ abort (); ++#else ++ if (r != -2) ++ abort (); ++#endif ++ ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c b/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c +new file mode 100644 +index 0000000..b2fb008 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c +@@ -0,0 +1,28 @@ ++/* This is a test program for ffmism instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_string } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x1b2a3d4c; ++ unsigned int b = 0x112a334c; ++ int r; ++ ++ r = __nds32__ffmism (a, b); ++ ++#ifdef __NDS32_EL__ ++ if (r != -3) ++ abort (); ++#else ++ if (r != -4) ++ abort (); ++#endif ++ ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c b/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c +new file mode 100644 +index 0000000..105fce5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c +@@ -0,0 +1,28 @@ ++/* This is a test program for flmism instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O1" } */ ++/* { dg-require-effective-target nds32_ext_string } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x1b2a3d4c; ++ unsigned int b = 0x112a334c; ++ int r; ++ ++ r = __nds32__flmism (a, b); ++ ++#ifdef __NDS32_EL__ ++ if (r != -1) ++ abort (); ++#else ++ if (r != -2) ++ abort (); ++#endif ++ ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s16x2.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s16x2.c +new file mode 100644 +index 0000000..5a2e8b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s16x2.c +@@ -0,0 +1,36 @@ ++/* This is a test program for smbb instruction. */ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++int ++main (void) ++{ ++ char data[] = {0x55,0x66,0x77,0x88}; ++ short* short_data = (short*)& data[1]; ++ int16x2_t test_short = {0x1111, 0xaaaa}; ++ int16x2_t vecdata = __nds32__get_unaligned_s16x2 (short_data); ++ ++#ifdef __NDS32_EL__ ++ if (vecdata[0] != 0x7766) ++ abort (); ++#else ++ if (vecdata[0] != 0x6677) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_s16x2 (short_data, test_short); ++ vecdata = __nds32__get_unaligned_s16x2 (short_data); ++ ++ if (vecdata[0] != 0x1111 ++ & vecdata[1] != 0xaaaa) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s8x4.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s8x4.c +new file mode 100644 +index 0000000..f6cb4c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-s8x4.c +@@ -0,0 +1,36 @@ ++/* This is a test program for smbb instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++int ++main (void) ++{ ++ char data[] = {0x55,0x66,0x77,0x88}; ++ char* char_data = (char*)& data[1]; ++ int8x4_t test_char = {0x11, 0x22, 0xaa, 0xbb}; ++ int8x4_t vecdata = __nds32__get_unaligned_s8x4 (char_data); ++ ++#ifdef __NDS32_EL__ ++ if (vecdata[0] != 0x66) ++ abort (); ++#else ++ if (vecdata[0] != 0x66) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_s8x4 (char_data, test_char); ++ vecdata = __nds32__get_unaligned_s8x4 (char_data); ++ ++ if (vecdata[0] != 0x11 ++ & vecdata[3] != 0xaa) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u16x2.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u16x2.c +new file mode 100644 +index 0000000..63ebd40 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u16x2.c +@@ -0,0 +1,36 @@ ++/* This is a test program for smbb instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++int ++main (void) ++{ ++ unsigned char data[] = {0x55,0x66,0x77,0x88}; ++ unsigned short* short_data = (unsigned short*)& data[1]; ++ uint16x2_t test_short = {0x1111, 0xaaaa}; ++ uint16x2_t vecdata = __nds32__get_unaligned_u16x2 (short_data); ++ ++#ifdef __NDS32_EL__ ++ if (vecdata[0] != 0x7766) ++ abort (); ++#else ++ if (vecdata[0] != 0x6677) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_u16x2 (short_data, test_short); ++ vecdata = __nds32__get_unaligned_u16x2 (short_data); ++ ++ if (vecdata[0] != 0x1111 ++ & vecdata[1] != 0xaaaa) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u8x4.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u8x4.c +new file mode 100644 +index 0000000..7b48274 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned-u8x4.c +@@ -0,0 +1,36 @@ ++/* This is a test program for smbb instruction. 
*/ ++ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++#ifdef __NDS32_EXT_DSP__ ++int ++main (void) ++{ ++ char data[] = {0x55,0x66,0x77,0x88}; ++ unsigned char* char_data = (char*)& data[1]; ++ uint8x4_t test_char = {0x11, 0x22, 0xaa, 0xbb}; ++ uint8x4_t vecdata = __nds32__get_unaligned_u8x4 (char_data); ++ ++#ifdef __NDS32_EL__ ++ if (vecdata[0] != 0x66) ++ abort (); ++#else ++ if (vecdata[0] != 0x66) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_u8x4 (char_data, test_char); ++ vecdata = __nds32__get_unaligned_u8x4 (char_data); ++ ++ if (vecdata[0] != 0x11 ++ & vecdata[3] != 0xaa) ++ abort (); ++ else ++ exit (0); ++} ++#else ++int main(){return 0;} ++#endif +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c +new file mode 100644 +index 0000000..42640b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c +@@ -0,0 +1,31 @@ ++/* This is a test program for unaligned double word access. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0 -std=c99" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned char data[] = {0x55, 0x66, 0x77, 0x88, 0xAA, ++ 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; ++ unsigned long long* long_long_data = (unsigned long long*) & data[1]; ++ unsigned long long test_long_long = 0x1122334455667788LL; ++ ++#ifdef __NDS32_EL__ ++ if (__nds32__get_unaligned_dw (long_long_data) != 0xEEDDCCBBAA887766LL) ++ abort (); ++#else ++ if (__nds32__get_unaligned_dw (long_long_data) != 0x667788AABBCCDDEELL) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_dw (long_long_data, test_long_long); ++ ++ if (__nds32__get_unaligned_dw (long_long_data) != 0x1122334455667788LL) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c +new file mode 100644 +index 0000000..f9e1ceb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c +@@ -0,0 +1,30 @@ ++/* This is a test program for unaligned half word access. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned char data[] = {0x55,0x66,0x77,0x88}; ++ unsigned short* short_data = (unsigned short*)& data[1]; ++ unsigned short test_short = 0x5566; ++ ++#ifdef __NDS32_EL__ ++ if (__nds32__get_unaligned_hw (short_data) != 0x7766) ++ abort (); ++#else ++ if (__nds32__get_unaligned_hw (short_data) != 0x6677) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_hw (short_data, test_short); ++ ++ if (__nds32__get_unaligned_hw (short_data) != 0x5566) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c +new file mode 100644 +index 0000000..40d8711 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c +@@ -0,0 +1,30 @@ ++/* This is a test program for unaligned word access. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0 -std=c99" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned char data[] = {0x55,0x66,0x77,0x88,0xAA,0xBB,0xCC,0xDD}; ++ unsigned int* int_data = (unsigned int*)& data[1]; ++ unsigned int test_int = 0x55667788; ++ ++#ifdef __NDS32_EL__ ++ if (__nds32__get_unaligned_w (int_data) != 0xAA887766) ++ abort (); ++#else ++ if (__nds32__get_unaligned_w (int_data) != 0x667788AA) ++ abort (); ++#endif ++ ++ __nds32__put_unaligned_w (int_data, test_int); ++ ++ if (__nds32__get_unaligned_w (int_data) != 0x55667788) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c b/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c +new file mode 100644 +index 0000000..1cee2ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c +@@ -0,0 +1,21 @@ ++/* This is a test program for wsbh instruction. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int a = 0x03020100; ++ unsigned int b; ++ ++ b = __nds32__wsbh (a); ++ ++ if (b != 0x02030001) ++ abort (); ++ else ++ exit (0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-all-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-all-pending.c +new file mode 100644 +index 0000000..0e57831 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-all-pending.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ int a = __nds32__get_all_pending_int (); ++ return a; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-cctl.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-cctl.c +new file mode 100644 +index 0000000..2af55f5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-cctl.c +@@ -0,0 +1,29 @@ ++/* Verify that we generate cache control instruction with builtin function. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "L1D_VA_INVAL" } } */ ++/* { dg-final { scan-assembler "L1D_VA_INVAL" } } */ ++/* { dg-final { scan-assembler "L1D_INVALALL" } } */ ++/* { dg-final { scan-assembler "L1D_IX_WWD" } } */ ++/* { dg-final { scan-assembler "L1D_IX_RWD" } } */ ++/* { dg-final { scan-assembler "PFM_CTL" } } */ ++/* { dg-final { scan-assembler "PFM_CTL" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int va = 0; ++ ++ __nds32__cctlva_lck (NDS32_CCTL_L1D_VA_FILLCK, &va); ++ __nds32__cctlidx_wbinval (NDS32_CCTL_L1D_IX_WBINVAL, va); ++ __nds32__cctlva_wbinval_alvl (NDS32_CCTL_L1D_VA_INVAL, &va); ++ __nds32__cctlva_wbinval_one_lvl (NDS32_CCTL_L1D_VA_INVAL, &va); ++ __nds32__cctl_l1d_invalall (); ++ __nds32__cctlidx_write (NDS32_CCTL_L1D_IX_WWD, va, 1); ++ __nds32__cctlidx_read (NDS32_CCTL_L1D_IX_RWD, 1); ++ __nds32__mtusr (0, NDS32_USR_PFM_CTL); ++ __nds32__mfusr (NDS32_USR_PFM_CTL); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c +new file mode 100644 +index 0000000..fce90e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__clr_pending_hwint (NDS32_INT_H0); ++ __nds32__clr_pending_hwint (NDS32_INT_H1); ++ __nds32__clr_pending_hwint (NDS32_INT_H2); ++ ++ __nds32__clr_pending_hwint (NDS32_INT_H15); ++ __nds32__clr_pending_hwint (NDS32_INT_H16); ++ __nds32__clr_pending_hwint (NDS32_INT_H31); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c +new file mode 100644 +index 0000000..08e1dd0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c +@@ -0,0 +1,10 @@ ++/* { dg-do 
compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__clr_pending_swint (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c +new file mode 100644 +index 0000000..a3a1f44 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__disable_int (NDS32_INT_H15); ++ __nds32__disable_int (NDS32_INT_H16); ++ __nds32__disable_int (NDS32_INT_H31); ++ __nds32__disable_int (NDS32_INT_SWI); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-dpref.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-dpref.c +new file mode 100644 +index 0000000..38cf822 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-dpref.c +@@ -0,0 +1,24 @@ ++/* Verify that we generate data prefetch instruction with builtin function. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "dpref\\tSRD" } } */ ++/* { dg-final { scan-assembler "dpref\\tSRD" } } */ ++/* { dg-final { scan-assembler "dpref\\tSRD" } } */ ++/* { dg-final { scan-assembler "dpref\\tSRD" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned char dpref_q = 0; ++ unsigned short dpref_h = 0; ++ unsigned int dpref_w = 0; ++ unsigned long long dpref_dw = 0; ++ ++ __nds32__dpref_qw (&dpref_q, 0, NDS32_DPREF_SRD); ++ __nds32__dpref_hw (&dpref_h, 0, NDS32_DPREF_SRD); ++ __nds32__dpref_w (&dpref_w, 0, NDS32_DPREF_SRD); ++ __nds32__dpref_dw (&dpref_dw, 0, NDS32_DPREF_SRD); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c +new file mode 100644 +index 0000000..e18ed7a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__enable_int (NDS32_INT_H15); ++ __nds32__enable_int (NDS32_INT_H16); ++ __nds32__enable_int (NDS32_INT_H31); ++ __nds32__enable_int (NDS32_INT_SWI); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c +new file mode 100644 +index 0000000..4ced0a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ int a = __nds32__get_pending_int (NDS32_INT_H15); ++ int b = __nds32__get_pending_int (NDS32_INT_SWI); ++ int c = __nds32__get_pending_int (NDS32_INT_H16); ++ ++ return a + b + c; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c +new file mode 100644 +index 
0000000..a394a60 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ int a = __nds32__get_trig_type (NDS32_INT_H0); ++ int b = __nds32__get_trig_type (NDS32_INT_H15); ++ int c = __nds32__get_trig_type (NDS32_INT_H16); ++ int d = __nds32__get_trig_type (NDS32_INT_H31); ++ return a + b + c + d; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c +new file mode 100644 +index 0000000..c699966 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c +@@ -0,0 +1,13 @@ ++/* Verify that we generate isb instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tisb" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ __nds32__isb (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c +new file mode 100644 +index 0000000..0c312e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c +@@ -0,0 +1,14 @@ ++/* Verify that we generate isync instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tisync" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int *addr = (int *) 0x53000000; ++ __nds32__isync (addr); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-load-store.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-load-store.c +new file mode 100644 +index 0000000..fc15716 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-load-store.c +@@ -0,0 +1,25 @@ ++/* Verify that we generate llw/lwup/scw/swup instruction ++ with builtin function. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target nds32_no_v3m } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tllw" } } */ ++/* { dg-final { scan-assembler "\\tlwup" } } */ ++/* { dg-final { scan-assembler "\\tscw" } } */ ++/* { dg-final { scan-assembler "\\tswup" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int a = 0; ++ int b = 0; ++ unsigned int cc = 0; ++ ++ __nds32__llw (&a); ++ cc = __nds32__lwup (&a); ++ __nds32__scw (&a, b); ++ __nds32__swup (&a, b); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-lto.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-lto.c +new file mode 100644 +index 0000000..fbebcb6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-lto.c +@@ -0,0 +1,28 @@ ++/* Verify that we use -flto option to generate instructions ++ with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0 -flto" } */ ++/* { dg-final { scan-assembler "\\tdsb" } } */ ++/* { dg-final { scan-assembler "\\tisb" } } */ ++/* { dg-final { scan-assembler "\\tmsync\\tall" } } */ ++/* { dg-final { scan-assembler "\\tmsync\\tstore" } } */ ++/* { dg-final { scan-assembler "\\tnop" } } */ ++/* { dg-final { scan-assembler "\\tstandby\\tno_wake_grant" } } */ ++/* { dg-final { scan-assembler "\\tstandby\\twake_grant" } } */ ++/* { dg-final { scan-assembler "\\tstandby\\twait_done" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ __nds32__dsb (); ++ __nds32__isb (); ++ __nds32__msync_all (); ++ __nds32__msync_store (); ++ __nds32__nop (); ++ __nds32__standby_no_wake_grant (); ++ __nds32__standby_wake_grant (); ++ __nds32__standby_wait_done (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-sva.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-sva.c +new file mode 100644 +index 0000000..f927c72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-sva.c +@@ -0,0 +1,16 @@ ++/* Verify 
that we generate sva instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tsva" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int a, b; ++ char c; ++ ++ c = __nds32__sva (a, b); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-svs.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-svs.c +new file mode 100644 +index 0000000..f998491 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-machine-svs.c +@@ -0,0 +1,16 @@ ++/* Verify that we generate svs instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tsvs" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int a, b; ++ char c; ++ ++ c = __nds32__svs (a, b); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c +new file mode 100644 +index 0000000..f069507 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c +@@ -0,0 +1,17 @@ ++/* Verify that we generate mfsr/mtsr instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tmfsr" } } */ ++/* { dg-final { scan-assembler "\\tmtsr" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int ipsw_value; ++ ++ ipsw_value = __nds32__mfsr (__NDS32_REG_IPSW__); ++ __nds32__mtsr (ipsw_value, __NDS32_REG_IPSW__); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c +new file mode 100644 +index 0000000..d6d069b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c +@@ -0,0 +1,17 @@ ++/* Verify that we generate mfusr/mtusr instruction with builtin function. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tmfusr" } } */ ++/* { dg-final { scan-assembler "\\tmtusr" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int itype_value; ++ ++ itype_value = __nds32__mfusr (__NDS32_REG_ITYPE__); ++ __nds32__mtusr (itype_value, __NDS32_REG_ITYPE__); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-misc.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-misc.c +new file mode 100644 +index 0000000..a11f6d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-misc.c +@@ -0,0 +1,39 @@ ++/* Verify that we generate other instructions with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tbreak" } } */ ++/* { dg-final { scan-assembler "\\tdsb" } } */ ++/* { dg-final { scan-assembler "\\tisb" } } */ ++/* { dg-final { scan-assembler "\\tisync" } } */ ++/* { dg-final { scan-assembler "\\tmsync\\tall" } } */ ++/* { dg-final { scan-assembler "\\tmsync\\tstore" } } */ ++/* { dg-final { scan-assembler "\\tnop" } } */ ++/* { dg-final { scan-assembler "\\tstandby\\tno_wake_grant" } } */ ++/* { dg-final { scan-assembler "\\tstandby\\twake_grant" } } */ ++/* { dg-final { scan-assembler "\\tstandby\\twait_done" } } */ ++/* { dg-final { scan-assembler "\\tteqz" } } */ ++/* { dg-final { scan-assembler "\\ttnez" } } */ ++/* { dg-final { scan-assembler "\\ttrap" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int a = 0; ++ ++ __nds32__break (2); ++ __nds32__dsb (); ++ __nds32__isb (); ++ __nds32__isync (&a); ++ __nds32__msync_all (); ++ __nds32__msync_store (); ++ __nds32__nop (); ++ __nds32__standby_no_wake_grant (); ++ __nds32__standby_wake_grant (); ++ __nds32__standby_wait_done (); ++ __nds32__teqz (a, 2); ++ __nds32__tnez (a, 2); ++ __nds32__trap (2); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-dsb.c 
b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-dsb.c +new file mode 100644 +index 0000000..226d627 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-dsb.c +@@ -0,0 +1,14 @@ ++/* Verify that we generate mtsr and dsb instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tmtsr" } } */ ++/* { dg-final { scan-assembler "\\tdsb" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__mtsr_dsb (1, NDS32_SR_ILMB); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-isb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-isb.c +new file mode 100644 +index 0000000..e8b1f98 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-mtsr-isb.c +@@ -0,0 +1,14 @@ ++/* Verify that we generate mtsr and isb instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tmtsr" } } */ ++/* { dg-final { scan-assembler "\\tisb" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__mtsr_isb (1, NDS32_SR_ILMB); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-priority.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-priority.c +new file mode 100644 +index 0000000..c2ec6f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-priority.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ __nds32__set_int_priority (NDS32_INT_H0, 0); ++ __nds32__set_int_priority (NDS32_INT_H15, 3); ++ __nds32__set_int_priority (NDS32_INT_H31, 3); ++ ++ int a = __nds32__get_int_priority (NDS32_INT_H0); ++ int b = __nds32__get_int_priority (NDS32_INT_H15); ++ int c = __nds32__get_int_priority (NDS32_INT_H31); ++ ++ return a + b + c; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c 
b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c +new file mode 100644 +index 0000000..f10b83d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ __nds32__set_pending_swint (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c +new file mode 100644 +index 0000000..bd8178c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__set_trig_type_edge (NDS32_INT_H0); ++ __nds32__set_trig_type_edge (NDS32_INT_H15); ++ __nds32__set_trig_type_edge (NDS32_INT_H16); ++ __nds32__set_trig_type_edge (NDS32_INT_H31); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c +new file mode 100644 +index 0000000..1780543 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__set_trig_type_level (NDS32_INT_H0); ++ __nds32__set_trig_type_level (NDS32_INT_H15); ++ __nds32__set_trig_type_level (NDS32_INT_H16); ++ __nds32__set_trig_type_level (NDS32_INT_H31); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c +new file mode 100644 +index 0000000..e143d3f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c +@@ -0,0 +1,13 @@ ++/* Verify that we generate setgie.d instruction with builtin function. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tsetgie.d" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ __nds32__setgie_dis (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c +new file mode 100644 +index 0000000..ed95782 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c +@@ -0,0 +1,13 @@ ++/* Verify that we generate setgie.e instruction with builtin function. */ ++ ++/* { dg-do compile */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tsetgie.e" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ __nds32__setgie_en (); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add16.c +new file mode 100644 +index 0000000..49fca46 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add16.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kadd16" } } */ ++/* { dg-final { scan-assembler "kadd16" } } */ ++/* { dg-final { scan-assembler "ukadd16" } } */ ++/* { dg-final { scan-assembler "ukadd16" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int16x2_t vr, va, vb; ++ uint16x2_t v_ur, v_ua, v_ub; ++ ++ r = __nds32__kadd16 (a, b); ++ vr = __nds32__v_kadd16 (va, vb); ++ ++ r = __nds32__ukadd16 (a, b); ++ v_ur = __nds32__v_ukadd16 (v_ua, v_ub); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add64.c +new file mode 100644 +index 0000000..1f33a42 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add64.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kadd64" } } */ 
++/* { dg-final { scan-assembler "ukadd64" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ long long r, a, b; ++ unsigned long long ur, ua, ub; ++ ++ r = __nds32__kadd64 (a, b); ++ ur = __nds32__ukadd64 (ua, ub); ++ ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add8.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add8.c +new file mode 100644 +index 0000000..1f2d226 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-add8.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kadd8" } } */ ++/* { dg-final { scan-assembler "kadd8" } } */ ++/* { dg-final { scan-assembler "ukadd8" } } */ ++/* { dg-final { scan-assembler "ukadd8" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int8x4_t vr, va, vb; ++ uint8x4_t v_ur, v_ua, v_ub; ++ ++ r = __nds32__kadd8 (a, b); ++ vr = __nds32__v_kadd8 (va, vb); ++ ++ r = __nds32__ukadd8 (a, b); ++ v_ur = __nds32__v_ukadd8 (v_ua, v_ub); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-cras16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-cras16.c +new file mode 100644 +index 0000000..89c7e6d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-cras16.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kcras16" } } */ ++/* { dg-final { scan-assembler "kcras16" } } */ ++/* { dg-final { scan-assembler "ukcras16" } } */ ++/* { dg-final { scan-assembler "ukcras16" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int16x2_t vr, va, vb; ++ uint16x2_t v_ur, v_ua, v_ub; ++ ++ r = __nds32__kcras16 (a, b); ++ vr = __nds32__v_kcras16 (va, vb); ++ ++ r = __nds32__ukcras16 (a, b); ++ v_ur = __nds32__v_ukcras16 (v_ua, v_ub); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-crsa16.c 
b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-crsa16.c +new file mode 100644 +index 0000000..beaa69a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-crsa16.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kcrsa16" } } */ ++/* { dg-final { scan-assembler "kcrsa16" } } */ ++/* { dg-final { scan-assembler "ukcrsa16" } } */ ++/* { dg-final { scan-assembler "ukcrsa16" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int16x2_t vr, va, vb; ++ uint16x2_t v_ur, v_ua, v_ub; ++ ++ r = __nds32__kcrsa16 (a, b); ++ vr = __nds32__v_kcrsa16 (va, vb); ++ ++ r = __nds32__ukcrsa16 (a, b); ++ v_ur = __nds32__v_ukcrsa16 (v_ua, v_ub); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kabs8.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kabs8.c +new file mode 100644 +index 0000000..de2e3c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kabs8.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kabs8" } } */ ++/* { dg-final { scan-assembler "kabs8" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a; ++ int8x4_t vr, va; ++ ++ r = __nds32__kabs8 (a); ++ vr = __nds32__v_kabs8 (va); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll.c +new file mode 100644 +index 0000000..316b10c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "ksll" } } */ ++/* { dg-final { scan-assembler "kslli" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int r, a; ++ unsigned int b; ++ ++ r = __nds32__ksll (a, b); ++ r = __nds32__ksll (a, 0); ++} +diff --git 
a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll16.c +new file mode 100644 +index 0000000..be9a08e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-ksll16.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "ksll16" } } */ ++/* { dg-final { scan-assembler "ksll16" } } */ ++/* { dg-final { scan-assembler "kslli16" } } */ ++/* { dg-final { scan-assembler "kslli16" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int16x2_t vr, va; ++ ++ r = __nds32__ksll16 (a, b); ++ vr = __nds32__v_ksll16 (va, b); ++ ++ r = __nds32__ksll16 (a, 0); ++ vr = __nds32__v_ksll16 (va, 0); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kslrawu.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kslrawu.c +new file mode 100644 +index 0000000..4eb03e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-kslrawu.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kslraw.u" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int r, a; ++ unsigned int b; ++ ++ r = __nds32__kslraw_u (a, b); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-mar64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-mar64.c +new file mode 100644 +index 0000000..79a3eb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-mar64.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kmar64" } } */ ++/* { dg-final { scan-assembler "ukmar64" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ long long r, a, b; ++ unsigned long long ur, ua, ub; ++ ++ r = __nds32__kmar64 (r, a, b); ++ ur = __nds32__ukmar64 (ur, ua, ub); ++} +diff --git 
a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-misc16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-misc16.c +new file mode 100644 +index 0000000..272e922 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-misc16.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "sclip16" } } */ ++/* { dg-final { scan-assembler "sclip16" } } */ ++/* { dg-final { scan-assembler "uclip16" } } */ ++/* { dg-final { scan-assembler "uclip16" } } */ ++/* { dg-final { scan-assembler "khm16" } } */ ++/* { dg-final { scan-assembler "khm16" } } */ ++/* { dg-final { scan-assembler "khmx16" } } */ ++/* { dg-final { scan-assembler "khmx16" } } */ ++/* { dg-final { scan-assembler "kabs16" } } */ ++/* { dg-final { scan-assembler "kabs16" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int16x2_t vr, va, vb; ++ ++ r = __nds32__sclip16 (a, 0); ++ vr = __nds32__v_sclip16 (va, 0); ++ ++ r = __nds32__uclip16 (a, 0); ++ vr = __nds32__v_uclip16 (va, 0); ++ ++ r = __nds32__khm16 (a, b); ++ vr = __nds32__v_khm16 (va, vb); ++ ++ r = __nds32__khmx16 (a, b); ++ vr = __nds32__v_khmx16 (va, vb); ++ ++ r = __nds32__kabs16 (a); ++ vr = __nds32__v_kabs16 (va); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msr64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msr64.c +new file mode 100644 +index 0000000..2ad64fa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msr64.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kmsr64" } } */ ++/* { dg-final { scan-assembler "ukmsr64" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ long long r, a, b; ++ unsigned long long ur, ua, ub; ++ ++ r = __nds32__kmsr64 (r, a, b); ++ ur = __nds32__ukmsr64 (ur, ua, ub); ++} +diff --git 
a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw16.c +new file mode 100644 +index 0000000..d7ccecb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw16.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kmmawb" } } */ ++/* { dg-final { scan-assembler "kmmawb" } } */ ++/* { dg-final { scan-assembler "kmmawb.u" } } */ ++/* { dg-final { scan-assembler "kmmawb.u" } } */ ++/* { dg-final { scan-assembler "kmmawt" } } */ ++/* { dg-final { scan-assembler "kmmawt" } } */ ++/* { dg-final { scan-assembler "kmmawt.u" } } */ ++/* { dg-final { scan-assembler "kmmawt.u" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int r, a; ++ unsigned int b; ++ int16x2_t vb; ++ ++ r = __nds32__kmmawb (r, a, b); ++ r = __nds32__v_kmmawb (r, a, vb); ++ ++ r = __nds32__kmmawb_u (r, a, b); ++ r = __nds32__v_kmmawb_u (r, a, vb); ++ ++ r = __nds32__kmmawt (r, a, b); ++ r = __nds32__v_kmmawt (r, a, vb); ++ ++ r = __nds32__kmmawt_u (r, a, b); ++ r = __nds32__v_kmmawt_u (r, a, vb); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw32.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw32.c +new file mode 100644 +index 0000000..64d8d4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-msw32.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kmmac" } } */ ++/* { dg-final { scan-assembler "kmmac.u" } } */ ++/* { dg-final { scan-assembler "kmmsb" } } */ ++/* { dg-final { scan-assembler "kmmsb.u" } } */ ++/* { dg-final { scan-assembler "kwmmul" } } */ ++/* { dg-final { scan-assembler "kwmmul.u" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int r, a, b; ++ r = __nds32__kmmac (r, a, b); ++ r = __nds32__kmmac_u (r, a, b); ++ ++ r = __nds32__kmmsb (r, a, b); ++ r = 
__nds32__kmmsb_u (r, a, b); ++ ++ r = __nds32__kwmmul (a, b); ++ r = __nds32__kwmmul_u (a, b); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-smul16x32.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-smul16x32.c +new file mode 100644 +index 0000000..0d2b87f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-smul16x32.c +@@ -0,0 +1,72 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "kmda" } } */ ++/* { dg-final { scan-assembler "kmda" } } */ ++/* { dg-final { scan-assembler "kmxda" } } */ ++/* { dg-final { scan-assembler "kmxda" } } */ ++/* { dg-final { scan-assembler "kmabb" } } */ ++/* { dg-final { scan-assembler "kmabb" } } */ ++/* { dg-final { scan-assembler "kmabt" } } */ ++/* { dg-final { scan-assembler "kmabt" } } */ ++/* { dg-final { scan-assembler "kmatt" } } */ ++/* { dg-final { scan-assembler "kmatt" } } */ ++/* { dg-final { scan-assembler "kmada" } } */ ++/* { dg-final { scan-assembler "kmada" } } */ ++/* { dg-final { scan-assembler "kmaxda" } } */ ++/* { dg-final { scan-assembler "kmaxda" } } */ ++/* { dg-final { scan-assembler "kmads" } } */ ++/* { dg-final { scan-assembler "kmads" } } */ ++/* { dg-final { scan-assembler "kmadrs" } } */ ++/* { dg-final { scan-assembler "kmadrs" } } */ ++/* { dg-final { scan-assembler "kmaxds" } } */ ++/* { dg-final { scan-assembler "kmaxds" } } */ ++/* { dg-final { scan-assembler "kmsda" } } */ ++/* { dg-final { scan-assembler "kmsda" } } */ ++/* { dg-final { scan-assembler "kmsxda" } } */ ++/* { dg-final { scan-assembler "kmsxda" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int r; ++ unsigned int a, b; ++ int16x2_t va, vb; ++ ++ r = __nds32__kmda (a, b); ++ r = __nds32__v_kmda (va, vb); ++ ++ r = __nds32__kmxda (a, b); ++ r = __nds32__v_kmxda (va, vb); ++ ++ r = __nds32__kmabb (r, a, b); ++ r = __nds32__v_kmabb (r, va, vb); ++ ++ r = __nds32__kmabt (r, a, b); ++ r = __nds32__v_kmabt (r, 
va, vb); ++ ++ r = __nds32__kmatt (r, a, b); ++ r = __nds32__v_kmatt (r, va, vb); ++ ++ r = __nds32__kmada (r, a, b); ++ r = __nds32__v_kmada (r, va, vb); ++ ++ r = __nds32__kmaxda (r, a, b); ++ r = __nds32__v_kmaxda (r, va, vb); ++ ++ r = __nds32__kmads (r, a, b); ++ r = __nds32__v_kmads (r, va, vb); ++ ++ r = __nds32__kmadrs (r, a, b); ++ r = __nds32__v_kmadrs (r, va, vb); ++ ++ r = __nds32__kmaxds (r, a, b); ++ r = __nds32__v_kmaxds (r, va, vb); ++ ++ r = __nds32__kmsda (r, a, b); ++ r = __nds32__v_kmsda (r, va, vb); ++ ++ r = __nds32__kmsxda (r, a, b); ++ r = __nds32__v_kmsxda (r, va, vb); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub16.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub16.c +new file mode 100644 +index 0000000..ecea7bb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub16.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "ksub16" } } */ ++/* { dg-final { scan-assembler "ksub16" } } */ ++/* { dg-final { scan-assembler "uksub16" } } */ ++/* { dg-final { scan-assembler "uksub16" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int16x2_t vr, va, vb; ++ uint16x2_t v_ur, v_ua, v_ub; ++ ++ r = __nds32__ksub16 (a, b); ++ vr = __nds32__v_ksub16 (va, vb); ++ ++ r = __nds32__uksub16 (a, b); ++ v_ur = __nds32__v_uksub16 (v_ua, v_ub); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub64.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub64.c +new file mode 100644 +index 0000000..fae30e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub64.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "ksub64" } } */ ++/* { dg-final { scan-assembler "uksub64" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ long long r, a, b; ++ unsigned long long ur, 
ua, ub; ++ ++ r = __nds32__ksub64 (a, b); ++ ur = __nds32__uksub64 (ua, ub); ++ ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub8.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub8.c +new file mode 100644 +index 0000000..5e343e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-stura-sub8.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mext-dsp" } */ ++/* { dg-final { scan-assembler "ksub8" } } */ ++/* { dg-final { scan-assembler "ksub8" } } */ ++/* { dg-final { scan-assembler "uksub8" } } */ ++/* { dg-final { scan-assembler "uksub8" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ unsigned int r, a, b; ++ int8x4_t vr, va, vb; ++ uint8x4_t v_ur, v_ua, v_ub; ++ ++ r = __nds32__ksub8 (a, b); ++ vr = __nds32__v_ksub8 (va, vb); ++ ++ r = __nds32__uksub8 (a, b); ++ v_ur = __nds32__v_uksub8 (v_ua, v_ub); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-unaligned-feature.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-unaligned-feature.c +new file mode 100644 +index 0000000..6199109 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-unaligned-feature.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main () ++{ ++ unsigned unalign = __nds32__unaligned_feature (); ++ __nds32__enable_unaligned (); ++ __nds32__disable_unaligned (); ++ return unalign; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-add-sub.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-add-sub.c +new file mode 100644 +index 0000000..704610e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-add-sub.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "add8" } } */ ++/* { dg-final { scan-assembler "add16" } } */ ++/* { dg-final { scan-assembler "add64" } } */ ++/* { dg-final { scan-assembler "sub8" } } */ ++/* { 
dg-final { scan-assembler "sub16" } } */ ++/* { dg-final { scan-assembler "sub64" } } */ ++ ++typedef signed char v4qi __attribute__ ((vector_size (4))); ++typedef short v2hi __attribute__ ((vector_size (4))); ++ ++v4qi __attribute__ ((noinline)) ++add8 (v4qi a, v4qi b) ++{ ++ return a + b; ++} ++ ++v4qi __attribute__ ((noinline)) ++sub8 (v4qi a, v4qi b) ++{ ++ return a - b; ++} ++ ++v2hi __attribute__ ((noinline)) ++add16 (v2hi a, v2hi b) ++{ ++ return a + b; ++} ++ ++v2hi __attribute__ ((noinline)) ++sub16 (v2hi a, v2hi b) ++{ ++ return a - b; ++} ++ ++long long ++add64 (long long a, long long b) ++{ ++ return a + b; ++} ++ ++long long ++sub64 (long long a, long long b) ++{ ++ return a - b; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-bpick.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-bpick.c +new file mode 100644 +index 0000000..5f9d7de +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-bpick.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "bpick" } } */ ++ ++int bpick(int a, int b, int mask) ++{ ++ return (a & mask) | (b & ~mask); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-mmul.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-mmul.c +new file mode 100644 +index 0000000..5c9cdeb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-mmul.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "smmul" } } */ ++ ++typedef signed char v4qi __attribute__ ((vector_size (4))); ++typedef short v2hi __attribute__ ((vector_size (4))); ++ ++int smmul(int a, int b) ++{ ++ long long tmp = (long long)a * b; ++ return (int)((tmp >> 32) & 0xffffffffll); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-mulhisi.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-mulhisi.c +new file mode 100644 +index 0000000..856530b +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/nds32/compile/dsp-mulhisi.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "smbb" } } */ ++/* { dg-final { scan-assembler "smbt" } } */ ++/* { dg-final { scan-assembler "smtt" } } */ ++ ++typedef signed char v4qi __attribute__ ((vector_size (4))); ++typedef short v2hi __attribute__ ((vector_size (4))); ++ ++int smbb(v2hi a, v2hi b) ++{ ++ return a[0] * b[0]; ++} ++ ++int smbt(v2hi a, v2hi b) ++{ ++ return a[0] * b[1]; ++} ++ ++int smtt(v2hi a, v2hi b) ++{ ++ return a[1] * b[1]; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-raddsub.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-raddsub.c +new file mode 100644 +index 0000000..4817637 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-raddsub.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "raddw" } } */ ++/* { dg-final { scan-assembler "rsubw" } } */ ++/* { dg-final { scan-assembler "uraddw" } } */ ++/* { dg-final { scan-assembler "ursubw" } } */ ++ ++int raddw(int a, int b) ++{ ++ return (a + b) >> 1; ++} ++ ++int rsubw(int a, int b) ++{ ++ return (a - b) >> 1; ++} ++ ++unsigned uraddw(unsigned a, unsigned b) ++{ ++ return (a + b) >> 1; ++} ++ ++unsigned ursubw(unsigned a, unsigned b) ++{ ++ return (a - b) >> 1; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-smals.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-smals.c +new file mode 100644 +index 0000000..f1dc684 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-smals.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "smalbb" } } */ ++/* { dg-final { scan-assembler "smalbt" } } */ ++/* { dg-final { scan-assembler "smaltt" } } */ ++/* { dg-final { scan-assembler "smal" } } */ ++ ++typedef signed char v4qi __attribute__ ((vector_size (4))); ++typedef short v2hi __attribute__ 
((vector_size (4))); ++ ++ ++long long smalbb(long long acc, v2hi a, v2hi b) ++{ ++ return acc + a[0] * b[0]; ++} ++ ++long long smalbt(long long acc, v2hi a, v2hi b) ++{ ++ return acc + a[1] * b[0]; ++} ++ ++long long smaltt(long long acc, v2hi a, v2hi b) ++{ ++ return acc + a[1] * b[1]; ++} ++ ++long long smal(v2hi a, long long b) ++{ ++ return b + (long long)(a[0] * a[1]); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-smalxda.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-smalxda.c +new file mode 100644 +index 0000000..2fe606b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-smalxda.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "smalxda" } } */ ++/* { dg-final { scan-assembler "smalxds" } } */ ++ ++typedef signed char v4qi __attribute__ ((vector_size (4))); ++typedef short v2hi __attribute__ ((vector_size (4))); ++ ++long long smalxda(long long acc, v2hi a, v2hi b) ++{ ++ return acc + (a[0] * b[1] + a[1] * b[0]); ++} ++ ++long long smalxds(long long acc, v2hi a, v2hi b) ++{ ++ return acc + (a[1] * b[0] - a[0] * b[1]); ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/dsp-unpkd.c b/gcc/testsuite/gcc.target/nds32/compile/dsp-unpkd.c +new file mode 100644 +index 0000000..2de7107 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/dsp-unpkd.c +@@ -0,0 +1,79 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mext-dsp" } */ ++/* { dg-final { scan-assembler "sunpkd810" } } */ ++/* { dg-final { scan-assembler "sunpkd820" } } */ ++/* { dg-final { scan-assembler "sunpkd830" } } */ ++/* { dg-final { scan-assembler "sunpkd831" } } */ ++/* { dg-final { scan-assembler "zunpkd810" } } */ ++/* { dg-final { scan-assembler "zunpkd820" } } */ ++/* { dg-final { scan-assembler "zunpkd830" } } */ ++/* { dg-final { scan-assembler "zunpkd831" } } */ ++ ++typedef signed char v4qi __attribute__ ((vector_size (4))); ++typedef short v2hi __attribute__ ((vector_size 
(4))); ++typedef unsigned char uv4qi __attribute__ ((vector_size (4))); ++typedef unsigned short uv2hi __attribute__ ((vector_size (4))); ++ ++v2hi sunpkd810(v4qi v) ++{ ++ v2hi ret; ++ ret[0] = v[0]; ++ ret[1] = v[1]; ++ return ret; ++} ++ ++v2hi sunpkd820(v4qi v) ++{ ++ v2hi ret; ++ ret[0] = v[0]; ++ ret[1] = v[2]; ++ return ret; ++} ++ ++v2hi sunpkd830(v4qi v) ++{ ++ v2hi ret; ++ ret[0] = v[0]; ++ ret[1] = v[3]; ++ return ret; ++} ++ ++v2hi sunpkd831(v4qi v) ++{ ++ v2hi ret; ++ ret[0] = v[1]; ++ ret[1] = v[3]; ++ return ret; ++} ++ ++uv2hi zunpkd810(uv4qi v) ++{ ++ uv2hi ret; ++ ret[0] = v[0]; ++ ret[1] = v[1]; ++ return ret; ++} ++ ++uv2hi zunpkd820(uv4qi v) ++{ ++ uv2hi ret; ++ ret[0] = v[0]; ++ ret[1] = v[2]; ++ return ret; ++} ++ ++uv2hi zunpkd830(uv4qi v) ++{ ++ uv2hi ret; ++ ret[0] = v[0]; ++ ret[1] = v[3]; ++ return ret; ++} ++ ++uv2hi zunpkd831(uv4qi v) ++{ ++ uv2hi ret; ++ ret[0] = v[1]; ++ ret[1] = v[3]; ++ return ret; ++} +diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-1.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-1.c +new file mode 100644 +index 0000000..d456fa5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-1.c +@@ -0,0 +1,21 @@ ++/* Verify scalbn transform pass for normal case. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-all -lm" } */ ++/* { dg-require-effective-target nds32_soft_fp } */ ++ ++float test_scalbnf (float x) ++{ ++ return x * 128; ++} ++ ++double test_scalbn (double x) ++{ ++ return x * 256; ++} ++ ++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbnf \\(x_\[0-9\]+\\(D\\), 7\\);\\s*_\[0-9\]+ = \\(float\\) \\1;" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbn \\(x_\[0-9\]+\\(D\\), 8\\);\\s*_\[0-9\]+ = \\(double\\) \\1;" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump-not " \\* 1.28e\\+2" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump-not " \\* 2.56e\\+2" "scalbn_transform" } } */ ++/* { dg-final { cleanup-tree-dump "*" } } */ +diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-2.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-2.c +new file mode 100644 +index 0000000..480cf23 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-2.c +@@ -0,0 +1,14 @@ ++/* Verify scalbn transform pass for negative number case. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-all" } */ ++/* { dg-require-effective-target nds32_soft_fp } */ ++ ++double test_neg_scalbn (double x) ++{ ++ return x * -8; ++} ++ ++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbn \\(x_\[0-9\]+\\(D\\), 3\\);\\s*_\[0-9\]+ = -\\1;" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump-not " \\* -8.0e\\+0" "scalbn_transform" } } */ ++/* { dg-final { cleanup-tree-dump "*" } } */ +diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-3.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-3.c +new file mode 100644 +index 0000000..256f31a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-3.c +@@ -0,0 +1,14 @@ ++/* Verify scalbn transform pass for negative-exponent case. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-all" } */ ++/* { dg-require-effective-target nds32_soft_fp } */ ++ ++double test_neg_exp_scalbnf (double x) ++{ ++ return x * 0.0625; ++} ++ ++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbn \\(x_\[0-9\]+\\(D\\), -4\\);\\s*_\[0-9\]+ = \\(double\\) \\1;" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump-not " \\* 6.25e\\-2" "scalbn_transform" } } */ ++/* { dg-final { cleanup-tree-dump "*" } } */ +diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-4.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-4.c +new file mode 100644 +index 0000000..b6ba596 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-4.c +@@ -0,0 +1,52 @@ ++/* Verify scalbn transform pass for cases that can't be optimized. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-all" } */ ++/* { dg-require-effective-target nds32_soft_fp } */ ++ ++#include "math.h" ++ ++double test_filter_condition_1 (double x) ++{ ++ return x * 0; ++} ++ ++double test_filter_condition_2 (double x) ++{ ++ return x * -0; ++} ++ ++double test_filter_condition_3 (double x) ++{ ++ return x * 485; ++} ++ ++double test_filter_condition_4 (double x) ++{ ++ return x * -85; ++} ++ ++double test_filter_condition_5 (double x) ++{ ++ return x * 0.12; ++} ++ ++double test_filter_condition_6 (double x) ++{ ++ return x * -INFINITY; ++} ++ ++double test_filter_condition_7 (double x) ++{ ++ return x * NAN; ++} ++ ++/* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) \\* 0.0" 2 "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump " \\* 4.85e\\+2" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump " \\* -8.5e\\+1" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump " \\* 1.19999" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump " \\* -Inf" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump " \\* Nan" "scalbn_transform" } } */ ++/* { dg-final { 
scan-tree-dump-not "__builtin_scalbn" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump-times "No multiplication stmt is transformed" 7 "scalbn_transform" } } */ ++/* { dg-final { cleanup-tree-dump "*" } } */ +diff --git a/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-5.c b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-5.c +new file mode 100644 +index 0000000..874170e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/compile/scalbn-transform-5.c +@@ -0,0 +1,20 @@ ++/* Verify scalbn transform pass for bug 11424 case. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-all" } */ ++/* { dg-require-effective-target nds32_soft_fp } */ ++ ++typedef float float32_t; ++float32_t test_case (float32_t *pIn) ++{ ++ float32_t in; ++ in = *pIn++; ++ in = (in * 128); ++ in += in > 0.0f ? 0.5f : -0.5f; ++ ++ return in; ++} ++ ++/* { dg-final { scan-tree-dump "(_\[0-9\]+) = __builtin_scalbnf \\(in_\[0-9\]+, 7\\);\\s*in_\[0-9\]+ = \\(float32_t\\) \\1;" "scalbn_transform" } } */ ++/* { dg-final { scan-tree-dump-not "in_\[0-9\]+ = in_\[0-9\]+ \\* 1.28e\\+2" "scalbn_transform" } } */ ++/* { dg-final { cleanup-tree-dump "*" } } */ +diff --git a/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c b/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c +new file mode 100644 +index 0000000..d1c61b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c +@@ -0,0 +1,127 @@ ++/* { dg-do run } */ ++ ++#include <nds32_intrinsic.h> ++ ++int16x2_t packing01(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); ++int16x2_t packing01(int16x2_t x, int16x2_t y) ++{ ++ int16x2_t ret; ++ ret[0] = x[0]; ++ ret[1] = y[1]; ++ return ret; ++} ++ ++int16x2_t packing10(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); ++int16x2_t packing10(int16x2_t x, int16x2_t y) ++{ ++ int16x2_t ret; ++ ret[0] = x[1]; ++ ret[1] = y[0]; ++ return ret; ++} ++ ++int16x2_t packing00(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); ++int16x2_t 
packing00(int16x2_t x, int16x2_t y) ++{ ++ int16x2_t ret; ++ ret[0] = x[0]; ++ ret[1] = y[0]; ++ return ret; ++} ++ ++int16x2_t packing0cv0(int16x2_t x) __attribute__ ((noinline)); ++int16x2_t packing0cv0(int16x2_t x) ++{ ++ int16x2_t ret = {0, 0}; ++ ret[0] = x[0]; ++ return ret; ++} ++ ++int16x2_t packingcv00(int16x2_t x) __attribute__ ((noinline)); ++int16x2_t packingcv00(int16x2_t x) ++{ ++ int16x2_t ret = {0, 0}; ++ ret[1] = x[0]; ++ return ret; ++} ++ ++int16x2_t packing11(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); ++int16x2_t packing11(int16x2_t x, int16x2_t y) ++{ ++ int16x2_t ret; ++ ret[0] = x[1]; ++ ret[1] = y[1]; ++ return ret; ++} ++int16x2_t packing1cv0(int16x2_t x) __attribute__ ((noinline)); ++int16x2_t packing1cv0(int16x2_t x) ++{ ++ int16x2_t ret = {0, 0}; ++ ret[0] = x[1]; ++ return ret; ++} ++ ++int16x2_t packingcv01(int16x2_t x) __attribute__ ((noinline)); ++int16x2_t packingcv01(int16x2_t x) ++{ ++ int16x2_t ret = {0, 0}; ++ ret[1] = x[1]; ++ return ret; ++} ++ ++int main() { ++ int16x2_t a = {0x11, 0x22}; ++ int16x2_t b = {0x33, 0x44}; ++ ++ int16x2_t ret00, ret01, ret10, ret11; ++ int16x2_t ret0cv0, retcv00, ret1cv0, retcv01; ++ ret00 = packing00 (a, b); ++ ++ if (ret00[0] != 0x11 ++ || ret00[1] != 0x33) ++ return 1; ++ ++ ret0cv0 = packing0cv0 (a); ++ ++ if (ret0cv0[0] != 0x11 ++ || ret0cv0[1] != 0) ++ return 1; ++ ++ retcv00 = packingcv00 (a); ++ ++ if (retcv00[0] != 0 ++ || retcv00[1] != 0x11) ++ return 1; ++ ++ ret11 = packing11 (a, b); ++ ++ if (ret11[0] != 0x22 ++ || ret11[1] != 0x44) ++ return 1; ++ ++ ret1cv0 = packing1cv0 (a); ++ ++ if (ret1cv0[0] != 0x22 ++ || ret1cv0[1] != 0) ++ return 1; ++ ++ retcv01 = packingcv01 (a); ++ ++ if (retcv01[0] != 0 ++ || retcv01[1] != 0x22) ++ return 1; ++ ++ ret01 = packing01 (a, b); ++ ++ if (ret01[0] != 0x11 ++ || ret01[1] != 0x44) ++ return 1; ++ ++ ret10 = packing10 (a, b); ++ ++ if (ret10[0] != 0x22 ++ || ret10[1] != 0x33) ++ return 1; ++ ++ return 0; ++} +diff --git 
a/gcc/testsuite/gcc.target/nds32/nds32.exp b/gcc/testsuite/gcc.target/nds32/nds32.exp +index 1c245f6..2f5a150 100644 +--- a/gcc/testsuite/gcc.target/nds32/nds32.exp ++++ b/gcc/testsuite/gcc.target/nds32/nds32.exp +@@ -38,8 +38,10 @@ if ![info exists DEFAULT_CFLAGS] then { + dg-init + + # Main loop. +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/compile/*.\[cS\]]] \ + "" $DEFAULT_CFLAGS ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ ++ "" "" + + # All done. + dg-finish +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index f0f5ac4..5a9b57d 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -487,6 +487,10 @@ proc check_effective_target_trampolines { } { + || [istarget hppa64-hp-hpux11.23] } { + return 0; + } ++ if { [istarget nds32*-*-*] ++ && [check_effective_target_nds32_reduced_regs] } { ++ return 0; ++ } + return 1 + } + +@@ -500,7 +504,7 @@ proc check_effective_target_keeps_null_pointer_checks { } { + if [target_info exists keeps_null_pointer_checks] { + return 1 + } +- if { [istarget avr-*-*] } { ++ if { [istarget avr-*-*] || [istarget nds32*-*-elf] } { + return 1; + } + return 0 +@@ -3597,6 +3601,125 @@ proc check_effective_target_arm_prefer_ldrd_strd { } { + } "-O2 -mthumb" ] + } + ++# If board info says it only has 16M addressing space, return 0. ++# Otherwise, return 1. ++proc check_effective_target_nds32_full_addr_space { } { ++ if [board_info target exists addr16m] { ++ return 0 ++ } ++ return 1; ++} ++ ++# Return 1 if gp direct is enabled by default. ++proc check_effective_target_nds32_gp_direct { } { ++ return [check_no_compiler_messages gp_direct object { ++ #ifdef __NDS32_GP_DIRECT__ ++ int dummy; ++ #else ++ #error no GP_DIRECT ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mext-perf. 
++proc check_effective_target_nds32_ext_perf { } { ++ return [check_no_compiler_messages ext_perf object { ++ #ifdef __NDS32_EXT_PERF__ ++ int dummy; ++ #else ++ #error no EXT_PERF ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mext-perf2. ++proc check_effective_target_nds32_ext_perf2 { } { ++ return [check_no_compiler_messages ext_perf2 object { ++ #ifdef __NDS32_EXT_PERF2__ ++ int dummy; ++ #else ++ #error no EXT_PERF2 ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mext-string. ++proc check_effective_target_nds32_ext_string { } { ++ return [check_no_compiler_messages ext_string object { ++ #ifdef __NDS32_EXT_STRING__ ++ int dummy; ++ #else ++ #error no EXT_STRING ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mext-fpu-sp or -mext-fpu-dp. ++proc check_effective_target_nds32_ext_fpu { } { ++ return [check_no_compiler_messages ext_fpu object { ++ #if defined(__NDS32_EXT_FPU_SP__) || defined(__NDS32_EXT_FPU_DP__) ++ int dummy; ++ #else ++ #error no support FPU ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target not supporting -mext-fpu-sp or -mext-fpu-dp. ++proc check_effective_target_nds32_soft_fp { } { ++ return [check_no_compiler_messages soft_fp object { ++ #if defined(__NDS32_EXT_FPU_SP__) || defined(__NDS32_EXT_FPU_DP__) ++ #error Hard FP ++ #else ++ int dummy; ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mext-fpu-sp. ++proc check_effective_target_nds32_ext_fpu_sp { } { ++ return [check_no_compiler_messages ext_fpu_sp object { ++ #ifdef __NDS32_EXT_FPU_SP__ ++ int dummy; ++ #else ++ #error no EXT_FPU_SP ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mext-fpu-dp. 
++proc check_effective_target_nds32_ext_fpu_dp { } { ++ return [check_no_compiler_messages ext_fpu_dp object { ++ #ifdef __NDS32_EXT_FPU_DP__ ++ int dummy; ++ #else ++ #error no EXT_FPU_DP ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target supporting -mreduced-regs. ++proc check_effective_target_nds32_reduced_regs { } { ++ return [check_no_compiler_messages reduced_regs object { ++ #ifdef __NDS32_REDUCED_REGS__ ++ int dummy; ++ #else ++ #error no REDUCED_REGS ++ #endif ++ }] ++} ++ ++# Return 1 if this is a nds32 target not supporting v3m ISA. ++proc check_effective_target_nds32_no_v3m { } { ++ return [check_no_compiler_messages no_v3m object { ++ #if !defined(__NDS32_BASELINE_V3M__) ++ int dummy; ++ #else ++ #error Support V3M ISA ++ #endif ++ }] ++} ++ + # Return 1 if this is a PowerPC target supporting -meabi. + + proc check_effective_target_powerpc_eabi_ok { } { +@@ -6897,6 +7020,7 @@ proc check_effective_target_logical_op_short_circuit {} { + || [istarget avr*-*-*] + || [istarget crisv32-*-*] || [istarget cris-*-*] + || [istarget mmix-*-*] ++ || [istarget nds32*-*-*] + || [istarget s390*-*-*] + || [istarget powerpc*-*-*] + || [istarget nios2*-*-*] +diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c +index 154df21..acd1a52 100644 +--- a/gcc/tree-vrp.c ++++ b/gcc/tree-vrp.c +@@ -9518,6 +9518,7 @@ simplify_cond_using_ranges (gcond *stmt) + used for the comparison directly if we just massage the constant in the + comparison. 
*/ + if (TREE_CODE (op0) == SSA_NAME ++ && has_single_use (op0) + && TREE_CODE (op1) == INTEGER_CST) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (op0); +diff --git a/libgcc/config.host b/libgcc/config.host +index 124f2ce..107ccb1 100644 +--- a/libgcc/config.host ++++ b/libgcc/config.host +@@ -946,6 +946,23 @@ msp430*-*-elf) + tmake_file="$tm_file t-crtstuff t-fdpbit msp430/t-msp430" + extra_parts="$extra_parts libmul_none.a libmul_16.a libmul_32.a libmul_f5.a" + ;; ++nds32*-linux*) ++ # Basic makefile fragment and extra_parts for crt stuff. ++ # We also append c-isr library implementation. ++ tmake_file="${tmake_file} t-slibgcc-libgcc" ++ tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp" ++ # The header file of defining MD_FALLBACK_FRAME_STATE_FOR. ++ md_unwind_header=nds32/linux-unwind.h ++ # Append library definition makefile fragment according to --with-nds32-lib=X setting. ++ case "${with_nds32_lib}" in ++ "" | glibc | uclibc ) ++ ;; ++ *) ++ echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: glibc uclibc" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ;; + nds32*-elf*) + # Basic makefile fragment and extra_parts for crt stuff. + # We also append c-isr library implementation. +@@ -959,9 +976,19 @@ nds32*-elf*) + tmake_file="${tmake_file} nds32/t-nds32-newlib t-softfp-sfdf t-softfp" + ;; + mculib) +- # Append library definition makefile fragment t-nds32-mculib. ++ case "${with_arch}" in ++ "" | v2 | v2j | v3 | v3j | v3m) ++ # Append library definition makefile fragment t-nds32-mculib-generic. + # The software floating point library is included in mculib. +- tmake_file="${tmake_file} nds32/t-nds32-mculib" ++ tmake_file="${tmake_file} nds32/t-nds32-mculib-generic" ++ ;; ++ v3f | v3s) ++ # Append library definition makefile fragment t-nds32-mculib-softfp. ++ # Append mculib do not support ABI2FP_PLUS, ++ # so using'soft-fp' software floating point make rule fragment provided by gcc. 
++ tmake_file="${tmake_file} nds32/t-nds32-mculib-softfp t-softfp-sfdf t-softfp" ++ ;; ++ esac + ;; + *) + echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 +diff --git a/libgcc/config/nds32/crtzero.S b/libgcc/config/nds32/crtzero.S +deleted file mode 100644 +index 9898525..0000000 +--- a/libgcc/config/nds32/crtzero.S ++++ /dev/null +@@ -1,103 +0,0 @@ +-/* The startup code sample of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +-!!============================================================================== +-!! +-!! crtzero.S +-!! +-!! This is JUST A SAMPLE of nds32 startup code !! +-!! You can refer this content and implement +-!! the actual one in newlib/mculib. +-!! +-!!============================================================================== +- +-!!------------------------------------------------------------------------------ +-!! 
Jump to start up code +-!!------------------------------------------------------------------------------ +- .section .nds32_init, "ax" +- j _start +- +-!!------------------------------------------------------------------------------ +-!! Startup code implementation +-!!------------------------------------------------------------------------------ +- .section .text +- .global _start +- .weak _SDA_BASE_ +- .weak _FP_BASE_ +- .align 2 +- .func _start +- .type _start, @function +-_start: +-.L_fp_gp_lp_init: +- la $fp, _FP_BASE_ ! init $fp +- la $gp, _SDA_BASE_ ! init $gp for small data access +- movi $lp, 0 ! init $lp +- +-.L_stack_init: +- la $sp, _stack ! init $sp +- movi $r0, -8 ! align $sp to 8-byte (use 0xfffffff8) +- and $sp, $sp, $r0 ! align $sp to 8-byte (filter out lower 3-bit) +- +-.L_bss_init: +- ! clear BSS, this process can be 4 time faster if data is 4 byte aligned +- ! if so, use swi.p instead of sbi.p +- ! the related stuff are defined in linker script +- la $r0, _edata ! get the starting addr of bss +- la $r2, _end ! get ending addr of bss +- beq $r0, $r2, .L_call_main ! if no bss just do nothing +- movi $r1, 0 ! should be cleared to 0 +-.L_clear_bss: +- sbi.p $r1, [$r0], 1 ! Set 0 to bss +- bne $r0, $r2, .L_clear_bss ! Still bytes left to set +- +-!.L_stack_heap_check: +-! la $r0, _end ! init heap_end +-! s.w $r0, heap_end ! save it +- +- +-!.L_init_argc_argv: +-! ! argc/argv initialization if necessary; default implementation is in crt1.o +-! la $r9, _arg_init ! load address of _arg_init? +-! beqz $r9, .L4 ! has _arg_init? no, go check main() +-! addi $sp, $sp, -512 ! allocate space for command line + arguments +-! move $r6, $sp ! r6 = buffer addr of cmd line +-! move $r0, $r6 ! r0 = buffer addr of cmd line +-! syscall 6002 ! get cmd line +-! move $r0, $r6 ! r0 = buffer addr of cmd line +-! addi $r1, $r6, 256 ! r1 = argv +-! jral $r9 ! init argc/argv +-! addi $r1, $r6, 256 ! r1 = argv +- +-.L_call_main: +- ! 
call main() if main() is provided +- la $r15, main ! load address of main +- jral $r15 ! call main +- +-.L_terminate_program: +- syscall 0x1 ! use syscall 0x1 to terminate program +- .size _start, .-_start +- .end +- +-!! ------------------------------------------------------------------------ +diff --git a/libgcc/config/nds32/initfini.c b/libgcc/config/nds32/initfini.c +index 0aa33f5..34406f0 100644 +--- a/libgcc/config/nds32/initfini.c ++++ b/libgcc/config/nds32/initfini.c +@@ -25,6 +25,10 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + ++#include <stddef.h> ++/* Need header file for `struct object' type. */ ++#include "../libgcc/unwind-dw2-fde.h" ++ + /* Declare a pointer to void function type. */ + typedef void (*func_ptr) (void); + +@@ -42,11 +46,59 @@ typedef void (*func_ptr) (void); + refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__ + symbol in crtinit.o, where they are defined. */ + +-static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"))) +- = { (func_ptr) (-1) }; ++static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"), used)) ++ = { (func_ptr) 0 }; ++ ++static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"), used)) ++ = { (func_ptr) 0 }; ++ ++ ++#ifdef SUPPORT_UNWINDING_DWARF2 ++/* Preparation of exception handling with dwar2 mechanism registration. */ + +-static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) +- = { (func_ptr) (-1) }; ++asm ("\n\ ++ .section .eh_frame,\"aw\",@progbits\n\ ++ .global __EH_FRAME_BEGIN__\n\ ++ .type __EH_FRAME_BEGIN__, @object\n\ ++ .align 2\n\ ++__EH_FRAME_BEGIN__:\n\ ++ ! Beginning location of eh_frame section\n\ ++ .previous\n\ ++"); ++ ++extern func_ptr __EH_FRAME_BEGIN__[]; ++ ++ ++/* Note that the following two functions are going to be chained into ++ constructor and destructor list, repectively. So these two declarations ++ must be placed after __CTOR_LIST__ and __DTOR_LIST. 
*/ ++extern void __nds32_register_eh(void) __attribute__((constructor, used)); ++extern void __nds32_deregister_eh(void) __attribute__((destructor, used)); ++ ++/* Register the exception handling table as the first constructor. */ ++void ++__nds32_register_eh (void) ++{ ++ static struct object object; ++ if (__register_frame_info) ++ __register_frame_info (__EH_FRAME_BEGIN__, &object); ++} ++ ++/* Unregister the exception handling table as a deconstructor. */ ++void ++__nds32_deregister_eh (void) ++{ ++ static int completed = 0; ++ ++ if (completed) ++ return; ++ ++ if (__deregister_frame_info) ++ __deregister_frame_info (__EH_FRAME_BEGIN__); ++ ++ completed = 1; ++} ++#endif + + /* Run all the global destructors on exit from the program. */ + +@@ -63,7 +115,7 @@ static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) + same particular root executable or shared library file. */ + + static void __do_global_dtors (void) +-asm ("__do_global_dtors") __attribute__ ((section (".text"))); ++asm ("__do_global_dtors") __attribute__ ((section (".text"), used)); + + static void + __do_global_dtors (void) +@@ -116,23 +168,37 @@ void *__dso_handle = 0; + last, these words naturally end up at the very ends of the two lists + contained in these two sections. */ + +-static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"))) ++static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"), used)) + = { (func_ptr) 0 }; + +-static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"))) ++static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"), used)) + = { (func_ptr) 0 }; + ++#ifdef SUPPORT_UNWINDING_DWARF2 ++/* ZERO terminator in .eh_frame section. */ ++asm ("\n\ ++ .section .eh_frame,\"aw\",@progbits\n\ ++ .global __EH_FRAME_END__\n\ ++ .type __EH_FRAME_END__, @object\n\ ++ .align 2\n\ ++__EH_FRAME_END__:\n\ ++ ! 
End location of eh_frame section with ZERO terminator\n\ ++ .word 0\n\ ++ .previous\n\ ++"); ++#endif ++ + /* Run all global constructors for the program. + Note that they are run in reverse order. */ + + static void __do_global_ctors (void) +-asm ("__do_global_ctors") __attribute__ ((section (".text"))); ++asm ("__do_global_ctors") __attribute__ ((section (".text"), used)); + + static void + __do_global_ctors (void) + { + func_ptr *p; +- for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--) ++ for (p = __CTOR_END__ - 1; *p; p--) + (*p) (); + } + +diff --git a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc +index 3e978b4..a519df8 100644 +--- a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc ++++ b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc +@@ -26,13 +26,26 @@ + .macro ADJ_INTR_LVL + #if defined(NDS32_NESTED) /* Nested handler. */ + mfsr $r3, $PSW ++ /* By substracting 1 from $PSW, we can lower PSW.INTL ++ and enable GIE simultaneously. */ + addi $r3, $r3, #-0x1 ++ #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ ++ #endif + mtsr $r3, $PSW + #elif defined(NDS32_NESTED_READY) /* Nested ready handler. */ + /* Save ipc and ipsw and lower INT level. */ + mfsr $r3, $PSW + addi $r3, $r3, #-0x2 ++ #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ ++ #endif + mtsr $r3, $PSW + #else /* Not nested handler. */ ++ #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ mfsr $r3, $PSW ++ ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ ++ mtsr $r3, $PSW ++ #endif + #endif + .endm +diff --git a/libgcc/config/nds32/isr-library/excp_isr.S b/libgcc/config/nds32/isr-library/excp_isr.S +index 6179a98..f1a3b59 100644 +--- a/libgcc/config/nds32/isr-library/excp_isr.S ++++ b/libgcc/config/nds32/isr-library/excp_isr.S +@@ -23,6 +23,7 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + ++#include "save_usr_regs.inc" + #include "save_mac_regs.inc" + #include "save_fpu_regs.inc" + #include "save_fpu_regs_00.inc" +@@ -32,35 +33,33 @@ + #include "save_all.inc" + #include "save_partial.inc" + #include "adj_intr_lvl.inc" +-#include "restore_mac_regs.inc" + #include "restore_fpu_regs_00.inc" + #include "restore_fpu_regs_01.inc" + #include "restore_fpu_regs_02.inc" + #include "restore_fpu_regs_03.inc" + #include "restore_fpu_regs.inc" ++#include "restore_mac_regs.inc" ++#include "restore_usr_regs.inc" + #include "restore_all.inc" + #include "restore_partial.inc" ++ + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ + .align 1 +-/* +- First Level Handlers +- 1. First Level Handlers are invokded in vector section via jump instruction +- with specific names for different configurations. +- 2. Naming Format: _nds32_e_SR_NT for exception handlers. +- _nds32_i_SR_NT for interrupt handlers. +- 2.1 All upper case letters are replaced with specific lower case letters encodings. +- 2.2 SR: Saved Registers +- sa: Save All regs (context) +- ps: Partial Save (all caller-saved regs) +- 2.3 NT: Nested Type +- ns: nested +- nn: not nested +- nr: nested ready +-*/ +- +-/* +- This is original 16-byte vector size version. +-*/ ++ ++/* First Level Handlers ++ 1. First Level Handlers are invokded in vector section via jump instruction ++ with specific names for different configurations. ++ 2. Naming Format: _nds32_e_SR_NT for exception handlers. ++ _nds32_i_SR_NT for interrupt handlers. ++ 2.1 All upper case letters are replaced with specific lower case letters encodings. 
++ 2.2 SR -- Saved Registers ++ sa: Save All regs (context) ++ ps: Partial Save (all caller-saved regs) ++ 2.3 NT -- Nested Type ++ ns: nested ++ nn: not nested ++ nr: nested ready */ ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .globl _nds32_e_sa_ns +@@ -91,21 +90,26 @@ _nds32_e_ps_nn: + #endif /* endif for Nest Type */ + #endif /* not NDS32_SAVE_ALL_REGS */ + +-/* +- This is 16-byte vector size version. +- The vector id was restored into $r0 in vector by compiler. +-*/ ++ ++/* For 4-byte vector size version, the vector id is ++ extracted from $ITYPE and is set into $r0 by library. ++ For 16-byte vector size version, the vector id ++ is set into $r0 in vector section by compiler. */ ++ ++/* Save used registers. */ + #ifdef NDS32_SAVE_ALL_REGS + SAVE_ALL + #else + SAVE_PARTIAL + #endif ++ + /* Prepare to call 2nd level handler. */ + la $r2, _nds32_jmptbl_00 + lw $r2, [$r2 + $r0 << #2] + ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ + jral $r2 +- /* Restore used registers. */ ++ ++/* Restore used registers. */ + #ifdef NDS32_SAVE_ALL_REGS + RESTORE_ALL + #else +@@ -113,6 +117,7 @@ _nds32_e_ps_nn: + #endif + iret + ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .size _nds32_e_sa_ns, .-_nds32_e_sa_ns +diff --git a/libgcc/config/nds32/isr-library/excp_isr_4b.S b/libgcc/config/nds32/isr-library/excp_isr_4b.S +deleted file mode 100644 +index af70c7a..0000000 +--- a/libgcc/config/nds32/isr-library/excp_isr_4b.S ++++ /dev/null +@@ -1,133 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. 
+- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +-#include "save_mac_regs.inc" +-#include "save_fpu_regs.inc" +-#include "save_fpu_regs_00.inc" +-#include "save_fpu_regs_01.inc" +-#include "save_fpu_regs_02.inc" +-#include "save_fpu_regs_03.inc" +-#include "save_all.inc" +-#include "save_partial.inc" +-#include "adj_intr_lvl.inc" +-#include "restore_mac_regs.inc" +-#include "restore_fpu_regs_00.inc" +-#include "restore_fpu_regs_01.inc" +-#include "restore_fpu_regs_02.inc" +-#include "restore_fpu_regs_03.inc" +-#include "restore_fpu_regs.inc" +-#include "restore_all.inc" +-#include "restore_partial.inc" +- .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ +- .align 1 +-/* +- First Level Handlers +- 1. First Level Handlers are invokded in vector section via jump instruction +- with specific names for different configurations. +- 2. Naming Format: _nds32_e_SR_NT for exception handlers. +- _nds32_i_SR_NT for interrupt handlers. +- 2.1 All upper case letters are replaced with specific lower case letters encodings. +- 2.2 SR: Saved Registers +- sa: Save All regs (context) +- ps: Partial Save (all caller-saved regs) +- 2.3 NT: Nested Type +- ns: nested +- nn: not nested +- nr: nested ready +-*/ +- +-/* +- This is 4-byte vector size version. +- The "_4b" postfix was added for 4-byte version symbol. 
+-*/ +-#ifdef NDS32_SAVE_ALL_REGS +-#if defined(NDS32_NESTED) +- .globl _nds32_e_sa_ns_4b +- .type _nds32_e_sa_ns_4b, @function +-_nds32_e_sa_ns_4b: +-#elif defined(NDS32_NESTED_READY) +- .globl _nds32_e_sa_nr_4b +- .type _nds32_e_sa_nr_4b, @function +-_nds32_e_sa_nr_4b: +-#else /* Not nested handler. */ +- .globl _nds32_e_sa_nn_4b +- .type _nds32_e_sa_nn_4b, @function +-_nds32_e_sa_nn_4b: +-#endif /* endif for Nest Type */ +-#else /* not NDS32_SAVE_ALL_REGS */ +-#if defined(NDS32_NESTED) +- .globl _nds32_e_ps_ns_4b +- .type _nds32_e_ps_ns_4b, @function +-_nds32_e_ps_ns_4b: +-#elif defined(NDS32_NESTED_READY) +- .globl _nds32_e_ps_nr_4b +- .type _nds32_e_ps_nr_4b, @function +-_nds32_e_ps_nr_4b: +-#else /* Not nested handler. */ +- .globl _nds32_e_ps_nn_4b +- .type _nds32_e_ps_nn_4b, @function +-_nds32_e_ps_nn_4b: +-#endif /* endif for Nest Type */ +-#endif /* not NDS32_SAVE_ALL_REGS */ +- +-/* +- This is 4-byte vector size version. +- The vector id was restored into $lp in vector by compiler. +-*/ +-#ifdef NDS32_SAVE_ALL_REGS +- SAVE_ALL_4B +-#else +- SAVE_PARTIAL_4B +-#endif +- /* Prepare to call 2nd level handler. */ +- la $r2, _nds32_jmptbl_00 +- lw $r2, [$r2 + $r0 << #2] +- ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ +- jral $r2 +- /* Restore used registers. */ +-#ifdef NDS32_SAVE_ALL_REGS +- RESTORE_ALL +-#else +- RESTORE_PARTIAL +-#endif +- iret +- +-#ifdef NDS32_SAVE_ALL_REGS +-#if defined(NDS32_NESTED) +- .size _nds32_e_sa_ns_4b, .-_nds32_e_sa_ns_4b +-#elif defined(NDS32_NESTED_READY) +- .size _nds32_e_sa_nr_4b, .-_nds32_e_sa_nr_4b +-#else /* Not nested handler. */ +- .size _nds32_e_sa_nn_4b, .-_nds32_e_sa_nn_4b +-#endif /* endif for Nest Type */ +-#else /* not NDS32_SAVE_ALL_REGS */ +-#if defined(NDS32_NESTED) +- .size _nds32_e_ps_ns_4b, .-_nds32_e_ps_ns_4b +-#elif defined(NDS32_NESTED_READY) +- .size _nds32_e_ps_nr_4b, .-_nds32_e_ps_nr_4b +-#else /* Not nested handler. 
*/ +- .size _nds32_e_ps_nn_4b, .-_nds32_e_ps_nn_4b +-#endif /* endif for Nest Type */ +-#endif /* not NDS32_SAVE_ALL_REGS */ +diff --git a/libgcc/config/nds32/isr-library/intr_isr.S b/libgcc/config/nds32/isr-library/intr_isr.S +index c55da1c..90c5c25 100644 +--- a/libgcc/config/nds32/isr-library/intr_isr.S ++++ b/libgcc/config/nds32/isr-library/intr_isr.S +@@ -23,6 +23,7 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + ++#include "save_usr_regs.inc" + #include "save_mac_regs.inc" + #include "save_fpu_regs.inc" + #include "save_fpu_regs_00.inc" +@@ -32,35 +33,33 @@ + #include "save_all.inc" + #include "save_partial.inc" + #include "adj_intr_lvl.inc" +-#include "restore_mac_regs.inc" + #include "restore_fpu_regs_00.inc" + #include "restore_fpu_regs_01.inc" + #include "restore_fpu_regs_02.inc" + #include "restore_fpu_regs_03.inc" + #include "restore_fpu_regs.inc" ++#include "restore_mac_regs.inc" ++#include "restore_usr_regs.inc" + #include "restore_all.inc" + #include "restore_partial.inc" ++ + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ + .align 1 +-/* +- First Level Handlers +- 1. First Level Handlers are invokded in vector section via jump instruction +- with specific names for different configurations. +- 2. Naming Format: _nds32_e_SR_NT for exception handlers. +- _nds32_i_SR_NT for interrupt handlers. +- 2.1 All upper case letters are replaced with specific lower case letters encodings. +- 2.2 SR: Saved Registers +- sa: Save All regs (context) +- ps: Partial Save (all caller-saved regs) +- 2.3 NT: Nested Type +- ns: nested +- nn: not nested +- nr: nested ready +-*/ +- +-/* +- This is original 16-byte vector size version. +-*/ ++ ++/* First Level Handlers ++ 1. First Level Handlers are invokded in vector section via jump instruction ++ with specific names for different configurations. ++ 2. Naming Format: _nds32_e_SR_NT for exception handlers. 
++ _nds32_i_SR_NT for interrupt handlers. ++ 2.1 All upper case letters are replaced with specific lower case letters encodings. ++ 2.2 SR -- Saved Registers ++ sa: Save All regs (context) ++ ps: Partial Save (all caller-saved regs) ++ 2.3 NT -- Nested Type ++ ns: nested ++ nn: not nested ++ nr: nested ready */ ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .globl _nds32_i_sa_ns +@@ -91,21 +90,36 @@ _nds32_i_ps_nn: + #endif /* endif for Nest Type */ + #endif /* not NDS32_SAVE_ALL_REGS */ + +-/* +- This is 16-byte vector size version. +- The vector id was restored into $r0 in vector by compiler. +-*/ ++ ++/* For 4-byte vector size version, the vector id is ++ extracted from $ITYPE and is set into $r0 by library. ++ For 16-byte vector size version, the vector id ++ is set into $r0 in vector section by compiler. */ ++ ++/* Save used registers first. */ + #ifdef NDS32_SAVE_ALL_REGS + SAVE_ALL + #else + SAVE_PARTIAL + #endif +- /* Prepare to call 2nd level handler. */ ++ ++/* According to vector size, we need to have different implementation. */ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* Prepare to call 2nd level handler. */ ++ la $r2, _nds32_jmptbl_00 ++ lw $r2, [$r2 + $r0 << #2] ++ addi $r0, $r0, #-9 /* Make interrput vector id zero-based. */ ++ ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ ++ jral $r2 ++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ /* Prepare to call 2nd level handler. */ + la $r2, _nds32_jmptbl_09 /* For zero-based vcetor id. */ + lw $r2, [$r2 + $r0 << #2] + ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ + jral $r2 +- /* Restore used registers. */ ++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ ++/* Restore used registers. 
*/ + #ifdef NDS32_SAVE_ALL_REGS + RESTORE_ALL + #else +@@ -113,6 +127,7 @@ _nds32_i_ps_nn: + #endif + iret + ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .size _nds32_i_sa_ns, .-_nds32_i_sa_ns +diff --git a/libgcc/config/nds32/isr-library/intr_isr_4b.S b/libgcc/config/nds32/isr-library/intr_isr_4b.S +deleted file mode 100644 +index d82c007..0000000 +--- a/libgcc/config/nds32/isr-library/intr_isr_4b.S ++++ /dev/null +@@ -1,134 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +-#include "save_mac_regs.inc" +-#include "save_fpu_regs.inc" +-#include "save_fpu_regs_00.inc" +-#include "save_fpu_regs_01.inc" +-#include "save_fpu_regs_02.inc" +-#include "save_fpu_regs_03.inc" +-#include "save_all.inc" +-#include "save_partial.inc" +-#include "adj_intr_lvl.inc" +-#include "restore_mac_regs.inc" +-#include "restore_fpu_regs_00.inc" +-#include "restore_fpu_regs_01.inc" +-#include "restore_fpu_regs_02.inc" +-#include "restore_fpu_regs_03.inc" +-#include "restore_fpu_regs.inc" +-#include "restore_all.inc" +-#include "restore_partial.inc" +- .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ +- .align 1 +-/* +- First Level Handlers +- 1. First Level Handlers are invokded in vector section via jump instruction +- with specific names for different configurations. +- 2. Naming Format: _nds32_e_SR_NT for exception handlers. +- _nds32_i_SR_NT for interrupt handlers. +- 2.1 All upper case letters are replaced with specific lower case letters encodings. +- 2.2 SR: Saved Registers +- sa: Save All regs (context) +- ps: Partial Save (all caller-saved regs) +- 2.3 NT: Nested Type +- ns: nested +- nn: not nested +- nr: nested ready +-*/ +- +-/* +- This is 4-byte vector size version. +- The "_4b" postfix was added for 4-byte version symbol. +-*/ +-#ifdef NDS32_SAVE_ALL_REGS +-#if defined(NDS32_NESTED) +- .globl _nds32_i_sa_ns_4b +- .type _nds32_i_sa_ns_4b, @function +-_nds32_i_sa_ns_4b: +-#elif defined(NDS32_NESTED_READY) +- .globl _nds32_i_sa_nr_4b +- .type _nds32_i_sa_nr_4b, @function +-_nds32_i_sa_nr_4b: +-#else /* Not nested handler. 
*/ +- .globl _nds32_i_sa_nn_4b +- .type _nds32_i_sa_nn_4b, @function +-_nds32_i_sa_nn_4b: +-#endif /* endif for Nest Type */ +-#else /* not NDS32_SAVE_ALL_REGS */ +-#if defined(NDS32_NESTED) +- .globl _nds32_i_ps_ns_4b +- .type _nds32_i_ps_ns_4b, @function +-_nds32_i_ps_ns_4b: +-#elif defined(NDS32_NESTED_READY) +- .globl _nds32_i_ps_nr_4b +- .type _nds32_i_ps_nr_4b, @function +-_nds32_i_ps_nr_4b: +-#else /* Not nested handler. */ +- .globl _nds32_i_ps_nn_4b +- .type _nds32_i_ps_nn_4b, @function +-_nds32_i_ps_nn_4b: +-#endif /* endif for Nest Type */ +-#endif /* not NDS32_SAVE_ALL_REGS */ +- +-/* +- This is 4-byte vector size version. +- The vector id was restored into $lp in vector by compiler. +-*/ +-#ifdef NDS32_SAVE_ALL_REGS +- SAVE_ALL_4B +-#else +- SAVE_PARTIAL_4B +-#endif +- /* Prepare to call 2nd level handler. */ +- la $r2, _nds32_jmptbl_00 +- lw $r2, [$r2 + $r0 << #2] +- addi $r0, $r0, #-9 /* Make interrput vector id zero-based. */ +- ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ +- jral $r2 +- /* Restore used registers. */ +-#ifdef NDS32_SAVE_ALL_REGS +- RESTORE_ALL +-#else +- RESTORE_PARTIAL +-#endif +- iret +- +-#ifdef NDS32_SAVE_ALL_REGS +-#if defined(NDS32_NESTED) +- .size _nds32_i_sa_ns_4b, .-_nds32_i_sa_ns_4b +-#elif defined(NDS32_NESTED_READY) +- .size _nds32_i_sa_nr_4b, .-_nds32_i_sa_nr_4b +-#else /* Not nested handler. */ +- .size _nds32_i_sa_nn_4b, .-_nds32_i_sa_nn_4b +-#endif /* endif for Nest Type */ +-#else /* not NDS32_SAVE_ALL_REGS */ +-#if defined(NDS32_NESTED) +- .size _nds32_i_ps_ns_4b, .-_nds32_i_ps_ns_4b +-#elif defined(NDS32_NESTED_READY) +- .size _nds32_i_ps_nr_4b, .-_nds32_i_ps_nr_4b +-#else /* Not nested handler. 
*/ +- .size _nds32_i_ps_nn_4b, .-_nds32_i_ps_nn_4b +-#endif /* endif for Nest Type */ +-#endif /* not NDS32_SAVE_ALL_REGS */ +diff --git a/libgcc/config/nds32/isr-library/reset.S b/libgcc/config/nds32/isr-library/reset.S +index 961d731..8b9ccf5 100644 +--- a/libgcc/config/nds32/isr-library/reset.S ++++ b/libgcc/config/nds32/isr-library/reset.S +@@ -26,22 +26,18 @@ + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ + .align 1 + .weak _SDA_BASE_ /* For reset handler only. */ +- .weak _FP_BASE_ /* For reset handler only. */ + .weak _nds32_init_mem /* User defined memory initialization function. */ + .globl _start + .globl _nds32_reset + .type _nds32_reset, @function + _nds32_reset: + _start: +-#ifdef NDS32_EXT_EX9 +- .no_ex9_begin +-#endif + /* Handle NMI and warm boot if any of them exists. */ + beqz $sp, 1f /* Reset, NMI or warm boot? */ + /* Either NMI or warm boot; save all regs. */ + + /* Preserve registers for context-switching. */ +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + /* For 16-reg mode. */ + smw.adm $r0, [$sp], $r10, #0x0 + smw.adm $r15, [$sp], $r15, #0xf +@@ -49,10 +45,9 @@ _start: + /* For 32-reg mode. */ + smw.adm $r0, [$sp], $r27, #0xf + #endif +-#ifdef NDS32_EXT_IFC ++#if __NDS32_EXT_IFC__ + mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ ++ smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte alignment. */ + #endif + + la $gp, _SDA_BASE_ /* Init GP for small data access. */ +@@ -71,12 +66,11 @@ _start: + bnez $r0, 1f /* If fail to resume, do cold boot. */ + + /* Restore registers for context-switching. */ +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. */ ++#if __NDS32_EXT_IFC__ ++ lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep stack 8-byte alignment. 
*/ + mtusr $r1, $IFC_LP + #endif +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + /* For 16-reg mode. */ + lmw.bim $r15, [$sp], $r15, #0xf + lmw.bim $r0, [$sp], $r10, #0x0 +@@ -88,6 +82,17 @@ _start: + + + 1: /* Cold boot. */ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* With vector ID feature for v3 architecture, default vector size is 4-byte. */ ++ /* Set IVB.ESZ = 0 (vector table entry size = 4 bytes) */ ++ mfsr $r0, $IVB ++ li $r1, #0xc000 ++ or $r0, $r0, $r1 ++ xor $r0, $r0, $r1 ++ mtsr $r0, $IVB ++ dsb ++#else ++ /* There is no vector ID feature, so the vector size must be 16-byte. */ + /* Set IVB.ESZ = 1 (vector table entry size = 16 bytes) */ + mfsr $r0, $IVB + li $r1, #0xffff3fff +@@ -95,36 +100,54 @@ _start: + ori $r0, $r0, #0x4000 + mtsr $r0, $IVB + dsb ++#endif + + la $gp, _SDA_BASE_ /* Init $gp. */ +- la $fp, _FP_BASE_ /* Init $fp. */ + la $sp, _stack /* Init $sp. */ +-#ifdef NDS32_EXT_EX9 +-/* +- * Initialize the table base of EX9 instruction +- * ex9 generation needs to disable before the ITB is set +- */ +- mfsr $r0, $MSC_CFG /* Check if HW support of EX9. */ ++ ++#if __NDS32_EXT_EX9__ ++.L_init_itb: ++ /* Initialization for Instruction Table Base (ITB). ++ The symbol _ITB_BASE_ is determined by Linker. ++ Set $ITB only if MSC_CFG.EIT (cr4.b'24) is set. */ ++ mfsr $r0, $MSC_CFG + srli $r0, $r0, 24 + andi $r0, $r0, 0x1 +- beqz $r0, 4f /* Zero means HW does not support EX9. */ +- la $r0, _ITB_BASE_ /* Init $ITB. */ ++ beqz $r0, 4f /* Fall through ? */ ++ la $r0, _ITB_BASE_ + mtusr $r0, $ITB +- .no_ex9_end + 4: + #endif +- la $r15, _nds32_init_mem /* Call DRAM init. _nds32_init_mem +- may written by C language. */ ++ ++#if __NDS32_EXT_FPU_SP__ || __NDS32_EXT_FPU_DP__ ++.L_init_fpu: ++ /* Initialize FPU ++ Set FUCOP_CTL.CP0EN (fucpr.b'0). */ ++ mfsr $r0, $FUCOP_CTL ++ ori $r0, $r0, 0x1 ++ mtsr $r0, $FUCOP_CTL ++ dsb ++ /* According to [bugzilla #9425], set flush-to-zero mode. ++ That is, set $FPCSR.DNZ(b'12) = 1. 
*/ ++ FMFCSR $r0 ++ ori $r0, $r0, 0x1000 ++ FMTCSR $r0 ++ dsb ++#endif ++ ++ /* Call DRAM init. _nds32_init_mem may written by C language. */ ++ la $r15, _nds32_init_mem + beqz $r15, 6f + jral $r15 + 6: + l.w $r15, _nds32_jmptbl_00 /* Load reset handler. */ + jral $r15 +-/* Reset handler() should never return in a RTOS or non-OS system. +- In case it does return, an exception will be generated. +- This exception will be caught either by default break handler or by EDM. +- Default break handle may just do an infinite loop. +- EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ ++ ++ /* Reset handler() should never return in a RTOS or non-OS system. ++ In case it does return, an exception will be generated. ++ This exception will be caught either by default break handler or by EDM. ++ Default break handle may just do an infinite loop. ++ EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ + 5: + break #0x7fff + .size _nds32_reset, .-_nds32_reset +diff --git a/libgcc/config/nds32/isr-library/reset_4b.S b/libgcc/config/nds32/isr-library/reset_4b.S +deleted file mode 100644 +index 792e655..0000000 +--- a/libgcc/config/nds32/isr-library/reset_4b.S ++++ /dev/null +@@ -1,131 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. 
+- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ +- .align 1 +- .weak _SDA_BASE_ /* For reset handler only. */ +- .weak _FP_BASE_ /* For reset handler only. */ +- .weak _nds32_init_mem /* User defined memory initialization function. */ +- .globl _start +- .globl _nds32_reset_4b +- .type _nds32_reset_4b, @function +-_nds32_reset_4b: +-_start: +-#ifdef NDS32_EXT_EX9 +- .no_ex9_begin +-#endif +- /* Handle NMI and warm boot if any of them exists. */ +- beqz $sp, 1f /* Reset, NMI or warm boot? */ +- /* Either NMI or warm boot; save all regs. */ +- +- /* Preserve registers for context-switching. */ +-#ifdef __NDS32_REDUCED_REGS__ +- /* For 16-reg mode. */ +- smw.adm $r0, [$sp], $r10, #0x0 +- smw.adm $r15, [$sp], $r15, #0xf +-#else +- /* For 32-reg mode. */ +- smw.adm $r0, [$sp], $r27, #0xf +-#endif +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ +-#endif +- +- la $gp, _SDA_BASE_ /* Init GP for small data access. */ +- move $r0, $sp /* Init parameter. */ +- mfsr $r1, $ITYPE /* Check ITYPE for NMI or warm boot. */ +- andi $r1, $r1, #0xf +- addi $r1, $r1, #-1 +- beqz $r1, 2f /* Warm boot if true. */ +- l.w $r15, _nds32_nmih /* Load NMI handler. */ +- j 3f +-2: +- l.w $r15, _nds32_wrh /* Load warm boot handler. */ +-3: +- beqz $r15, 1f /* If no handler, do cold boot. */ +- jral $r15 /* Call handler. */ +- bnez $r0, 1f /* If fail to resume, do cold boot. */ +- +- /* Restore registers for context-switching. 
*/ +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. */ +- mtusr $r1, $IFC_LP +-#endif +-#ifdef __NDS32_REDUCED_REGS__ +- /* For 16-reg mode. */ +- lmw.bim $r15, [$sp], $r15, #0xf +- lmw.bim $r0, [$sp], $r10, #0x0 +-#else +- /* For 32-reg mode. */ +- lmw.bim $r0, [$sp], $r27, #0xf +-#endif +- iret /* Resume operation. */ +- +- +-1: /* Cold boot. */ +- /* With vector ID feature, set default vector size to 4B. */ +- /* Set IVB.ESZ = 0 (vector table entry size = 4 bytes) */ +- mfsr $r0, $IVB +- li $r1, #0xc000 +- or $r0, $r0, $r1 +- xor $r0, $r0, $r1 +- mtsr $r0, $IVB +- dsb +- +- la $gp, _SDA_BASE_ /* Init $gp. */ +- la $fp, _FP_BASE_ /* Init $fp. */ +- la $sp, _stack /* Init $sp. */ +-#ifdef NDS32_EXT_EX9 +-/* +- * Initialize the table base of EX9 instruction +- * ex9 generation needs to disable before the ITB is set +- */ +- mfsr $r0, $MSC_CFG /* Check if HW support of EX9. */ +- srli $r0, $r0, 24 +- andi $r0, $r0, 0x1 +- beqz $r0, 4f /* Zero means HW does not support EX9. */ +- la $r0, _ITB_BASE_ /* Init $ITB. */ +- mtusr $r0, $ITB +- .no_ex9_end +-4: +-#endif +- la $r15, _nds32_init_mem /* Call DRAM init. _nds32_init_mem +- may written by C language. */ +- beqz $r15, 6f +- jral $r15 +-6: +- l.w $r15, _nds32_jmptbl_00 /* Load reset handler. */ +- jral $r15 +-/* Reset handler() should never return in a RTOS or non-OS system. +- In case it does return, an exception will be generated. +- This exception will be caught either by default break handler or by EDM. +- Default break handle may just do an infinite loop. +- EDM will notify GDB and GDB will regain control when the ID is 0x7fff. 
*/ +-5: +- break #0x7fff +- .size _nds32_reset_4b, .-_nds32_reset_4b +diff --git a/libgcc/config/nds32/isr-library/restore_all.inc b/libgcc/config/nds32/isr-library/restore_all.inc +index c25b46e..96f87ec 100644 +--- a/libgcc/config/nds32/isr-library/restore_all.inc ++++ b/libgcc/config/nds32/isr-library/restore_all.inc +@@ -31,15 +31,11 @@ + mtsr $r2, $IPSW + RESTORE_FPU_REGS + RESTORE_MAC_REGS +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. */ +- mtusr $r1, $IFC_LP +-#endif +-#ifdef __NDS32_REDUCED_REGS__ ++ RESTORE_USR_REGS ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + lmw.bim $r0, [$sp], $r10, #0x0 /* Restore all regs. */ + lmw.bim $r15, [$sp], $r15, #0xf +-#else /* not __NDS32_REDUCED_REGS__ */ ++#else + lmw.bim $r0, [$sp], $r27, #0xf /* Restore all regs. */ + #endif + .endm +diff --git a/libgcc/config/nds32/isr-library/restore_mac_regs.inc b/libgcc/config/nds32/isr-library/restore_mac_regs.inc +index 0ffc980..a15024c 100644 +--- a/libgcc/config/nds32/isr-library/restore_mac_regs.inc ++++ b/libgcc/config/nds32/isr-library/restore_mac_regs.inc +@@ -24,7 +24,7 @@ + <http://www.gnu.org/licenses/>. */ + + .macro RESTORE_MAC_REGS +-#ifdef NDS32_DX_REGS ++#if __NDS32_DX_REGS__ + lmw.bim $r1, [$sp], $r4, #0x0 + mtusr $r1, $d0.lo + mtusr $r2, $d0.hi +diff --git a/libgcc/config/nds32/isr-library/restore_partial.inc b/libgcc/config/nds32/isr-library/restore_partial.inc +index 70d5421..c07d30e 100644 +--- a/libgcc/config/nds32/isr-library/restore_partial.inc ++++ b/libgcc/config/nds32/isr-library/restore_partial.inc +@@ -31,15 +31,11 @@ + mtsr $r1, $IPC /* Set IPC. */ + mtsr $r2, $IPSW /* Set IPSW. */ + #endif +- RESTORE_FPU_REGS +- RESTORE_MAC_REGS +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. 
*/ +- mtusr $r1, $IFC_LP +-#endif ++ RESTORE_FPU_REGS ++ RESTORE_MAC_REGS ++ RESTORE_USR_REGS + lmw.bim $r0, [$sp], $r5, #0x0 /* Restore all regs. */ +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + lmw.bim $r15, [$sp], $r15, #0x2 + #else + lmw.bim $r15, [$sp], $r27, #0x2 /* Restore all regs. */ +diff --git a/libgcc/config/nds32/isr-library/vec_vid03_4b.S b/libgcc/config/nds32/isr-library/restore_usr_regs.inc +similarity index 72% +rename from libgcc/config/nds32/isr-library/vec_vid03_4b.S +rename to libgcc/config/nds32/isr-library/restore_usr_regs.inc +index cd30906..c8f6e4a 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid03_4b.S ++++ b/libgcc/config/nds32/isr-library/restore_usr_regs.inc +@@ -23,12 +23,20 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +- .section .nds32_vector.03, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_03_4b +- .type _nds32_vector_03_4b, @function +-_nds32_vector_03_4b: +-1: +- j 1b +- .size _nds32_vector_03_4b, .-_nds32_vector_03_4b ++.macro RESTORE_USR_REGS ++#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) ++ lmw.bim $r1, [$sp], $r4, #0x0 ++ mtusr $r1, $IFC_LP ++ mtusr $r2, $LB ++ mtusr $r3, $LE ++ mtusr $r4, $LC ++#elif __NDS32_EXT_IFC__ ++ lmw.bim $r1, [$sp], $r2, #0x0 ++ mtusr $r1, $IFC_LP ++#elif __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ lmw.bim $r1, [$sp], $r4, #0x0 ++ mtusr $r1, $LB ++ mtusr $r2, $LE ++ mtusr $r3, $LC ++#endif ++.endm +diff --git a/libgcc/config/nds32/isr-library/save_all.inc b/libgcc/config/nds32/isr-library/save_all.inc +index 20eb29d..c926664 100644 +--- a/libgcc/config/nds32/isr-library/save_all.inc ++++ b/libgcc/config/nds32/isr-library/save_all.inc +@@ -23,45 +23,42 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +-.macro SAVE_ALL_4B +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ ++/* If vector size is 4-byte, we have to save registers ++ in the macro implementation. */ ++.macro SAVE_ALL ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + smw.adm $r15, [$sp], $r15, #0xf + smw.adm $r0, [$sp], $r10, #0x0 +-#else /* not __NDS32_REDUCED_REGS__ */ ++#else + smw.adm $r0, [$sp], $r27, #0xf +-#endif /* not __NDS32_REDUCED_REGS__ */ +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ + #endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ + move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ + mfsr $r0, $ITYPE /* Get VID to $r0. */ + srli $r0, $r0, #5 +-#ifdef __NDS32_ISA_V2__ + andi $r0, $r0, #127 +-#else +- fexti33 $r0, #6 +-#endif + .endm + ++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ ++/* If vector size is 16-byte, some works can be done in ++ the vector section generated by compiler, so that we ++ can implement less in the macro. */ + .macro SAVE_ALL +-/* SAVE_REG_TBL code has been moved to +- vector table generated by compiler. */ +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ +-#endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ + move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. 
*/ + .endm ++ ++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ +diff --git a/libgcc/config/nds32/isr-library/save_mac_regs.inc b/libgcc/config/nds32/isr-library/save_mac_regs.inc +index ddb5e77..2d79d70 100644 +--- a/libgcc/config/nds32/isr-library/save_mac_regs.inc ++++ b/libgcc/config/nds32/isr-library/save_mac_regs.inc +@@ -24,7 +24,7 @@ + <http://www.gnu.org/licenses/>. */ + + .macro SAVE_MAC_REGS +-#ifdef NDS32_DX_REGS ++#if __NDS32_DX_REGS__ + mfusr $r1, $d0.lo + mfusr $r2, $d0.hi + mfusr $r3, $d1.lo +diff --git a/libgcc/config/nds32/isr-library/save_partial.inc b/libgcc/config/nds32/isr-library/save_partial.inc +index ee514c4..0c6d481 100644 +--- a/libgcc/config/nds32/isr-library/save_partial.inc ++++ b/libgcc/config/nds32/isr-library/save_partial.inc +@@ -23,20 +23,20 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +-.macro SAVE_PARTIAL_4B +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ ++/* If vector size is 4-byte, we have to save registers ++ in the macro implementation. */ ++.macro SAVE_PARTIAL ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + smw.adm $r15, [$sp], $r15, #0x2 +-#else /* not __NDS32_REDUCED_REGS__ */ ++#else + smw.adm $r15, [$sp], $r27, #0x2 +-#endif /* not __NDS32_REDUCED_REGS__ */ +- smw.adm $r0, [$sp], $r5, #0x0 +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ + #endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ smw.adm $r0, [$sp], $r5, #0x0 ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ +@@ -44,26 +44,24 @@ + #endif + mfsr $r0, $ITYPE /* Get VID to $r0. 
*/ + srli $r0, $r0, #5 +-#ifdef __NDS32_ISA_V2__ + andi $r0, $r0, #127 +-#else +- fexti33 $r0, #6 +-#endif + .endm + ++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ ++/* If vector size is 16-byte, some works can be done in ++ the vector section generated by compiler, so that we ++ can implement less in the macro. */ ++ + .macro SAVE_PARTIAL +-/* SAVE_CALLER_REGS code has been moved to +- vector table generated by compiler. */ +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ +-#endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ + #endif + .endm ++ ++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ +diff --git a/libgcc/config/nds32/isr-library/vec_vid00_4b.S b/libgcc/config/nds32/isr-library/save_usr_regs.inc +similarity index 61% +rename from libgcc/config/nds32/isr-library/vec_vid00_4b.S +rename to libgcc/config/nds32/isr-library/save_usr_regs.inc +index e1a37b4..b6807d7 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid00_4b.S ++++ b/libgcc/config/nds32/isr-library/save_usr_regs.inc +@@ -23,12 +23,22 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +- .section .nds32_vector.00, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_00_4b +- .type _nds32_vector_00_4b, @function +-_nds32_vector_00_4b: +-1: +- j 1b +- .size _nds32_vector_00_4b, .-_nds32_vector_00_4b ++.macro SAVE_USR_REGS ++/* Store User Special Registers according to supported ISA extension ++ !!! WATCH OUT !!! Take care of 8-byte alignment issue. */ ++#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) ++ mfusr $r1, $IFC_LP ++ mfusr $r2, $LB ++ mfusr $r3, $LE ++ mfusr $r4, $LC ++ smw.adm $r1, [$sp], $r4, #0x0 /* Save even. 
Ok! */ ++#elif __NDS32_EXT_IFC__ ++ mfusr $r1, $IFC_LP ++ smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte aligned. */ ++#elif (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) ++ mfusr $r1, $LB ++ mfusr $r2, $LE ++ mfusr $r3, $LC ++ smw.adm $r1, [$sp], $r4, #0x0 /* Save extra $r4 to keep stack 8-byte aligned. */ ++#endif ++.endm +diff --git a/libgcc/config/nds32/isr-library/vec_vid00.S b/libgcc/config/nds32/isr-library/vec_vid00.S +index ccdbd19..f02e92c 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid00.S ++++ b/libgcc/config/nds32/isr-library/vec_vid00.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.00, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_00 + .type _nds32_vector_00, @function + _nds32_vector_00: +diff --git a/libgcc/config/nds32/isr-library/vec_vid01.S b/libgcc/config/nds32/isr-library/vec_vid01.S +index ed5a88e..542fcf8 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid01.S ++++ b/libgcc/config/nds32/isr-library/vec_vid01.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.01, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_01 + .type _nds32_vector_01, @function + _nds32_vector_01: +diff --git a/libgcc/config/nds32/isr-library/vec_vid01_4b.S b/libgcc/config/nds32/isr-library/vec_vid01_4b.S +deleted file mode 100644 +index 239bd75..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid01_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.01, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_01_4b +- .type _nds32_vector_01_4b, @function +-_nds32_vector_01_4b: +-1: +- j 1b +- .size _nds32_vector_01_4b, .-_nds32_vector_01_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid02.S b/libgcc/config/nds32/isr-library/vec_vid02.S +index 1a95a57..72b8b56 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid02.S ++++ b/libgcc/config/nds32/isr-library/vec_vid02.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.02, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_02 + .type _nds32_vector_02, @function + _nds32_vector_02: +diff --git a/libgcc/config/nds32/isr-library/vec_vid02_4b.S b/libgcc/config/nds32/isr-library/vec_vid02_4b.S +deleted file mode 100644 +index c532e62..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid02_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.02, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_02_4b +- .type _nds32_vector_02_4b, @function +-_nds32_vector_02_4b: +-1: +- j 1b +- .size _nds32_vector_02_4b, .-_nds32_vector_02_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid03.S b/libgcc/config/nds32/isr-library/vec_vid03.S +index 9bc572a..b0f8a60 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid03.S ++++ b/libgcc/config/nds32/isr-library/vec_vid03.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.03, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_03 + .type _nds32_vector_03, @function + _nds32_vector_03: +diff --git a/libgcc/config/nds32/isr-library/vec_vid04.S b/libgcc/config/nds32/isr-library/vec_vid04.S +index e8d4e10..d76ef73 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid04.S ++++ b/libgcc/config/nds32/isr-library/vec_vid04.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.04, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_04 + .type _nds32_vector_04, @function + _nds32_vector_04: +diff --git a/libgcc/config/nds32/isr-library/vec_vid04_4b.S b/libgcc/config/nds32/isr-library/vec_vid04_4b.S +deleted file mode 100644 +index 21fc77e..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid04_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.04, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_04_4b +- .type _nds32_vector_04_4b, @function +-_nds32_vector_04_4b: +-1: +- j 1b +- .size _nds32_vector_04_4b, .-_nds32_vector_04_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid05.S b/libgcc/config/nds32/isr-library/vec_vid05.S +index 1621a9d..ed5a5bb 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid05.S ++++ b/libgcc/config/nds32/isr-library/vec_vid05.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.05, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_05 + .type _nds32_vector_05, @function + _nds32_vector_05: +diff --git a/libgcc/config/nds32/isr-library/vec_vid05_4b.S b/libgcc/config/nds32/isr-library/vec_vid05_4b.S +deleted file mode 100644 +index b86fe19..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid05_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.05, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_05_4b +- .type _nds32_vector_05_4b, @function +-_nds32_vector_05_4b: +-1: +- j 1b +- .size _nds32_vector_05_4b, .-_nds32_vector_05_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid06.S b/libgcc/config/nds32/isr-library/vec_vid06.S +index 934f0b1..834c7de 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid06.S ++++ b/libgcc/config/nds32/isr-library/vec_vid06.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.06, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_06 + .type _nds32_vector_06, @function + _nds32_vector_06: +diff --git a/libgcc/config/nds32/isr-library/vec_vid06_4b.S b/libgcc/config/nds32/isr-library/vec_vid06_4b.S +deleted file mode 100644 +index 3624cfd..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid06_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.06, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_06_4b +- .type _nds32_vector_06_4b, @function +-_nds32_vector_06_4b: +-1: +- j 1b +- .size _nds32_vector_06_4b, .-_nds32_vector_06_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid07.S b/libgcc/config/nds32/isr-library/vec_vid07.S +index 0b0484d..cb3b33a 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid07.S ++++ b/libgcc/config/nds32/isr-library/vec_vid07.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.07, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_07 + .type _nds32_vector_07, @function + _nds32_vector_07: +diff --git a/libgcc/config/nds32/isr-library/vec_vid07_4b.S b/libgcc/config/nds32/isr-library/vec_vid07_4b.S +deleted file mode 100644 +index 997ca75..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid07_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.07, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_07_4b +- .type _nds32_vector_07_4b, @function +-_nds32_vector_07_4b: +-1: +- j 1b +- .size _nds32_vector_07_4b, .-_nds32_vector_07_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid08.S b/libgcc/config/nds32/isr-library/vec_vid08.S +index 2a30375..b4ae947 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid08.S ++++ b/libgcc/config/nds32/isr-library/vec_vid08.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.08, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_08 + .type _nds32_vector_08, @function + _nds32_vector_08: +diff --git a/libgcc/config/nds32/isr-library/vec_vid08_4b.S b/libgcc/config/nds32/isr-library/vec_vid08_4b.S +deleted file mode 100644 +index 83546d1..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid08_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.08, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_08_4b +- .type _nds32_vector_08_4b, @function +-_nds32_vector_08_4b: +-1: +- j 1b +- .size _nds32_vector_08_4b, .-_nds32_vector_08_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid09.S b/libgcc/config/nds32/isr-library/vec_vid09.S +index 9aeaf78..47fa5c1 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid09.S ++++ b/libgcc/config/nds32/isr-library/vec_vid09.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.09, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_09 + .type _nds32_vector_09, @function + _nds32_vector_09: +diff --git a/libgcc/config/nds32/isr-library/vec_vid09_4b.S b/libgcc/config/nds32/isr-library/vec_vid09_4b.S +deleted file mode 100644 +index 2d1944f..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid09_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.09, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_09_4b +- .type _nds32_vector_09_4b, @function +-_nds32_vector_09_4b: +-1: +- j 1b +- .size _nds32_vector_09_4b, .-_nds32_vector_09_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid10.S b/libgcc/config/nds32/isr-library/vec_vid10.S +index 411edd7..6bf2c7c 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid10.S ++++ b/libgcc/config/nds32/isr-library/vec_vid10.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.10, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_10 + .type _nds32_vector_10, @function + _nds32_vector_10: +diff --git a/libgcc/config/nds32/isr-library/vec_vid10_4b.S b/libgcc/config/nds32/isr-library/vec_vid10_4b.S +deleted file mode 100644 +index 04761ab..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid10_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.10, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_10_4b +- .type _nds32_vector_10_4b, @function +-_nds32_vector_10_4b: +-1: +- j 1b +- .size _nds32_vector_10_4b, .-_nds32_vector_10_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid11.S b/libgcc/config/nds32/isr-library/vec_vid11.S +index 8de45a4..86975ea 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid11.S ++++ b/libgcc/config/nds32/isr-library/vec_vid11.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.11, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_11 + .type _nds32_vector_11, @function + _nds32_vector_11: +diff --git a/libgcc/config/nds32/isr-library/vec_vid11_4b.S b/libgcc/config/nds32/isr-library/vec_vid11_4b.S +deleted file mode 100644 +index 328c1e6..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid11_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.11, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_11_4b +- .type _nds32_vector_11_4b, @function +-_nds32_vector_11_4b: +-1: +- j 1b +- .size _nds32_vector_11_4b, .-_nds32_vector_11_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid12.S b/libgcc/config/nds32/isr-library/vec_vid12.S +index ff5c6df..07cb7de 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid12.S ++++ b/libgcc/config/nds32/isr-library/vec_vid12.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.12, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_12 + .type _nds32_vector_12, @function + _nds32_vector_12: +diff --git a/libgcc/config/nds32/isr-library/vec_vid12_4b.S b/libgcc/config/nds32/isr-library/vec_vid12_4b.S +deleted file mode 100644 +index 52b7d23..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid12_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.12, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_12_4b +- .type _nds32_vector_12_4b, @function +-_nds32_vector_12_4b: +-1: +- j 1b +- .size _nds32_vector_12_4b, .-_nds32_vector_12_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid13.S b/libgcc/config/nds32/isr-library/vec_vid13.S +index 66014c3..5ac1a83 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid13.S ++++ b/libgcc/config/nds32/isr-library/vec_vid13.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.13, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_13 + .type _nds32_vector_13, @function + _nds32_vector_13: +diff --git a/libgcc/config/nds32/isr-library/vec_vid13_4b.S b/libgcc/config/nds32/isr-library/vec_vid13_4b.S +deleted file mode 100644 +index 59029ad..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid13_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.13, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_13_4b +- .type _nds32_vector_13_4b, @function +-_nds32_vector_13_4b: +-1: +- j 1b +- .size _nds32_vector_13_4b, .-_nds32_vector_13_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid14.S b/libgcc/config/nds32/isr-library/vec_vid14.S +index ca6f66f..5116f2f 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid14.S ++++ b/libgcc/config/nds32/isr-library/vec_vid14.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.14, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_14 + .type _nds32_vector_14, @function + _nds32_vector_14: +diff --git a/libgcc/config/nds32/isr-library/vec_vid14_4b.S b/libgcc/config/nds32/isr-library/vec_vid14_4b.S +deleted file mode 100644 +index 0d2afe4..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid14_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.14, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_14_4b +- .type _nds32_vector_14_4b, @function +-_nds32_vector_14_4b: +-1: +- j 1b +- .size _nds32_vector_14_4b, .-_nds32_vector_14_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid15.S b/libgcc/config/nds32/isr-library/vec_vid15.S +index c94b42a..03449c0 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid15.S ++++ b/libgcc/config/nds32/isr-library/vec_vid15.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.15, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_15 + .type _nds32_vector_15, @function + _nds32_vector_15: +diff --git a/libgcc/config/nds32/isr-library/vec_vid15_4b.S b/libgcc/config/nds32/isr-library/vec_vid15_4b.S +deleted file mode 100644 +index 60799d7..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid15_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.15, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_15_4b +- .type _nds32_vector_15_4b, @function +-_nds32_vector_15_4b: +-1: +- j 1b +- .size _nds32_vector_15_4b, .-_nds32_vector_15_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid16.S b/libgcc/config/nds32/isr-library/vec_vid16.S +index f19454d..b01d673 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid16.S ++++ b/libgcc/config/nds32/isr-library/vec_vid16.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.16, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_16 + .type _nds32_vector_16, @function + _nds32_vector_16: +diff --git a/libgcc/config/nds32/isr-library/vec_vid16_4b.S b/libgcc/config/nds32/isr-library/vec_vid16_4b.S +deleted file mode 100644 +index 6791204..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid16_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.16, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_16_4b +- .type _nds32_vector_16_4b, @function +-_nds32_vector_16_4b: +-1: +- j 1b +- .size _nds32_vector_16_4b, .-_nds32_vector_16_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid17.S b/libgcc/config/nds32/isr-library/vec_vid17.S +index 486a0aa..c6ed785 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid17.S ++++ b/libgcc/config/nds32/isr-library/vec_vid17.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.17, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_17 + .type _nds32_vector_17, @function + _nds32_vector_17: +diff --git a/libgcc/config/nds32/isr-library/vec_vid17_4b.S b/libgcc/config/nds32/isr-library/vec_vid17_4b.S +deleted file mode 100644 +index 04f4285..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid17_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.17, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_17_4b +- .type _nds32_vector_17_4b, @function +-_nds32_vector_17_4b: +-1: +- j 1b +- .size _nds32_vector_17_4b, .-_nds32_vector_17_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid18.S b/libgcc/config/nds32/isr-library/vec_vid18.S +index 137511f..e0e7b7e 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid18.S ++++ b/libgcc/config/nds32/isr-library/vec_vid18.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.18, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_18 + .type _nds32_vector_18, @function + _nds32_vector_18: +diff --git a/libgcc/config/nds32/isr-library/vec_vid18_4b.S b/libgcc/config/nds32/isr-library/vec_vid18_4b.S +deleted file mode 100644 +index 4d80192..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid18_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.18, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_18_4b +- .type _nds32_vector_18_4b, @function +-_nds32_vector_18_4b: +-1: +- j 1b +- .size _nds32_vector_18_4b, .-_nds32_vector_18_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid19.S b/libgcc/config/nds32/isr-library/vec_vid19.S +index 791e135..ef7075f 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid19.S ++++ b/libgcc/config/nds32/isr-library/vec_vid19.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.19, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_19 + .type _nds32_vector_19, @function + _nds32_vector_19: +diff --git a/libgcc/config/nds32/isr-library/vec_vid19_4b.S b/libgcc/config/nds32/isr-library/vec_vid19_4b.S +deleted file mode 100644 +index 87d4c7c..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid19_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.19, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_19_4b +- .type _nds32_vector_19_4b, @function +-_nds32_vector_19_4b: +-1: +- j 1b +- .size _nds32_vector_19_4b, .-_nds32_vector_19_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid20.S b/libgcc/config/nds32/isr-library/vec_vid20.S +index e7ab0e3..99bcf01 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid20.S ++++ b/libgcc/config/nds32/isr-library/vec_vid20.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.20, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_20 + .type _nds32_vector_20, @function + _nds32_vector_20: +diff --git a/libgcc/config/nds32/isr-library/vec_vid20_4b.S b/libgcc/config/nds32/isr-library/vec_vid20_4b.S +deleted file mode 100644 +index 308385a..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid20_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.20, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_20_4b +- .type _nds32_vector_20_4b, @function +-_nds32_vector_20_4b: +-1: +- j 1b +- .size _nds32_vector_20_4b, .-_nds32_vector_20_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid21.S b/libgcc/config/nds32/isr-library/vec_vid21.S +index 315ae56..8c66bef 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid21.S ++++ b/libgcc/config/nds32/isr-library/vec_vid21.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.21, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_21 + .type _nds32_vector_21, @function + _nds32_vector_21: +diff --git a/libgcc/config/nds32/isr-library/vec_vid21_4b.S b/libgcc/config/nds32/isr-library/vec_vid21_4b.S +deleted file mode 100644 +index 16cf02a..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid21_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.21, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_21_4b +- .type _nds32_vector_21_4b, @function +-_nds32_vector_21_4b: +-1: +- j 1b +- .size _nds32_vector_21_4b, .-_nds32_vector_21_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid22.S b/libgcc/config/nds32/isr-library/vec_vid22.S +index 6f9de85..5c442ce 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid22.S ++++ b/libgcc/config/nds32/isr-library/vec_vid22.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.22, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_22 + .type _nds32_vector_22, @function + _nds32_vector_22: +diff --git a/libgcc/config/nds32/isr-library/vec_vid22_4b.S b/libgcc/config/nds32/isr-library/vec_vid22_4b.S +deleted file mode 100644 +index 587ee7f..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid22_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.22, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_22_4b +- .type _nds32_vector_22_4b, @function +-_nds32_vector_22_4b: +-1: +- j 1b +- .size _nds32_vector_22_4b, .-_nds32_vector_22_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid23.S b/libgcc/config/nds32/isr-library/vec_vid23.S +index 956b585..c5d73df 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid23.S ++++ b/libgcc/config/nds32/isr-library/vec_vid23.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.23, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_23 + .type _nds32_vector_23, @function + _nds32_vector_23: +diff --git a/libgcc/config/nds32/isr-library/vec_vid23_4b.S b/libgcc/config/nds32/isr-library/vec_vid23_4b.S +deleted file mode 100644 +index 5e4b643..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid23_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.23, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_23_4b +- .type _nds32_vector_23_4b, @function +-_nds32_vector_23_4b: +-1: +- j 1b +- .size _nds32_vector_23_4b, .-_nds32_vector_23_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid24.S b/libgcc/config/nds32/isr-library/vec_vid24.S +index 57086e9..fe7dada 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid24.S ++++ b/libgcc/config/nds32/isr-library/vec_vid24.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.24, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_24 + .type _nds32_vector_24, @function + _nds32_vector_24: +diff --git a/libgcc/config/nds32/isr-library/vec_vid24_4b.S b/libgcc/config/nds32/isr-library/vec_vid24_4b.S +deleted file mode 100644 +index 43495f9..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid24_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.24, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_24_4b +- .type _nds32_vector_24_4b, @function +-_nds32_vector_24_4b: +-1: +- j 1b +- .size _nds32_vector_24_4b, .-_nds32_vector_24_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid25.S b/libgcc/config/nds32/isr-library/vec_vid25.S +index 61fa526..ada24e4 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid25.S ++++ b/libgcc/config/nds32/isr-library/vec_vid25.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.25, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_25 + .type _nds32_vector_25, @function + _nds32_vector_25: +diff --git a/libgcc/config/nds32/isr-library/vec_vid25_4b.S b/libgcc/config/nds32/isr-library/vec_vid25_4b.S +deleted file mode 100644 +index 1ce6cf3..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid25_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.25, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_25_4b +- .type _nds32_vector_25_4b, @function +-_nds32_vector_25_4b: +-1: +- j 1b +- .size _nds32_vector_25_4b, .-_nds32_vector_25_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid26.S b/libgcc/config/nds32/isr-library/vec_vid26.S +index 3d9191d..1f97945 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid26.S ++++ b/libgcc/config/nds32/isr-library/vec_vid26.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.26, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_26 + .type _nds32_vector_26, @function + _nds32_vector_26: +diff --git a/libgcc/config/nds32/isr-library/vec_vid26_4b.S b/libgcc/config/nds32/isr-library/vec_vid26_4b.S +deleted file mode 100644 +index 5803247..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid26_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.26, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_26_4b +- .type _nds32_vector_26_4b, @function +-_nds32_vector_26_4b: +-1: +- j 1b +- .size _nds32_vector_26_4b, .-_nds32_vector_26_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid27.S b/libgcc/config/nds32/isr-library/vec_vid27.S +index ff12cfb..f440a8b 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid27.S ++++ b/libgcc/config/nds32/isr-library/vec_vid27.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.27, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_27 + .type _nds32_vector_27, @function + _nds32_vector_27: +diff --git a/libgcc/config/nds32/isr-library/vec_vid27_4b.S b/libgcc/config/nds32/isr-library/vec_vid27_4b.S +deleted file mode 100644 +index d61e3f9..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid27_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.27, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_27_4b +- .type _nds32_vector_27_4b, @function +-_nds32_vector_27_4b: +-1: +- j 1b +- .size _nds32_vector_27_4b, .-_nds32_vector_27_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid28.S b/libgcc/config/nds32/isr-library/vec_vid28.S +index 6b7610e..e1621c7 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid28.S ++++ b/libgcc/config/nds32/isr-library/vec_vid28.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.28, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_28 + .type _nds32_vector_28, @function + _nds32_vector_28: +diff --git a/libgcc/config/nds32/isr-library/vec_vid28_4b.S b/libgcc/config/nds32/isr-library/vec_vid28_4b.S +deleted file mode 100644 +index a39d015..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid28_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.28, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_28_4b +- .type _nds32_vector_28_4b, @function +-_nds32_vector_28_4b: +-1: +- j 1b +- .size _nds32_vector_28_4b, .-_nds32_vector_28_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid29.S b/libgcc/config/nds32/isr-library/vec_vid29.S +index b995841..4fa29c1 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid29.S ++++ b/libgcc/config/nds32/isr-library/vec_vid29.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.29, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_29 + .type _nds32_vector_29, @function + _nds32_vector_29: +diff --git a/libgcc/config/nds32/isr-library/vec_vid29_4b.S b/libgcc/config/nds32/isr-library/vec_vid29_4b.S +deleted file mode 100644 +index 803f323..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid29_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.29, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_29_4b +- .type _nds32_vector_29_4b, @function +-_nds32_vector_29_4b: +-1: +- j 1b +- .size _nds32_vector_29_4b, .-_nds32_vector_29_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid30.S b/libgcc/config/nds32/isr-library/vec_vid30.S +index 57d1507..214e67b 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid30.S ++++ b/libgcc/config/nds32/isr-library/vec_vid30.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.30, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_30 + .type _nds32_vector_30, @function + _nds32_vector_30: +diff --git a/libgcc/config/nds32/isr-library/vec_vid30_4b.S b/libgcc/config/nds32/isr-library/vec_vid30_4b.S +deleted file mode 100644 +index a2a1e3e..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid30_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.30, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_30_4b +- .type _nds32_vector_30_4b, @function +-_nds32_vector_30_4b: +-1: +- j 1b +- .size _nds32_vector_30_4b, .-_nds32_vector_30_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid31.S b/libgcc/config/nds32/isr-library/vec_vid31.S +index f9aee4e..b758b8c 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid31.S ++++ b/libgcc/config/nds32/isr-library/vec_vid31.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.31, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_31 + .type _nds32_vector_31, @function + _nds32_vector_31: +diff --git a/libgcc/config/nds32/isr-library/vec_vid31_4b.S b/libgcc/config/nds32/isr-library/vec_vid31_4b.S +deleted file mode 100644 +index 989645f..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid31_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.31, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_31_4b +- .type _nds32_vector_31_4b, @function +-_nds32_vector_31_4b: +-1: +- j 1b +- .size _nds32_vector_31_4b, .-_nds32_vector_31_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid32.S b/libgcc/config/nds32/isr-library/vec_vid32.S +index fc26cad..58234d5 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid32.S ++++ b/libgcc/config/nds32/isr-library/vec_vid32.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.32, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_32 + .type _nds32_vector_32, @function + _nds32_vector_32: +diff --git a/libgcc/config/nds32/isr-library/vec_vid32_4b.S b/libgcc/config/nds32/isr-library/vec_vid32_4b.S +deleted file mode 100644 +index 1ac7e31..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid32_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.32, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_32_4b +- .type _nds32_vector_32_4b, @function +-_nds32_vector_32_4b: +-1: +- j 1b +- .size _nds32_vector_32_4b, .-_nds32_vector_32_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid33.S b/libgcc/config/nds32/isr-library/vec_vid33.S +index dd655e6..d920352 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid33.S ++++ b/libgcc/config/nds32/isr-library/vec_vid33.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.33, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_33 + .type _nds32_vector_33, @function + _nds32_vector_33: +diff --git a/libgcc/config/nds32/isr-library/vec_vid33_4b.S b/libgcc/config/nds32/isr-library/vec_vid33_4b.S +deleted file mode 100644 +index 3c99412..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid33_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.33, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_33_4b +- .type _nds32_vector_33_4b, @function +-_nds32_vector_33_4b: +-1: +- j 1b +- .size _nds32_vector_33_4b, .-_nds32_vector_33_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid34.S b/libgcc/config/nds32/isr-library/vec_vid34.S +index a6b8517..01999b4 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid34.S ++++ b/libgcc/config/nds32/isr-library/vec_vid34.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.34, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_34 + .type _nds32_vector_34, @function + _nds32_vector_34: +diff --git a/libgcc/config/nds32/isr-library/vec_vid34_4b.S b/libgcc/config/nds32/isr-library/vec_vid34_4b.S +deleted file mode 100644 +index 77c07b9..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid34_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.34, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_34_4b +- .type _nds32_vector_34_4b, @function +-_nds32_vector_34_4b: +-1: +- j 1b +- .size _nds32_vector_34_4b, .-_nds32_vector_34_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid35.S b/libgcc/config/nds32/isr-library/vec_vid35.S +index 65ceeab..7ab0536 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid35.S ++++ b/libgcc/config/nds32/isr-library/vec_vid35.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.35, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_35 + .type _nds32_vector_35, @function + _nds32_vector_35: +diff --git a/libgcc/config/nds32/isr-library/vec_vid35_4b.S b/libgcc/config/nds32/isr-library/vec_vid35_4b.S +deleted file mode 100644 +index 432873a..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid35_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.35, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_35_4b +- .type _nds32_vector_35_4b, @function +-_nds32_vector_35_4b: +-1: +- j 1b +- .size _nds32_vector_35_4b, .-_nds32_vector_35_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid36.S b/libgcc/config/nds32/isr-library/vec_vid36.S +index 688dbb9..5da079d 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid36.S ++++ b/libgcc/config/nds32/isr-library/vec_vid36.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.36, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_36 + .type _nds32_vector_36, @function + _nds32_vector_36: +diff --git a/libgcc/config/nds32/isr-library/vec_vid36_4b.S b/libgcc/config/nds32/isr-library/vec_vid36_4b.S +deleted file mode 100644 +index dadd381..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid36_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.36, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_36_4b +- .type _nds32_vector_36_4b, @function +-_nds32_vector_36_4b: +-1: +- j 1b +- .size _nds32_vector_36_4b, .-_nds32_vector_36_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid37.S b/libgcc/config/nds32/isr-library/vec_vid37.S +index 712bbe8..704d6b8 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid37.S ++++ b/libgcc/config/nds32/isr-library/vec_vid37.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.37, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_37 + .type _nds32_vector_37, @function + _nds32_vector_37: +diff --git a/libgcc/config/nds32/isr-library/vec_vid37_4b.S b/libgcc/config/nds32/isr-library/vec_vid37_4b.S +deleted file mode 100644 +index ec845e1..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid37_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.37, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_37_4b +- .type _nds32_vector_37_4b, @function +-_nds32_vector_37_4b: +-1: +- j 1b +- .size _nds32_vector_37_4b, .-_nds32_vector_37_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid38.S b/libgcc/config/nds32/isr-library/vec_vid38.S +index b6e4979..fdfc4a9 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid38.S ++++ b/libgcc/config/nds32/isr-library/vec_vid38.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.38, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_38 + .type _nds32_vector_38, @function + _nds32_vector_38: +diff --git a/libgcc/config/nds32/isr-library/vec_vid38_4b.S b/libgcc/config/nds32/isr-library/vec_vid38_4b.S +deleted file mode 100644 +index 84919ed..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid38_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.38, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_38_4b +- .type _nds32_vector_38_4b, @function +-_nds32_vector_38_4b: +-1: +- j 1b +- .size _nds32_vector_38_4b, .-_nds32_vector_38_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid39.S b/libgcc/config/nds32/isr-library/vec_vid39.S +index 2dee269..00dd245 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid39.S ++++ b/libgcc/config/nds32/isr-library/vec_vid39.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.39, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_39 + .type _nds32_vector_39, @function + _nds32_vector_39: +diff --git a/libgcc/config/nds32/isr-library/vec_vid39_4b.S b/libgcc/config/nds32/isr-library/vec_vid39_4b.S +deleted file mode 100644 +index 8f2f634..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid39_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.39, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_39_4b +- .type _nds32_vector_39_4b, @function +-_nds32_vector_39_4b: +-1: +- j 1b +- .size _nds32_vector_39_4b, .-_nds32_vector_39_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid40.S b/libgcc/config/nds32/isr-library/vec_vid40.S +index fe7508c..82b579f 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid40.S ++++ b/libgcc/config/nds32/isr-library/vec_vid40.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.40, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_40 + .type _nds32_vector_40, @function + _nds32_vector_40: +diff --git a/libgcc/config/nds32/isr-library/vec_vid40_4b.S b/libgcc/config/nds32/isr-library/vec_vid40_4b.S +deleted file mode 100644 +index 0aab8f4..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid40_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.40, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_40_4b +- .type _nds32_vector_40_4b, @function +-_nds32_vector_40_4b: +-1: +- j 1b +- .size _nds32_vector_40_4b, .-_nds32_vector_40_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid41.S b/libgcc/config/nds32/isr-library/vec_vid41.S +index 711fcd5..721c735 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid41.S ++++ b/libgcc/config/nds32/isr-library/vec_vid41.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.41, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_41 + .type _nds32_vector_41, @function + _nds32_vector_41: +diff --git a/libgcc/config/nds32/isr-library/vec_vid41_4b.S b/libgcc/config/nds32/isr-library/vec_vid41_4b.S +deleted file mode 100644 +index e8a8527..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid41_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.41, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_41_4b +- .type _nds32_vector_41_4b, @function +-_nds32_vector_41_4b: +-1: +- j 1b +- .size _nds32_vector_41_4b, .-_nds32_vector_41_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid42.S b/libgcc/config/nds32/isr-library/vec_vid42.S +index 0c6a849..307b51d 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid42.S ++++ b/libgcc/config/nds32/isr-library/vec_vid42.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.42, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_42 + .type _nds32_vector_42, @function + _nds32_vector_42: +diff --git a/libgcc/config/nds32/isr-library/vec_vid42_4b.S b/libgcc/config/nds32/isr-library/vec_vid42_4b.S +deleted file mode 100644 +index cfe184c..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid42_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.42, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_42_4b +- .type _nds32_vector_42_4b, @function +-_nds32_vector_42_4b: +-1: +- j 1b +- .size _nds32_vector_42_4b, .-_nds32_vector_42_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid43.S b/libgcc/config/nds32/isr-library/vec_vid43.S +index 2b4681a..c0ce02d 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid43.S ++++ b/libgcc/config/nds32/isr-library/vec_vid43.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.43, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_43 + .type _nds32_vector_43, @function + _nds32_vector_43: +diff --git a/libgcc/config/nds32/isr-library/vec_vid43_4b.S b/libgcc/config/nds32/isr-library/vec_vid43_4b.S +deleted file mode 100644 +index 3edd606..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid43_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.43, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_43_4b +- .type _nds32_vector_43_4b, @function +-_nds32_vector_43_4b: +-1: +- j 1b +- .size _nds32_vector_43_4b, .-_nds32_vector_43_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid44.S b/libgcc/config/nds32/isr-library/vec_vid44.S +index 232ef41..c2a384c 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid44.S ++++ b/libgcc/config/nds32/isr-library/vec_vid44.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.44, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_44 + .type _nds32_vector_44, @function + _nds32_vector_44: +diff --git a/libgcc/config/nds32/isr-library/vec_vid44_4b.S b/libgcc/config/nds32/isr-library/vec_vid44_4b.S +deleted file mode 100644 +index 0f2b8a3..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid44_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.44, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_44_4b +- .type _nds32_vector_44_4b, @function +-_nds32_vector_44_4b: +-1: +- j 1b +- .size _nds32_vector_44_4b, .-_nds32_vector_44_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid45.S b/libgcc/config/nds32/isr-library/vec_vid45.S +index e2f9863..e13c52b 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid45.S ++++ b/libgcc/config/nds32/isr-library/vec_vid45.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.45, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_45 + .type _nds32_vector_45, @function + _nds32_vector_45: +diff --git a/libgcc/config/nds32/isr-library/vec_vid45_4b.S b/libgcc/config/nds32/isr-library/vec_vid45_4b.S +deleted file mode 100644 +index 7358ec1..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid45_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.45, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_45_4b +- .type _nds32_vector_45_4b, @function +-_nds32_vector_45_4b: +-1: +- j 1b +- .size _nds32_vector_45_4b, .-_nds32_vector_45_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid46.S b/libgcc/config/nds32/isr-library/vec_vid46.S +index f3b93aa..71bfb53 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid46.S ++++ b/libgcc/config/nds32/isr-library/vec_vid46.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.46, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_46 + .type _nds32_vector_46, @function + _nds32_vector_46: +diff --git a/libgcc/config/nds32/isr-library/vec_vid46_4b.S b/libgcc/config/nds32/isr-library/vec_vid46_4b.S +deleted file mode 100644 +index 2782e86..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid46_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.46, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_46_4b +- .type _nds32_vector_46_4b, @function +-_nds32_vector_46_4b: +-1: +- j 1b +- .size _nds32_vector_46_4b, .-_nds32_vector_46_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid47.S b/libgcc/config/nds32/isr-library/vec_vid47.S +index 130c8d7..d1f2131 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid47.S ++++ b/libgcc/config/nds32/isr-library/vec_vid47.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.47, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_47 + .type _nds32_vector_47, @function + _nds32_vector_47: +diff --git a/libgcc/config/nds32/isr-library/vec_vid47_4b.S b/libgcc/config/nds32/isr-library/vec_vid47_4b.S +deleted file mode 100644 +index f237577..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid47_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.47, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_47_4b +- .type _nds32_vector_47_4b, @function +-_nds32_vector_47_4b: +-1: +- j 1b +- .size _nds32_vector_47_4b, .-_nds32_vector_47_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid48.S b/libgcc/config/nds32/isr-library/vec_vid48.S +index f3bca05..4ba5eb9 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid48.S ++++ b/libgcc/config/nds32/isr-library/vec_vid48.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.48, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_48 + .type _nds32_vector_48, @function + _nds32_vector_48: +diff --git a/libgcc/config/nds32/isr-library/vec_vid48_4b.S b/libgcc/config/nds32/isr-library/vec_vid48_4b.S +deleted file mode 100644 +index 3e35f68..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid48_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.48, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_48_4b +- .type _nds32_vector_48_4b, @function +-_nds32_vector_48_4b: +-1: +- j 1b +- .size _nds32_vector_48_4b, .-_nds32_vector_48_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid49.S b/libgcc/config/nds32/isr-library/vec_vid49.S +index 0b32691..dd3d35e 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid49.S ++++ b/libgcc/config/nds32/isr-library/vec_vid49.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.49, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_49 + .type _nds32_vector_49, @function + _nds32_vector_49: +diff --git a/libgcc/config/nds32/isr-library/vec_vid49_4b.S b/libgcc/config/nds32/isr-library/vec_vid49_4b.S +deleted file mode 100644 +index a510bbb..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid49_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.49, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_49_4b +- .type _nds32_vector_49_4b, @function +-_nds32_vector_49_4b: +-1: +- j 1b +- .size _nds32_vector_49_4b, .-_nds32_vector_49_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid50.S b/libgcc/config/nds32/isr-library/vec_vid50.S +index 48334feb..8f801ec 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid50.S ++++ b/libgcc/config/nds32/isr-library/vec_vid50.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.50, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_50 + .type _nds32_vector_50, @function + _nds32_vector_50: +diff --git a/libgcc/config/nds32/isr-library/vec_vid50_4b.S b/libgcc/config/nds32/isr-library/vec_vid50_4b.S +deleted file mode 100644 +index 1f42b73..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid50_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.50, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_50_4b +- .type _nds32_vector_50_4b, @function +-_nds32_vector_50_4b: +-1: +- j 1b +- .size _nds32_vector_50_4b, .-_nds32_vector_50_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid51.S b/libgcc/config/nds32/isr-library/vec_vid51.S +index 4c27f27..445abf9 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid51.S ++++ b/libgcc/config/nds32/isr-library/vec_vid51.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.51, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_51 + .type _nds32_vector_51, @function + _nds32_vector_51: +diff --git a/libgcc/config/nds32/isr-library/vec_vid51_4b.S b/libgcc/config/nds32/isr-library/vec_vid51_4b.S +deleted file mode 100644 +index 7bb8abe..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid51_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.51, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_51_4b +- .type _nds32_vector_51_4b, @function +-_nds32_vector_51_4b: +-1: +- j 1b +- .size _nds32_vector_51_4b, .-_nds32_vector_51_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid52.S b/libgcc/config/nds32/isr-library/vec_vid52.S +index 4c44811..7283975 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid52.S ++++ b/libgcc/config/nds32/isr-library/vec_vid52.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.52, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_52 + .type _nds32_vector_52, @function + _nds32_vector_52: +diff --git a/libgcc/config/nds32/isr-library/vec_vid52_4b.S b/libgcc/config/nds32/isr-library/vec_vid52_4b.S +deleted file mode 100644 +index 4cb89f6..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid52_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.52, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_52_4b +- .type _nds32_vector_52_4b, @function +-_nds32_vector_52_4b: +-1: +- j 1b +- .size _nds32_vector_52_4b, .-_nds32_vector_52_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid53.S b/libgcc/config/nds32/isr-library/vec_vid53.S +index 2882583..299c645 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid53.S ++++ b/libgcc/config/nds32/isr-library/vec_vid53.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.53, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_53 + .type _nds32_vector_53, @function + _nds32_vector_53: +diff --git a/libgcc/config/nds32/isr-library/vec_vid53_4b.S b/libgcc/config/nds32/isr-library/vec_vid53_4b.S +deleted file mode 100644 +index 9abc839..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid53_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.53, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_53_4b +- .type _nds32_vector_53_4b, @function +-_nds32_vector_53_4b: +-1: +- j 1b +- .size _nds32_vector_53_4b, .-_nds32_vector_53_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid54.S b/libgcc/config/nds32/isr-library/vec_vid54.S +index a014c72..ae99390 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid54.S ++++ b/libgcc/config/nds32/isr-library/vec_vid54.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.54, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_54 + .type _nds32_vector_54, @function + _nds32_vector_54: +diff --git a/libgcc/config/nds32/isr-library/vec_vid54_4b.S b/libgcc/config/nds32/isr-library/vec_vid54_4b.S +deleted file mode 100644 +index f736ba8..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid54_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.54, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_54_4b +- .type _nds32_vector_54_4b, @function +-_nds32_vector_54_4b: +-1: +- j 1b +- .size _nds32_vector_54_4b, .-_nds32_vector_54_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid55.S b/libgcc/config/nds32/isr-library/vec_vid55.S +index 44d820c..e75d24a 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid55.S ++++ b/libgcc/config/nds32/isr-library/vec_vid55.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.55, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_55 + .type _nds32_vector_55, @function + _nds32_vector_55: +diff --git a/libgcc/config/nds32/isr-library/vec_vid55_4b.S b/libgcc/config/nds32/isr-library/vec_vid55_4b.S +deleted file mode 100644 +index d09c665..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid55_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.55, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_55_4b +- .type _nds32_vector_55_4b, @function +-_nds32_vector_55_4b: +-1: +- j 1b +- .size _nds32_vector_55_4b, .-_nds32_vector_55_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid56.S b/libgcc/config/nds32/isr-library/vec_vid56.S +index d5cb362..cc4904e 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid56.S ++++ b/libgcc/config/nds32/isr-library/vec_vid56.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.56, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_56 + .type _nds32_vector_56, @function + _nds32_vector_56: +diff --git a/libgcc/config/nds32/isr-library/vec_vid56_4b.S b/libgcc/config/nds32/isr-library/vec_vid56_4b.S +deleted file mode 100644 +index 86b4103..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid56_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.56, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_56_4b +- .type _nds32_vector_56_4b, @function +-_nds32_vector_56_4b: +-1: +- j 1b +- .size _nds32_vector_56_4b, .-_nds32_vector_56_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid57.S b/libgcc/config/nds32/isr-library/vec_vid57.S +index 5fb3ce9..a17ed45 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid57.S ++++ b/libgcc/config/nds32/isr-library/vec_vid57.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.57, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_57 + .type _nds32_vector_57, @function + _nds32_vector_57: +diff --git a/libgcc/config/nds32/isr-library/vec_vid57_4b.S b/libgcc/config/nds32/isr-library/vec_vid57_4b.S +deleted file mode 100644 +index 45c5d29..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid57_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.57, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_57_4b +- .type _nds32_vector_57_4b, @function +-_nds32_vector_57_4b: +-1: +- j 1b +- .size _nds32_vector_57_4b, .-_nds32_vector_57_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid58.S b/libgcc/config/nds32/isr-library/vec_vid58.S +index d420d68..629bf1a 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid58.S ++++ b/libgcc/config/nds32/isr-library/vec_vid58.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.58, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_58 + .type _nds32_vector_58, @function + _nds32_vector_58: +diff --git a/libgcc/config/nds32/isr-library/vec_vid58_4b.S b/libgcc/config/nds32/isr-library/vec_vid58_4b.S +deleted file mode 100644 +index 812470c..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid58_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.58, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_58_4b +- .type _nds32_vector_58_4b, @function +-_nds32_vector_58_4b: +-1: +- j 1b +- .size _nds32_vector_58_4b, .-_nds32_vector_58_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid59.S b/libgcc/config/nds32/isr-library/vec_vid59.S +index 78a1885..540e02e 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid59.S ++++ b/libgcc/config/nds32/isr-library/vec_vid59.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.59, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_59 + .type _nds32_vector_59, @function + _nds32_vector_59: +diff --git a/libgcc/config/nds32/isr-library/vec_vid59_4b.S b/libgcc/config/nds32/isr-library/vec_vid59_4b.S +deleted file mode 100644 +index fa3a467..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid59_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.59, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_59_4b +- .type _nds32_vector_59_4b, @function +-_nds32_vector_59_4b: +-1: +- j 1b +- .size _nds32_vector_59_4b, .-_nds32_vector_59_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid60.S b/libgcc/config/nds32/isr-library/vec_vid60.S +index a6f704d..8658249 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid60.S ++++ b/libgcc/config/nds32/isr-library/vec_vid60.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.60, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_60 + .type _nds32_vector_60, @function + _nds32_vector_60: +diff --git a/libgcc/config/nds32/isr-library/vec_vid60_4b.S b/libgcc/config/nds32/isr-library/vec_vid60_4b.S +deleted file mode 100644 +index 505da2a..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid60_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.60, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_60_4b +- .type _nds32_vector_60_4b, @function +-_nds32_vector_60_4b: +-1: +- j 1b +- .size _nds32_vector_60_4b, .-_nds32_vector_60_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid61.S b/libgcc/config/nds32/isr-library/vec_vid61.S +index 4e79bde..376acb9 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid61.S ++++ b/libgcc/config/nds32/isr-library/vec_vid61.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.61, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_61 + .type _nds32_vector_61, @function + _nds32_vector_61: +diff --git a/libgcc/config/nds32/isr-library/vec_vid61_4b.S b/libgcc/config/nds32/isr-library/vec_vid61_4b.S +deleted file mode 100644 +index 9a0cce5..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid61_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.61, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_61_4b +- .type _nds32_vector_61_4b, @function +-_nds32_vector_61_4b: +-1: +- j 1b +- .size _nds32_vector_61_4b, .-_nds32_vector_61_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid62.S b/libgcc/config/nds32/isr-library/vec_vid62.S +index 5eef0a6..5ab06a8 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid62.S ++++ b/libgcc/config/nds32/isr-library/vec_vid62.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.62, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_62 + .type _nds32_vector_62, @function + _nds32_vector_62: +diff --git a/libgcc/config/nds32/isr-library/vec_vid62_4b.S b/libgcc/config/nds32/isr-library/vec_vid62_4b.S +deleted file mode 100644 +index da8ba28..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid62_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.62, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_62_4b +- .type _nds32_vector_62_4b, @function +-_nds32_vector_62_4b: +-1: +- j 1b +- .size _nds32_vector_62_4b, .-_nds32_vector_62_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid63.S b/libgcc/config/nds32/isr-library/vec_vid63.S +index 0a8c0ad..6646bcc 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid63.S ++++ b/libgcc/config/nds32/isr-library/vec_vid63.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.63, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_63 + .type _nds32_vector_63, @function + _nds32_vector_63: +diff --git a/libgcc/config/nds32/isr-library/vec_vid63_4b.S b/libgcc/config/nds32/isr-library/vec_vid63_4b.S +deleted file mode 100644 +index 8f1045e..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid63_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.63, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_63_4b +- .type _nds32_vector_63_4b, @function +-_nds32_vector_63_4b: +-1: +- j 1b +- .size _nds32_vector_63_4b, .-_nds32_vector_63_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid64.S b/libgcc/config/nds32/isr-library/vec_vid64.S +index b3f034b..f892aec 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid64.S ++++ b/libgcc/config/nds32/isr-library/vec_vid64.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.64, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_64 + .type _nds32_vector_64, @function + _nds32_vector_64: +diff --git a/libgcc/config/nds32/isr-library/vec_vid64_4b.S b/libgcc/config/nds32/isr-library/vec_vid64_4b.S +deleted file mode 100644 +index 81d9679..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid64_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.64, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_64_4b +- .type _nds32_vector_64_4b, @function +-_nds32_vector_64_4b: +-1: +- j 1b +- .size _nds32_vector_64_4b, .-_nds32_vector_64_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid65.S b/libgcc/config/nds32/isr-library/vec_vid65.S +index 72db454..03f79a5 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid65.S ++++ b/libgcc/config/nds32/isr-library/vec_vid65.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.65, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_65 + .type _nds32_vector_65, @function + _nds32_vector_65: +diff --git a/libgcc/config/nds32/isr-library/vec_vid65_4b.S b/libgcc/config/nds32/isr-library/vec_vid65_4b.S +deleted file mode 100644 +index aa9ad2b..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid65_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.65, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_65_4b +- .type _nds32_vector_65_4b, @function +-_nds32_vector_65_4b: +-1: +- j 1b +- .size _nds32_vector_65_4b, .-_nds32_vector_65_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid66.S b/libgcc/config/nds32/isr-library/vec_vid66.S +index 75469e7..ff805bd 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid66.S ++++ b/libgcc/config/nds32/isr-library/vec_vid66.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.66, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_66 + .type _nds32_vector_66, @function + _nds32_vector_66: +diff --git a/libgcc/config/nds32/isr-library/vec_vid66_4b.S b/libgcc/config/nds32/isr-library/vec_vid66_4b.S +deleted file mode 100644 +index 9830fe2..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid66_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.66, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_66_4b +- .type _nds32_vector_66_4b, @function +-_nds32_vector_66_4b: +-1: +- j 1b +- .size _nds32_vector_66_4b, .-_nds32_vector_66_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid67.S b/libgcc/config/nds32/isr-library/vec_vid67.S +index 4b076cd..f592aba 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid67.S ++++ b/libgcc/config/nds32/isr-library/vec_vid67.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.67, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_67 + .type _nds32_vector_67, @function + _nds32_vector_67: +diff --git a/libgcc/config/nds32/isr-library/vec_vid67_4b.S b/libgcc/config/nds32/isr-library/vec_vid67_4b.S +deleted file mode 100644 +index c7e31dd..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid67_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.67, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_67_4b +- .type _nds32_vector_67_4b, @function +-_nds32_vector_67_4b: +-1: +- j 1b +- .size _nds32_vector_67_4b, .-_nds32_vector_67_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid68.S b/libgcc/config/nds32/isr-library/vec_vid68.S +index 7df1cdd..ee2702a 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid68.S ++++ b/libgcc/config/nds32/isr-library/vec_vid68.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.68, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_68 + .type _nds32_vector_68, @function + _nds32_vector_68: +diff --git a/libgcc/config/nds32/isr-library/vec_vid68_4b.S b/libgcc/config/nds32/isr-library/vec_vid68_4b.S +deleted file mode 100644 +index 0d6fcb5..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid68_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.68, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_68_4b +- .type _nds32_vector_68_4b, @function +-_nds32_vector_68_4b: +-1: +- j 1b +- .size _nds32_vector_68_4b, .-_nds32_vector_68_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid69.S b/libgcc/config/nds32/isr-library/vec_vid69.S +index e30e5bf..c152015 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid69.S ++++ b/libgcc/config/nds32/isr-library/vec_vid69.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.69, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_69 + .type _nds32_vector_69, @function + _nds32_vector_69: +diff --git a/libgcc/config/nds32/isr-library/vec_vid69_4b.S b/libgcc/config/nds32/isr-library/vec_vid69_4b.S +deleted file mode 100644 +index 3508162..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid69_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. 
+- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.69, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_69_4b +- .type _nds32_vector_69_4b, @function +-_nds32_vector_69_4b: +-1: +- j 1b +- .size _nds32_vector_69_4b, .-_nds32_vector_69_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid70.S b/libgcc/config/nds32/isr-library/vec_vid70.S +index d436ac5..a3578d6 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid70.S ++++ b/libgcc/config/nds32/isr-library/vec_vid70.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.70, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. 
*/ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_70 + .type _nds32_vector_70, @function + _nds32_vector_70: +diff --git a/libgcc/config/nds32/isr-library/vec_vid70_4b.S b/libgcc/config/nds32/isr-library/vec_vid70_4b.S +deleted file mode 100644 +index f3f0dd6..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid70_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.70, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_70_4b +- .type _nds32_vector_70_4b, @function +-_nds32_vector_70_4b: +-1: +- j 1b +- .size _nds32_vector_70_4b, .-_nds32_vector_70_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid71.S b/libgcc/config/nds32/isr-library/vec_vid71.S +index d7d7ab3..6790888 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid71.S ++++ b/libgcc/config/nds32/isr-library/vec_vid71.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. 
*/ + + .section .nds32_vector.71, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_71 + .type _nds32_vector_71, @function + _nds32_vector_71: +diff --git a/libgcc/config/nds32/isr-library/vec_vid71_4b.S b/libgcc/config/nds32/isr-library/vec_vid71_4b.S +deleted file mode 100644 +index 505c79e..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid71_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .nds32_vector.71, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_71_4b +- .type _nds32_vector_71_4b, @function +-_nds32_vector_71_4b: +-1: +- j 1b +- .size _nds32_vector_71_4b, .-_nds32_vector_71_4b +diff --git a/libgcc/config/nds32/isr-library/vec_vid72.S b/libgcc/config/nds32/isr-library/vec_vid72.S +index 08652d2..32984a0 100644 +--- a/libgcc/config/nds32/isr-library/vec_vid72.S ++++ b/libgcc/config/nds32/isr-library/vec_vid72.S +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.72, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_72 + .type _nds32_vector_72, @function + _nds32_vector_72: +diff --git a/libgcc/config/nds32/isr-library/vec_vid72_4b.S b/libgcc/config/nds32/isr-library/vec_vid72_4b.S +deleted file mode 100644 +index 1083c03..0000000 +--- a/libgcc/config/nds32/isr-library/vec_vid72_4b.S ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* c-isr library stuff of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. 
+- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +- .section .nds32_vector.72, "ax" +- .vec_size 4 +- .align 2 +- .weak _nds32_vector_72_4b +- .type _nds32_vector_72_4b, @function +-_nds32_vector_72_4b: +-1: +- j 1b +- .size _nds32_vector_72_4b, .-_nds32_vector_72_4b +diff --git a/libgcc/config/nds32/lib1asmsrc-mculib.S b/libgcc/config/nds32/lib1asmsrc-mculib.S +deleted file mode 100644 +index bdbcd74..0000000 +--- a/libgcc/config/nds32/lib1asmsrc-mculib.S ++++ /dev/null +@@ -1,5213 +0,0 @@ +-/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. 
*/ +- +- .section .mdebug.abi_nds32 +- .previous +- +- +-/* ------------------------------------------- */ +-/* FPBIT floating point operations for libgcc */ +-/* ------------------------------------------- */ +- +-#ifdef L_addsub_sf +- +- .text +- .align 2 +- .global __subsf3 +- .type __subsf3, @function +-__subsf3: +- push $lp +- pushm $r6, $r9 +- +- move $r2, #0x80000000 +- xor $r1, $r1, $r2 +- +- j .Lsfpadd +- +- .global __addsf3 +- .type __addsf3, @function +-__addsf3: +- push $lp +- pushm $r6, $r9 +-.Lsfpadd: +- srli $r5, $r0, #23 +- andi $r5, $r5, #0xff +- srli $r7, $r1, #23 +- andi $r7, $r7, #0xff +- move $r3, #0x80000000 +- slli $r4, $r0, #8 +- or $r4, $r4, $r3 +- slli $r6, $r1, #8 +- or $r6, $r6, $r3 +- +- addi $r9, $r5, #-1 +- slti $r15, $r9, #0xfe +- beqzs8 .LEspecA +- +-.LElab1: +- addi $r9, $r7, #-1 +- slti $r15, $r9, #0xfe +- beqzs8 .LEspecB +- +-.LElab2: +- sub $r8, $r5, $r7 +- sltsi $r15, $r8, #0 +- bnezs8 .Li1 +- sltsi $r15, $r8, #0x20 +- bnezs8 .Li2 +- move $r6, #2 +- j .Le1 +-.Li2: +- move $r2, $r6 +- srl $r6, $r6, $r8 +- sll $r9, $r6, $r8 +- beq $r9, $r2, .Le1 +- ori $r6, $r6, #2 +- j .Le1 +-.Li1: +- move $r5, $r7 +- subri $r8, $r8, #0 +- sltsi $r15, $r8, #0x20 +- bnezs8 .Li4 +- move $r4, #2 +- j .Le1 +-.Li4: +- move $r2, $r4 +- srl $r4, $r4, $r8 +- sll $r9, $r4, $r8 +- beq $r9, $r2, .Le1 +- ori $r4, $r4, #2 +- +-.Le1: +- and $r8, $r0, $r3 +- xor $r9, $r8, $r1 +- sltsi $r15, $r9, #0 +- bnezs8 .LEsub1 +- +- #ADD($r4, $r6) +- add $r4, $r4, $r6 +- slt $r15, $r4, $r6 +- beqzs8 .LEres +- andi $r9, $r4, #1 +- beqz $r9, .Li7 +- ori $r4, $r4, #2 +-.Li7: +- srli $r4, $r4, #1 +- addi $r5, $r5, #1 +- subri $r15, $r5, #0xff +- bnezs8 .LEres +- move $r4, #0 +- j .LEres +- +-.LEsub1: +- #SUB($r4, $r6) +- move $r15, $r4 +- sub $r4, $r4, $r6 +- slt $r15, $r15, $r4 +- beqzs8 .Li9 +- subri $r4, $r4, #0 +- xor $r8, $r8, $r3 +- j .Le9 +-.Li9: +- beqz $r4, .LEzer +-.Le9: +-#ifdef __NDS32_PERF_EXT__ +- clz $r2, $r4 +-#else +- pushm $r0, $r1 +- pushm $r3, $r5 +- move 
$r0, $r4 +- bal __clzsi2 +- move $r2, $r0 +- popm $r3, $r5 +- popm $r0, $r1 +-#endif +- sub $r5, $r5, $r2 +- sll $r4, $r4, $r2 +- +-.LEres: +- blez $r5, .LEund +- +-.LElab12: +- #ADD($r4, $0x80) +- move $r15, #0x80 +- add $r4, $r4, $r15 +- slt $r15, $r4, $r15 +- +- #ADDC($r5, $0x0) +- add $r5, $r5, $r15 +- srli $r9, $r4, #8 +- andi $r9, $r9, #1 +- sub $r4, $r4, $r9 +- slli $r4, $r4, #1 +- srli $r4, $r4, #9 +- slli $r9, $r5, #23 +- or $r4, $r4, $r9 +- or $r0, $r4, $r8 +- +-.LE999: +- popm $r6, $r9 +- pop $lp +- ret5 $lp +- +-.LEund: +- subri $r2, $r5, #1 +- slti $r15, $r2, #0x20 +- beqzs8 .LEzer +- move $r9, #0x80000000 +- or $r4, $r4, $r9 +- subri $r9, $r2, #0x20 +- sll $r5, $r4, $r9 +- srl $r4, $r4, $r2 +- beqz $r5, .Li10 +- ori $r4, $r4, #1 +-.Li10: +- move $r5, #0 +- addi $r9, $r4, #0x80 +- sltsi $r15, $r9, #0 +- beqzs8 .LElab12 +- move $r5, #1 +- j .LElab12 +- +-.LEspecA: +- bnez $r5, .Li12 +- add $r4, $r4, $r4 +- beqz $r4, .Li13 +-#ifdef __NDS32_PERF_EXT__ +- clz $r8, $r4 +-#else +- pushm $r0, $r5 +- move $r0, $r4 +- bal __clzsi2 +- move $r8, $r0 +- popm $r0, $r5 +-#endif +- sub $r5, $r5, $r8 +- sll $r4, $r4, $r8 +- j .LElab1 +-.Li13: +- subri $r15, $r7, #0xff +- beqzs8 .LEspecB +- move $r9, #0x80000000 +- bne $r1, $r9, .LEretB +-.Li12: +- add $r9, $r4, $r4 +- bnez $r9, .LEnan +- subri $r15, $r7, #0xff +- bnezs8 .LEretA +- xor $r9, $r0, $r1 +- sltsi $r15, $r9, #0 +- bnezs8 .LEnan +- j .LEretB +- +-.LEspecB: +- bnez $r7, .Li15 +- add $r6, $r6, $r6 +- beqz $r6, .LEretA +-#ifdef __NDS32_PERF_EXT__ +- clz $r8, $r6 +-#else +- pushm $r0, $r5 +- move $r0, $r6 +- bal __clzsi2 +- move $r8, $r0 +- popm $r0, $r5 +-#endif +- sub $r7, $r7, $r8 +- sll $r6, $r6, $r8 +- j .LElab2 +-.Li15: +- add $r9, $r6, $r6 +- bnez $r9, .LEnan +- +-.LEretB: +- move $r0, $r1 +- j .LE999 +- +-.LEretA: +- j .LE999 +- +-.LEzer: +- move $r0, #0 +- j .LE999 +- +-.LEnan: +- move $r0, #0xffc00000 +- j .LE999 +- .size __subsf3, .-__subsf3 +- .size __addsf3, .-__addsf3 +-#endif /* L_addsub_sf */ +- 
+- +- +-#ifdef L_sf_to_si +- +- .text +- .align 2 +- .global __fixsfsi +- .type __fixsfsi, @function +-__fixsfsi: +- push $lp +- +- slli $r1, $r0, #8 +- move $r3, #0x80000000 +- or $r1, $r1, $r3 +- srli $r3, $r0, #23 +- andi $r3, $r3, #0xff +- subri $r2, $r3, #0x9e +- blez $r2, .LJspec +- sltsi $r15, $r2, #0x20 +- bnezs8 .Li42 +- move $r0, #0 +- j .LJ999 +-.Li42: +- srl $r1, $r1, $r2 +- sltsi $r15, $r0, #0 +- beqzs8 .Li43 +- subri $r1, $r1, #0 +-.Li43: +- move $r0, $r1 +- +-.LJ999: +- pop $lp +- ret5 $lp +- +-.LJspec: +- move $r3, #0x7f800000 +- slt $r15, $r3, $r0 +- beqzs8 .Li44 +- move $r0, #0x80000000 +- j .LJ999 +-.Li44: +- move $r0, #0x7fffffff +- j .LJ999 +- .size __fixsfsi, .-__fixsfsi +-#endif /* L_sf_to_si */ +- +- +- +-#ifdef L_divsi3 +- +- .text +- .align 2 +- .globl __divsi3 +- .type __divsi3, @function +-__divsi3: +- ! --------------------------------------------------------------------- +- ! neg = 0; +- ! if (a < 0) +- ! { a = -a; +- ! neg = !neg; +- ! } +- ! --------------------------------------------------------------------- +- sltsi $r5, $r0, 0 ! $r5 <- neg = (a < 0) ? 1 : 0 +- subri $r4, $r0, 0 ! $r4 <- a = -a +- cmovn $r0, $r4, $r5 ! $r0 <- a = neg ? -a : a +-.L2: +- ! --------------------------------------------------------------------- +- ! if (b < 0) +- ! --------------------------------------------------------------------- +- bgez $r1, .L3 ! if b >= 0, skip +- ! --------------------------------------------------------------------- +- ! { b=-b; +- ! neg=!neg; +- ! } +- ! --------------------------------------------------------------------- +- subri $r1, $r1, 0 ! $r1 <- b = -b +- subri $r5, $r5, 1 ! $r5 <- neg = !neg +-.L3: +- ! --------------------------------------------------------------------- +- !!res = udivmodsi4 (a, b, 1); +- ! res = 0; +- ! if (den != 0) +- ! --------------------------------------------------------------------- +- movi $r2, 0 ! $r2 <- res = 0 +- beqz $r1, .L1 ! if den == 0, skip +- ! 
--------------------------------------------------------------------- +- ! bit = 1; +- ! --------------------------------------------------------------------- +- movi $r4, 1 ! $r4 <- bit = 1 +-#ifndef __OPTIMIZE_SIZE__ +-.L6: +-#endif +- ! --------------------------------------------------------------------- +- ! while (den < num && bit && !(den & (1L << 31))) +- ! --------------------------------------------------------------------- +- slt $ta, $r1, $r0 ! $ta <- den < num ? +- beqz $ta, .L5 ! if no, skip +- ! --------------------------------------------------------------------- +- ! { den << = 1; +- ! bit << = 1; +- ! } +- ! --------------------------------------------------------------------- +-#if defined (__OPTIMIZE_SIZE__) && !defined (__NDS32_ISA_V3M__) +- clz $r3, $r1 ! $r3 <- leading zero count for den +- clz $ta, $r0 ! $ta <- leading zero count for num +- sub $r3, $r3, $ta ! $r3 <- number of bits to shift +- sll $r1, $r1, $r3 ! $r1 <- den +- sll $r4, $r4, $r3 ! $r2 <- bit +-#else +- slli $r1, $r1, 1 ! $r1 <- den << = 1 +- slli $r4, $r4, 1 ! $r4 <- bit << = 1 +- b .L6 ! continue loop +-#endif +-.L5: +- ! --------------------------------------------------------------------- +- ! while (bit) +- ! { if (num >= den) +- ! --------------------------------------------------------------------- +- slt $ta, $r0, $r1 ! $ta <- num < den ? +- bnez $ta, .L9 ! if yes, skip +- ! --------------------------------------------------------------------- +- ! { num -= den; +- ! res |= bit; +- ! } +- ! --------------------------------------------------------------------- +- sub $r0, $r0, $r1 ! $r0 <- num -= den +- or $r2, $r2, $r4 ! $r2 <- res |= bit +-.L9: +- ! --------------------------------------------------------------------- +- ! bit >> = 1; +- ! den >> = 1; +- ! } +- !!if (modwanted) +- !! return num; +- !!return res; +- ! --------------------------------------------------------------------- +- srli $r4, $r4, 1 ! $r4 <- bit >> = 1 +- srli $r1, $r1, 1 ! 
$r1 <- den >> = 1 +- bnez $r4, .L5 ! if bit != 0, continue loop +-.L1: +- ! --------------------------------------------------------------------- +- ! if (neg) +- ! res = -res; +- ! return res; +- ! --------------------------------------------------------------------- +- subri $r0, $r2, 0 ! $r0 <- -res +- cmovz $r0, $r2, $r5 ! $r0 <- neg ? -res : res +- ! --------------------------------------------------------------------- +- ret +- .size __divsi3, .-__divsi3 +-#endif /* L_divsi3 */ +- +- +- +-#ifdef L_divdi3 +- +- !-------------------------------------- +- #ifdef __big_endian__ +- #define V1H $r0 +- #define V1L $r1 +- #define V2H $r2 +- #define V2L $r3 +- #else +- #define V1H $r1 +- #define V1L $r0 +- #define V2H $r3 +- #define V2L $r2 +- #endif +- !-------------------------------------- +- .text +- .align 2 +- .globl __divdi3 +- .type __divdi3, @function +-__divdi3: +- ! prologue +-#ifdef __NDS32_ISA_V3M__ +- push25 $r10, 0 +-#else +- smw.adm $r6, [$sp], $r10, 2 +-#endif +- ! end of prologue +- move $r8, V1L +- move $r9, V1H +- move $r6, V2L +- move $r7, V2H +- movi $r10, 0 +- bgez V1H, .L80 +- bal __negdi2 +- move $r8, V1L +- move $r9, V1H +- movi $r10, -1 +-.L80: +- bgez $r7, .L81 +- move V1L, $r6 +- move V1H, $r7 +- bal __negdi2 +- move $r6, V1L +- move $r7, V1H +- nor $r10, $r10, $r10 +-.L81: +- move V2L, $r6 +- move V2H, $r7 +- move V1L, $r8 +- move V1H, $r9 +- movi $r4, 0 +- bal __udivmoddi4 +- beqz $r10, .L82 +- bal __negdi2 +-.L82: +- ! epilogue +-#ifdef __NDS32_ISA_V3M__ +- pop25 $r10, 0 +-#else +- lmw.bim $r6, [$sp], $r10, 2 +- ret +-#endif +- .size __divdi3, .-__divdi3 +-#endif /* L_divdi3 */ +- +- +- +-#ifdef L_modsi3 +- +- .text +- .align 2 +- .globl __modsi3 +- .type __modsi3, @function +-__modsi3: +- ! --------------------------------------------------------------------- +- ! neg=0; +- ! if (a<0) +- ! { a=-a; +- ! neg=1; +- ! } +- ! --------------------------------------------------------------------- +- sltsi $r5, $r0, 0 ! $r5 <- neg < 0 ? 
1 : 0 +- subri $r4, $r0, 0 ! $r4 <- -a +- cmovn $r0, $r4, $r5 ! $r0 <- |a| +- ! --------------------------------------------------------------------- +- ! if (b < 0) +-#ifndef __NDS32_PERF_EXT__ +- ! --------------------------------------------------------------------- +- bgez $r1, .L3 ! if b >= 0, skip +- ! --------------------------------------------------------------------- +- ! b = -b; +- ! --------------------------------------------------------------------- +- subri $r1, $r1, 0 ! $r1 <- |b| +-.L3: +- ! --------------------------------------------------------------------- +- !!res = udivmodsi4 (a, b, 1); +- ! if (den != 0) +- ! --------------------------------------------------------------------- +-#else /* __NDS32_PERF_EXT__ */ +- ! b = -b; +- !!res = udivmodsi4 (a, b, 1); +- ! if (den != 0) +- ! --------------------------------------------------------------------- +- abs $r1, $r1 ! $r1 <- |b| +-#endif /* __NDS32_PERF_EXT__ */ +- beqz $r1, .L1 ! if den == 0, skip +- ! --------------------------------------------------------------------- +- ! { bit = 1; +- ! res = 0; +- ! --------------------------------------------------------------------- +- movi $r4, 1 ! $r4 <- bit = 1 +-#ifndef __OPTIMIZE_SIZE__ +-.L6: +-#endif +- ! --------------------------------------------------------------------- +- ! while (den < num&&bit && !(den & (1L << 31))) +- ! --------------------------------------------------------------------- +- slt $ta, $r1, $r0 ! $ta <- den < num ? +- beqz $ta, .L5 ! if no, skip +- ! --------------------------------------------------------------------- +- ! { den << = 1; +- ! bit << = 1; +- ! } +- ! --------------------------------------------------------------------- +-#if defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__) +- clz $r3, $r1 ! $r3 <- leading zero count for den +- clz $ta, $r0 ! $ta <- leading zero count for num +- sub $r3, $r3, $ta ! $r3 <- number of bits to shift +- sll $r1, $r1, $r3 ! $r1 <- den +- sll $r4, $r4, $r3 ! 
$r2 <- bit +-#else +- slli $r1, $r1, 1 ! $r1 <- den << = 1 +- slli $r4, $r4, 1 ! $r4 <- bit << = 1 +- b .L6 ! continue loop +-#endif +-.L5: +- ! --------------------------------------------------------------------- +- ! while (bit) +- ! { if (num >= den) +- ! { num -= den; +- ! res |= bit; +- ! } +- ! bit >> = 1; +- ! den >> = 1; +- ! } +- ! } +- !!if (modwanted) +- !! return num; +- !!return res; +- ! --------------------------------------------------------------------- +- sub $r2, $r0, $r1 ! $r2 <- num - den +- slt $ta, $r0, $r1 ! $ta <- num < den ? +- srli $r4, $r4, 1 ! $r4 <- bit >> = 1 +- cmovz $r0, $r2, $ta ! $r0 <- num = (num < den) ? num : num - den +- srli $r1, $r1, 1 ! $r1 <- den >> = 1 +- bnez $r4, .L5 ! if bit != 0, continue loop +-.L1: +- ! --------------------------------------------------------------------- +- ! if (neg) +- ! res = -res; +- ! return res; +- ! --------------------------------------------------------------------- +- subri $r3, $r0, 0 ! $r3 <- -res +- cmovn $r0, $r3, $r5 ! $r0 <- neg ? -res : res +- ! --------------------------------------------------------------------- +- ret +- .size __modsi3, .-__modsi3 +-#endif /* L_modsi3 */ +- +- +- +-#ifdef L_moddi3 +- +- !-------------------------------------- +- #ifdef __big_endian__ +- #define V1H $r0 +- #define V1L $r1 +- #define V2H $r2 +- #define V2L $r3 +- #else +- #define V1H $r1 +- #define V1L $r0 +- #define V2H $r3 +- #define V2L $r2 +- #endif +- !-------------------------------------- +- .text +- .align 2 +- .globl __moddi3 +- .type __moddi3, @function +-__moddi3: +- ! ===================================================================== +- ! stack allocation: +- ! sp+32 +-----------------------+ +- ! | $lp | +- ! sp+28 +-----------------------+ +- ! | $r6 - $r10 | +- ! sp+8 +-----------------------+ +- ! | | +- ! sp+4 +-----------------------+ +- ! | | +- ! sp +-----------------------+ +- ! ===================================================================== +- ! 
prologue +-#ifdef __NDS32_ISA_V3M__ +- push25 $r10, 8 +-#else +- smw.adm $r6, [$sp], $r10, 2 +- addi $sp, $sp, -8 +-#endif +- ! end of prologue +- !------------------------------------------ +- ! __moddi3 (DWtype u, DWtype v) +- ! { +- ! word_type c = 0; +- ! DWunion uu = {.ll = u}; +- ! DWunion vv = {.ll = v}; +- ! DWtype w; +- ! if (uu.s.high < 0) +- ! c = ~c, +- ! uu.ll = -uu.ll; +- !--------------------------------------------- +- move $r8, V1L +- move $r9, V1H +- move $r6, V2L +- move $r7, V2H +- movi $r10, 0 ! r10 = c = 0 +- bgez V1H, .L80 ! if u > 0 , go L80 +- bal __negdi2 +- move $r8, V1L +- move $r9, V1H +- movi $r10, -1 ! r10 = c = ~c +- !------------------------------------------------ +- ! if (vv.s.high < 0) +- ! vv.ll = -vv.ll; +- !---------------------------------------------- +-.L80: +- bgez $r7, .L81 ! if v > 0 , go L81 +- move V1L, $r6 +- move V1H, $r7 +- bal __negdi2 +- move $r6, V1L +- move $r7, V1H +- !------------------------------------------ +- ! (void) __udivmoddi4 (uu.ll, vv.ll, &w); +- ! if (c) +- ! w = -w; +- ! return w; +- !----------------------------------------- +-.L81: +- move V2L, $r6 +- move V2H, $r7 +- move V1L, $r8 +- move V1H, $r9 +- addi $r4, $sp, 0 +- bal __udivmoddi4 +- lwi $r0, [$sp+(0)] ! le: sp + 0 is low, be: sp + 0 is high +- lwi $r1, [$sp+(4)] ! le: sp + 4 is low, be: sp + 4 is high +- beqz $r10, .L82 +- bal __negdi2 +-.L82: +- ! epilogue +-#ifdef __NDS32_ISA_V3M__ +- pop25 $r10, 8 +-#else +- addi $sp, $sp, 8 +- lmw.bim $r6, [$sp], $r10, 2 +- ret +-#endif +- .size __moddi3, .-__moddi3 +-#endif /* L_moddi3 */ +- +- +- +-#ifdef L_mulsi3 +- +- .text +- .align 2 +- .globl __mulsi3 +- .type __mulsi3, @function +-__mulsi3: +- ! --------------------------------------------------------------------- +- ! r = 0; +- ! while (a) +- ! $r0: r +- ! $r1: b +- ! $r2: a +- ! --------------------------------------------------------------------- +- beqz $r0, .L7 ! if a == 0, done +- move $r2, $r0 ! $r2 <- a +- movi $r0, 0 ! 
$r0 <- r <- 0 +-.L8: +- ! --------------------------------------------------------------------- +- ! { if (a & 1) +- ! r += b; +- ! a >> = 1; +- ! b << = 1; +- ! } +- ! $r0: r +- ! $r1: b +- ! $r2: a +- ! $r3: scratch +- ! $r4: scratch +- ! --------------------------------------------------------------------- +- andi $r3, $r2, 1 ! $r3 <- a & 1 +- add $r4, $r0, $r1 ! $r4 <- r += b +- cmovn $r0, $r4, $r3 ! $r0 <- r +- srli $r2, $r2, 1 ! $r2 <- a >> = 1 +- slli $r1, $r1, 1 ! $r1 <- b << = 1 +- bnez $r2, .L8 ! if a != 0, continue loop +-.L7: +- ! --------------------------------------------------------------------- +- ! $r0: return code +- ! --------------------------------------------------------------------- +- ret +- .size __mulsi3, .-__mulsi3 +-#endif /* L_mulsi3 */ +- +- +- +-#ifdef L_udivsi3 +- +- .text +- .align 2 +- .globl __udivsi3 +- .type __udivsi3, @function +-__udivsi3: +- ! --------------------------------------------------------------------- +- !!res=udivmodsi4(a,b,0); +- ! res=0; +- ! if (den!=0) +- ! --------------------------------------------------------------------- +- movi $r2, 0 ! $r2 <- res=0 +- beqz $r1, .L1 ! if den==0, skip +- ! --------------------------------------------------------------------- +- ! { bit=1; +- ! --------------------------------------------------------------------- +- movi $r4, 1 ! $r4 <- bit=1 +-#ifndef __OPTIMIZE_SIZE__ +-.L6: +-#endif +- ! --------------------------------------------------------------------- +- ! while (den<num +- ! --------------------------------------------------------------------- +- slt $ta, $r1, $r0 ! $ta <- den<num? +- beqz $ta, .L5 ! if no, skip +- ! --------------------------------------------------------------------- +- ! &&bit&&!(den&(1L<<31))) +- ! --------------------------------------------------------------------- +- bltz $r1, .L5 ! if den<0, skip +- ! --------------------------------------------------------------------- +- ! { den<<=1; +- ! bit<<=1; +- ! } +- ! 
--------------------------------------------------------------------- +-#if defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__) +- clz $r3, $r1 ! $r3 <- leading zero count for den +- clz $ta, $r0 ! $ta <- leading zero count for num +- sub $r3, $r3, $ta ! $r3 <- number of bits to shift +- sll $r1, $r1, $r3 ! $r1 <- den +- sll $r2, $r2, $r3 ! $r2 <- bit +-#else +- slli $r1, $r1, 1 ! $r1 <- den<<=1 +- slli $r4, $r4, 1 ! $r4 <- bit<<=1 +- b .L6 ! continue loop +-#endif +-.L5: +- ! --------------------------------------------------------------------- +- ! while (bit) +- ! { if (num>=den) +- ! --------------------------------------------------------------------- +- slt $ta, $r0, $r1 ! $ta <- num<den? +- bnez $ta, .L9 ! if yes, skip +- ! --------------------------------------------------------------------- +- ! { num-=den; +- ! res|=bit; +- ! } +- ! --------------------------------------------------------------------- +- sub $r0, $r0, $r1 ! $r0 <- num-=den +- or $r2, $r2, $r4 ! $r2 <- res|=bit +-.L9: +- ! --------------------------------------------------------------------- +- ! bit>>=1; +- ! den>>=1; +- ! } +- ! } +- !!if (modwanted) +- !! return num; +- !!return res; +- ! --------------------------------------------------------------------- +- srli $r4, $r4, 1 ! $r4 <- bit>>=1 +- srli $r1, $r1, 1 ! $r1 <- den>>=1 +- bnez $r4, .L5 ! if bit!=0, continue loop +-.L1: +- ! --------------------------------------------------------------------- +- ! return res; +- ! --------------------------------------------------------------------- +- move $r0, $r2 ! $r0 <- return value +- ! --------------------------------------------------------------------- +- ! 
--------------------------------------------------------------------- +- ret +- .size __udivsi3, .-__udivsi3 +-#endif /* L_udivsi3 */ +- +- +- +-#ifdef L_udivdi3 +- +- !-------------------------------------- +- #ifdef __big_endian__ +- #define V1H $r0 +- #define V1L $r1 +- #define V2H $r2 +- #define V2L $r3 +- #else +- #define V1H $r1 +- #define V1L $r0 +- #define V2H $r3 +- #define V2L $r2 +- #endif +- !-------------------------------------- +- +- .text +- .align 2 +- .globl __udivdi3 +- .type __udivdi3, @function +-__udivdi3: +- ! prologue +-#ifdef __NDS32_ISA_V3M__ +- push25 $r8, 0 +-#else +- smw.adm $r6, [$sp], $r8, 2 +-#endif +- ! end of prologue +- movi $r4, 0 +- bal __udivmoddi4 +- ! epilogue +-#ifdef __NDS32_ISA_V3M__ +- pop25 $r8, 0 +-#else +- lmw.bim $r6, [$sp], $r8, 2 +- ret +-#endif +- .size __udivdi3, .-__udivdi3 +-#endif /* L_udivdi3 */ +- +- +- +-#ifdef L_udivmoddi4 +- +- .text +- .align 2 +- .globl fudiv_qrnnd +- .type fudiv_qrnnd, @function +- #ifdef __big_endian__ +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- #define W6H $r4 +- #define W6L $r5 +- #define OFFSET_L 4 +- #define OFFSET_H 0 +- #else +- #define P1H $r1 +- #define P1L $r0 +- #define P2H $r3 +- #define P2L $r2 +- #define W6H $r5 +- #define W6L $r4 +- #define OFFSET_L 0 +- #define OFFSET_H 4 +- #endif +-fudiv_qrnnd: +- !------------------------------------------------------ +- ! function: fudiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator) +- ! divides a UDWtype, composed by the UWtype integers,HIGH_NUMERATOR (from $r4) +- ! and LOW_NUMERATOR(from $r5) by DENOMINATOR(from $r6), and places the quotient +- ! in $r7 and the remainder in $r8. +- !------------------------------------------------------ +- ! in reg:$r4(n1), $r5(n0), $r6(d0) +- ! __d1 = ((USItype) (d) >> ((4 * 8) / 2)); +- ! __d0 = ((USItype) (d) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); +- ! __r1 = (n1) % __d1; +- ! __q1 = (n1) / __d1; +- ! 
__m = (USItype) __q1 * __d0; +- ! __r1 = __r1 * ((USItype) 1 << ((4 * 8) / 2)) | ((USItype) (n0) >> ((4 * 8) / 2)); +- ! if (__r1 < __m) +- ! { +- !------------------------------------------------------ +- smw.adm $r0, [$sp], $r4, 2 ! store $lp, when use BASELINE_V1,and must store $r0-$r3 +- srli $r7, $r6, 16 ! $r7 = d1 =__ll_highpart (d) +- movi $ta, 65535 +- and $r8, $r6, $ta ! $r8 = d0 = __ll_lowpart (d) +- +- divr $r9, $r10, $r4, $r7 ! $r9 = q1, $r10 = r1 +- and $r4, $r5, $ta ! $r4 = __ll_lowpart (n0) +- slli $r10, $r10, 16 ! $r10 = r1 << 16 +- srli $ta, $r5, 16 ! $ta = __ll_highpart (n0) +- +- or $r10, $r10, $ta ! $r10 <- $r0|$r3=__r1 +- mul $r5, $r9, $r8 ! $r5 = m = __q1*__d0 +- slt $ta, $r10, $r5 ! $ta <- __r1<__m +- beqz $ta, .L2 !if yes,skip +- !------------------------------------------------------ +- ! __q1--, __r1 += (d); +- ! if (__r1 >= (d)) +- ! { +- !------------------------------------------------------ +- +- add $r10, $r10, $r6 !$r10 <- __r1+d=__r1 +- addi $r9, $r9, -1 !$r9 <- __q1--=__q1 +- slt $ta, $r10, $r6 !$ta <- __r1<d +- bnez $ta, .L2 !if yes,skip +- !------------------------------------------------------ +- ! if (__r1 < __m) +- ! { +- !------------------------------------------------------ +- +- slt $ta, $r10, $r5 !$ta <- __r1<__m +- beqz $ta, .L2 !if yes,skip +- !------------------------------------------------------ +- ! __q1--, __r1 += (d); +- ! } +- ! } +- ! } +- !------------------------------------------------------ +- +- addi $r9, $r9, -1 !$r9 <- __q1--=__q1 +- add $r10, $r10, $r6 !$r2 <- __r1+d=__r1 +-.L2: +- !------------------------------------------------------ +- ! __r1 -= __m; +- ! __r0 = __r1 % __d1; +- ! __q0 = __r1 / __d1; +- ! __m = (USItype) __q0 * __d0; +- ! __r0 = __r0 * ((USItype) 1 << ((4 * 8) / 2)) \ +- ! | ((USItype) (n0) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); +- ! if (__r0 < __m) +- ! 
{ +- !------------------------------------------------------ +- sub $r10, $r10, $r5 !$r10 <- __r1-__m=__r1 +- divr $r7, $r10, $r10, $r7 !$r7 <- r1/__d1=__q0,$r10 <- r1%__d1=__r0 +- slli $r10, $r10, 16 !$r10 <- __r0<<16 +- mul $r5, $r8, $r7 !$r5 <- __q0*__d0=__m +- or $r10, $r4, $r10 !$r3 <- $r0|__ll_lowpart (n0) =__r0 +- slt $ta, $r10, $r5 !$ta <- __r0<__m +- beqz $ta, .L5 !if yes,skip +- !------------------------------------------------------ +- ! __q0--, __r0 += (d); +- ! if (__r0 >= (d)) +- ! { +- !------------------------------------------------------ +- +- add $r10, $r10, $r6 !$r10 <- __r0+d=__r0 +- addi $r7, $r7, -1 !$r7 <- __q0--=__q0 +- slt $ta, $r10, $r6 !$ta <- __r0<d +- bnez $ta, .L5 !if yes,skip +- !------------------------------------------------------ +- ! if (__r0 < __m) +- ! { +- !------------------------------------------------------ +- +- slt $ta, $r10, $r5 !$ta <- __r0<__m +- beqz $ta, .L5 !if yes,skip +- !------------------------------------------------------ +- ! __q0--, __r0 += (d); +- ! } +- ! } +- ! } +- !------------------------------------------------------ +- +- add $r10, $r10, $r6 !$r3 <- __r0+d=__r0 +- addi $r7, $r7, -1 !$r2 <- __q0--=__q0 +-.L5: +- !------------------------------------------------------ +- ! __r0 -= __m; +- ! *q = (USItype) __q1 * ((USItype) 1 << ((4 * 8) / 2)) | __q0; +- ! *r = __r0; +- !} +- !------------------------------------------------------ +- +- sub $r8, $r10, $r5 !$r8 = r = r0 = __r0-__m +- slli $r9, $r9, 16 !$r9 <- __q1<<16 +- or $r7, $r9, $r7 !$r7 = q = $r9|__q0 +- lmw.bim $r0, [$sp], $r4, 2 +- ret +- .size fudiv_qrnnd, .-fudiv_qrnnd +- +- .align 2 +- .globl __udivmoddi4 +- .type __udivmoddi4, @function +-__udivmoddi4: +- ! ===================================================================== +- ! stack allocation: +- ! sp+40 +------------------+ +- ! | q1 | +- ! sp+36 +------------------+ +- ! | q0 | +- ! sp+32 +------------------+ +- ! | bm | +- ! sp+28 +------------------+ +- ! | $lp | +- ! 
sp+24 +------------------+ +- ! | $fp | +- ! sp+20 +------------------+ +- ! | $r6 - $r10 | +- ! sp +------------------+ +- ! ===================================================================== +- +- addi $sp, $sp, -40 +- smw.bi $r6, [$sp], $r10, 10 +- !------------------------------------------------------ +- ! d0 = dd.s.low; +- ! d1 = dd.s.high; +- ! n0 = nn.s.low; +- ! n1 = nn.s.high; +- ! if (d1 == 0) +- ! { +- !------------------------------------------------------ +- +- move $fp, $r4 !$fp <- rp +- bnez P2H, .L9 !if yes,skip +- !------------------------------------------------------ +- ! if (d0 > n1) +- ! { +- !------------------------------------------------------ +- +- slt $ta, P1H, P2L !$ta <- n1<d0 +- beqz $ta, .L10 !if yes,skip +-#ifndef __NDS32_PERF_EXT__ +- smw.adm $r0, [$sp], $r5, 0 +- move $r0, P2L +- bal __clzsi2 +- move $r7, $r0 +- lmw.bim $r0, [$sp], $r5, 0 +-#else +- clz $r7, P2L +-#endif +- swi $r7, [$sp+(28)] +- beqz $r7, .L18 !if yes,skip +- !------------------------------------------------------ +- ! d0 = d0 << bm; +- ! n1 = (n1 << bm) | (n0 >> ((4 * 8) - bm)); +- ! n0 = n0 << bm; +- ! } +- !------------------------------------------------------ +- +- subri $r5, $r7, 32 !$r5 <- 32-bm +- srl $r5, P1L, $r5 !$r5 <- n0>>$r5 +- sll $r6, P1H, $r7 !$r6 <- n1<<bm +- or P1H, $r6, $r5 !P2h <- $r5|$r6=n1 +- sll P1L, P1L, $r7 !P1H <- n0<<bm=n0 +- sll P2L, P2L, $r7 !P2L <- d0<<bm=d0 +-.L18: +- !------------------------------------------------------ +- ! fudiv_qrnnd (&q0, &n0, n1, n0, d0); +- ! q1 = 0; +- ! } #if (d0 > n1) +- !------------------------------------------------------ +- +- move $r4,P1H ! give fudiv_qrnnd args +- move $r5,P1L ! +- move $r6,P2L ! +- bal fudiv_qrnnd !calcaulte q0 n0 +- movi $r6, 0 !P1L <- 0 +- swi $r7,[$sp+32] !q0 +- swi $r6,[$sp+36] !q1 +- move P1L,$r8 !n0 +- b .L19 +-.L10: +- !------------------------------------------------------ +- ! else #if (d0 > n1) +- ! { +- ! 
if(d0 == 0) +- !------------------------------------------------------ +- +- bnez P2L, .L20 !if yes,skip +- !------------------------------------------------------ +- ! d0 = 1 / d0; +- !------------------------------------------------------ +- +- movi $r4, 1 !P1L <- 1 +- divr P2L, $r4, $r4, P2L !$r9=1/d0,P1L=1%d0 +-.L20: +- +-#ifndef __NDS32_PERF_EXT__ +- smw.adm $r0, [$sp], $r5, 0 +- move $r0, P2L +- bal __clzsi2 +- move $r7, $r0 +- lmw.bim $r0, [$sp], $r5, 0 +-#else +- clz $r7, P2L +-#endif +- swi $r7,[$sp+(28)] ! store bm +- beqz $r7, .L28 ! if yes,skip +- !------------------------------------------------------ +- ! b = (4 * 8) - bm; +- ! d0 = d0 << bm; +- ! n2 = n1 >> b; +- ! n1 = (n1 << bm) | (n0 >> b); +- ! n0 = n0 << bm; +- ! fudiv_qrnnd (&q1, &n1, n2, n1, d0); +- ! } +- !------------------------------------------------------ +- +- subri $r10, $r7, 32 !$r10 <- 32-bm=b +- srl $r4, P1L, $r10 !$r4 <- n0>>b +- sll $r5, P1H, $r7 !$r5 <- n1<<bm +- or $r5, $r5, $r4 !$r5 <- $r5|$r4=n1 !for fun +- sll P2L, P2L, $r7 !P2L <- d0<<bm=d0 !for fun +- sll P1L, P1L, $r7 !P1L <- n0<<bm=n0 +- srl $r4, P1H, $r10 !$r4 <- n1>>b=n2 !for fun +- +- move $r6,P2L !for fun +- bal fudiv_qrnnd !caculate q1, n1 +- +- swi $r7,[$sp+(36)] ! q1 store +- move P1H,$r8 ! n1 store +- +- move $r4,$r8 ! prepare for next fudiv_qrnnd() +- move $r5,P1L +- move $r6,P2L +- b .L29 +-.L28: +- !------------------------------------------------------ +- ! else // bm != 0 +- ! { +- ! n1 -= d0; +- ! q1 = 1; +- ! +- !------------------------------------------------------ +- +- sub P1H, P1H, P2L !P1L <- n1-d0=n1 +- movi $ta, 1 ! +- swi $ta, [$sp+(36)] !1 -> [$sp+(36)] +- +- move $r4,P1H ! give fudiv_qrnnd args +- move $r5,P1L +- move $r6,P2L +-.L29: +- !------------------------------------------------------ +- ! 
fudiv_qrnnd (&q0, &n0, n1, n0, d0); +- !------------------------------------------------------ +- +- bal fudiv_qrnnd !calcuate q0, n0 +- swi $r7,[$sp+(32)] !q0 store +- move P1L,$r8 !n0 +-.L19: +- !------------------------------------------------------ +- ! if (rp != 0) +- ! { +- !------------------------------------------------------ +- +- beqz $fp, .L31 !if yes,skip +- !------------------------------------------------------ +- ! rr.s.low = n0 >> bm; +- ! rr.s.high = 0; +- ! *rp = rr.ll; +- ! } +- !------------------------------------------------------ +- +- movi $r5, 0 !$r5 <- 0 +- lwi $r7,[$sp+(28)] !load bm +- srl $r4, P1L, $r7 !$r4 <- n0>>bm +- swi $r4, [$fp+OFFSET_L] !r0 !$r4 -> [$sp+(48)] +- swi $r5, [$fp+OFFSET_H] !r1 !0 -> [$sp+(52)] +- b .L31 +-.L9: +- !------------------------------------------------------ +- ! else # d1 == 0 +- ! { +- ! if(d1 > n1) +- ! { +- !------------------------------------------------------ +- +- slt $ta, P1H, P2H !$ta <- n1<d1 +- beqz $ta, .L32 !if yes,skip +- !------------------------------------------------------ +- ! q0 = 0; +- ! q1 = 0; +- ! if (rp != 0) +- ! { +- !------------------------------------------------------ +- +- movi $r5, 0 !$r5 <- 0 +- swi $r5, [$sp+(32)] !q0 !0 -> [$sp+(40)]=q1 +- swi $r5, [$sp+(36)] !q1 !0 -> [$sp+(32)]=q0 +- beqz $fp, .L31 !if yes,skip +- !------------------------------------------------------ +- ! rr.s.low = n0; +- ! rr.s.high = n1; +- ! *rp = rr.ll; +- ! } +- !------------------------------------------------------ +- +- swi P1L, [$fp+OFFSET_L] !P1L -> [rp] +- swi P1H, [$fp+OFFSET_H] !P1H -> [rp+4] +- b .L31 +-.L32: +-#ifndef __NDS32_PERF_EXT__ +- smw.adm $r0, [$sp], $r5, 0 +- move $r0, P2H +- bal __clzsi2 +- move $r7, $r0 +- lmw.bim $r0, [$sp], $r5, 0 +-#else +- clz $r7,P2H +-#endif +- swi $r7,[$sp+(28)] !$r7=bm store +- beqz $r7, .L42 !if yes,skip +- !------------------------------------------------------ +- ! USItype m1, m0; +- ! b = (4 * 8) - bm; +- ! d1 = (d0 >> b) | (d1 << bm); +- ! 
d0 = d0 << bm; +- ! n2 = n1 >> b; +- ! n1 = (n0 >> b) | (n1 << bm); +- ! n0 = n0 << bm; +- ! fudiv_qrnnd (&q0, &n1, n2, n1, d1); +- !------------------------------------------------------ +- +- subri $r10, $r7, 32 !$r10 <- 32-bm=b +- srl $r5, P2L, $r10 !$r5 <- d0>>b +- sll $r6, P2H, $r7 !$r6 <- d1<<bm +- or $r6, $r5, $r6 !$r6 <- $r5|$r6=d1 !! func +- move P2H, $r6 !P2H <- d1 +- srl $r4, P1H, $r10 !$r4 <- n1>>b=n2 !!! func +- srl $r8, P1L, $r10 !$r8 <- n0>>b !!$r8 +- sll $r9, P1H, $r7 !$r9 <- n1<<bm +- or $r5, $r8, $r9 !$r5 <- $r8|$r9=n1 !func +- sll P2L, P2L, $r7 !P2L <- d0<<bm=d0 +- sll P1L, P1L, $r7 !P1L <- n0<<bm=n0 +- +- bal fudiv_qrnnd ! cal q0,n1 +- swi $r7,[$sp+(32)] +- move P1H,$r8 ! fudiv_qrnnd (&q0, &n1, n2, n1, d1); +- move $r6, $r7 ! from func +- +- !---------------------------------------------------- +- ! #umul_ppmm (m1, m0, q0, d0); +- ! do +- ! { USItype __x0, __x1, __x2, __x3; +- ! USItype __ul, __vl, __uh, __vh; +- ! __ul = ((USItype) (q0) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); +- ! __uh = ((USItype) (q0) >> ((4 * 8) / 2)); +- ! __vl = ((USItype) (d0) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); +- ! __vh = ((USItype) (d0) >> ((4 * 8) / 2)); +- ! __x0 = (USItype) __ul * __vl; +- ! __x1 = (USItype) __ul * __vh; +- ! __x2 = (USItype) __uh * __vl; +- ! __x3 = (USItype) __uh * __vh; +- ! __x1 += ((USItype) (__x0) >> ((4 * 8) / 2)); +- ! __x1 += __x2; +- ! if (__x1 < __x2) +- ! __x3 += ((USItype) 1 << ((4 * 8) / 2)); +- ! (m1) = __x3 + ((USItype) (__x1) >> ((4 * 8) / 2)); +- ! (m0) = (USItype)(q0*d0); +- ! } +- ! 
if (m1 > n1) +- !--------------------------------------------------- +-#ifdef __NDS32_ISA_V3M__ +- !mulr64 $r4, P2L, $r6 +- smw.adm $r0, [$sp], $r3, 0 +- move P1L, P2L +- move P2L, $r6 +- movi P1H, 0 +- movi P2H, 0 +- bal __muldi3 +- movd44 $r4, $r0 +- lmw.bim $r0, [$sp], $r3, 0 +- move $r8, W6H +- move $r5, W6L +-#else +- mulr64 $r4, P2L, $r6 +- move $r8, W6H +- move $r5, W6L +-#endif +- slt $ta, P1H, $r8 !$ta <- n1<m1 +- bnez $ta, .L46 !if yes,skip +- !------------------------------------------------------ +- ! if(m1 == n1) +- !------------------------------------------------------ +- +- bne $r8, P1H, .L45 !if yes,skip +- !------------------------------------------------------ +- ! if(m0 > n0) +- !------------------------------------------------------ +- +- slt $ta, P1L, $r5 !$ta <- n0<m0 +- beqz $ta, .L45 !if yes,skip +-.L46: +- !------------------------------------------------------ +- ! { +- ! q0--; +- ! # sub_ddmmss (m1, m0, m1, m0, d1, d0); +- ! do +- ! { USItype __x; +- ! __x = (m0) - (d0); +- ! (m1) = (m1) - (d1) - (__x > (m0)); +- ! (m0) = __x; +- ! } +- ! } +- !------------------------------------------------------ +- +- sub $r4, $r5, P2L !$r4 <- m0-d0=__x +- addi $r6, $r6, -1 !$r6 <- q0--=q0 +- sub $r8, $r8, P2H !$r8 <- m1-d1 +- swi $r6, [$sp+(32)] ! q0 !$r6->[$sp+(32)] +- slt $ta, $r5, $r4 !$ta <- m0<__x +- sub $r8, $r8, $ta !$r8 <- P1H-P1L=m1 +- move $r5, $r4 !$r5 <- __x=m0 +-.L45: +- !------------------------------------------------------ +- ! q1 = 0; +- ! if (rp != 0) +- ! { +- !------------------------------------------------------ +- +- movi $r4, 0 !$r4 <- 0 +- swi $r4, [$sp+(36)] !0 -> [$sp+(40)]=q1 +- beqz $fp, .L31 !if yes,skip +- !------------------------------------------------------ +- ! # sub_ddmmss (n1, n0, n1, n0, m1, m0); +- ! do +- ! { USItype __x; +- ! __x = (n0) - (m0); +- ! (n1) = (n1) - (m1) - (__x > (n0)); +- ! (n0) = __x; +- ! } +- ! rr.s.low = (n1 << b) | (n0 >> bm); +- ! rr.s.high = n1 >> bm; +- ! 
*rp = rr.ll; +- !------------------------------------------------------ +- +- sub $r4, P1H, $r8 !$r4 <- n1-m1 +- sub $r6, P1L, $r5 !$r6 <- n0-m0=__x=n0 +- slt $ta, P1L, $r6 !$ta <- n0<__x +- sub P1H, $r4, $ta !P1H <- $r4-$ta=n1 +- move P1L, $r6 +- +- lwi $r7,[$sp+(28)] ! load bm +- subri $r10,$r7,32 +- sll $r4, P1H, $r10 !$r4 <- n1<<b +- srl $r5, P1L, $r7 !$r5 <- __x>>bm +- or $r6, $r5, $r4 !$r6 <- $r5|$r4=rr.s.low +- srl $r8, P1H, $r7 !$r8 <- n1>>bm =rr.s.high +- swi $r6, [$fp+OFFSET_L] ! +- swi $r8, [$fp+OFFSET_H] ! +- b .L31 +-.L42: +- !------------------------------------------------------ +- ! else +- ! { +- ! if(n1 > d1) +- !------------------------------------------------------ +- +- slt $ta, P2H, P1H !$ta <- P2H<P1H +- bnez $ta, .L52 !if yes,skip +- !------------------------------------------------------ +- ! if (n0 >= d0) +- !------------------------------------------------------ +- +- slt $ta, P1L, P2L !$ta <- P1L<P2L +- bnez $ta, .L51 !if yes,skip +- !------------------------------------------------------ +- ! q0 = 1; +- ! do +- ! { USItype __x; +- ! __x = (n0) - (d0); +- ! (n1) = (n1) - (d1) - (__x > (n0)); +- ! (n0) = __x; +- ! } +- !------------------------------------------------------ +-.L52: +- sub $r4, P1H, P2H !$r4 <- P1H-P2H +- sub $r6, P1L, P2L !$r6 <- no-d0=__x=n0 +- slt $ta, P1L, $r6 !$ta <- no<__x +- sub P1H, $r4, $ta !P1H <- $r4-$ta=n1 +- move P1L, $r6 !n0 +- movi $r5, 1 ! +- swi $r5, [$sp+(32)] !1 -> [$sp+(32)]=q0 +- b .L54 +-.L51: +- !------------------------------------------------------ +- ! q0 = 0; +- !------------------------------------------------------ +- +- movi $r5,0 +- swi $r5, [$sp+(32)] !$r5=0 -> [$sp+(32)] +-.L54: +- !------------------------------------------------------ +- ! q1 = 0; +- ! if (rp != 0) +- ! { +- !------------------------------------------------------ +- +- movi $r5, 0 ! +- swi $r5, [$sp+(36)] !0 -> [$sp+(36)] +- beqz $fp, .L31 +- !------------------------------------------------------ +- ! rr.s.low = n0; +- ! 
rr.s.high = n1; +- ! *rp = rr.ll; +- ! } +- !------------------------------------------------------ +- +- swi P1L, [$fp+OFFSET_L] !remainder +- swi P1H, [$fp+OFFSET_H] ! +-.L31: +- !------------------------------------------------------ +- ! const DWunion ww = {{.low = q0, .high = q1}}; +- ! return ww.ll; +- !} +- !------------------------------------------------------ +- +- lwi P1L, [$sp+(32)] !quotient +- lwi P1H, [$sp+(36)] +- lmw.bim $r6, [$sp], $r10, 10 +- addi $sp, $sp, 12 +- ret +- .size __udivmoddi4, .-__udivmoddi4 +-#endif /* L_udivmoddi4 */ +- +- +- +-#ifdef L_umodsi3 +- +- ! ===================================================================== +- .text +- .align 2 +- .globl __umodsi3 +- .type __umodsi3, @function +-__umodsi3: +- ! --------------------------------------------------------------------- +- !!res=udivmodsi4(a,b,1); +- ! if (den==0) +- ! return num; +- ! --------------------------------------------------------------------- +- beqz $r1, .L1 ! if den==0, skip +- ! --------------------------------------------------------------------- +- ! bit=1; +- ! res=0; +- ! --------------------------------------------------------------------- +- movi $r4, 1 ! $r4 <- bit=1 +-#ifndef __OPTIMIZE_SIZE__ +-.L6: +-#endif +- ! --------------------------------------------------------------------- +- ! while (den<num +- ! --------------------------------------------------------------------- +- slt $ta, $r1, $r0 ! $ta <- den<num? +- beqz $ta, .L5 ! if no, skip +- ! --------------------------------------------------------------------- +- ! &&bit&&!(den&(1L<<31))) +- ! --------------------------------------------------------------------- +- bltz $r1, .L5 ! if den<0, skip +- ! --------------------------------------------------------------------- +- ! { den<<=1; +- ! bit<<=1; +- ! } +- ! --------------------------------------------------------------------- +-#if defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__) +- clz $r3, $r1 ! 
$r3 <- leading zero count for den +- clz $ta, $r0 ! $ta <- leading zero count for num +- sub $r3, $r3, $ta ! $r3 <- number of bits to shift +- sll $r1, $r1, $r3 ! $r1 <- den +- sll $r4, $r4, $r3 ! $r2 <- bit +-#else +- slli $r1, $r1, 1 ! $r1 <- den<<=1 +- slli $r4, $r4, 1 ! $r4 <- bit<<=1 +- b .L6 ! continue loop +-#endif +-.L5: +- ! --------------------------------------------------------------------- +- ! while (bit) +- ! { if (num>=den) +- ! { num-=den; +- ! res|=bit; +- ! } +- ! bit>>=1; +- ! den>>=1; +- ! } +- !!if (modwanted) +- !! return num; +- !!return res; +- ! --------------------------------------------------------------------- +- sub $r2, $r0, $r1 ! $r2 <- num-den +- slt $ta, $r0, $r1 ! $ta <- num<den? +- srli $r4, $r4, 1 ! $r4 <- bit>>=1 +- cmovz $r0, $r2, $ta ! $r0 <- num=(num<den)?num:num-den +- srli $r1, $r1, 1 ! $r1 <- den>>=1 +- bnez $r4, .L5 ! if bit!=0, continue loop +-.L1: +- ! --------------------------------------------------------------------- +- ! return res; +- ! --------------------------------------------------------------------- +- ret +- .size __umodsi3, .-__umodsi3 +-#endif /* L_umodsi3 */ +- +- +- +-#ifdef L_umoddi3 +- +- !-------------------------------------- +- #ifdef __big_endian__ +- #define V1H $r0 +- #define V1L $r1 +- #define V2H $r2 +- #define V2L $r3 +- #else +- #define V1H $r1 +- #define V1L $r0 +- #define V2H $r3 +- #define V2L $r2 +- #endif +- !-------------------------------------- +- .text +- .align 2 +- .globl __umoddi3 +- .type __umoddi3, @function +-__umoddi3: +- ! prologue +- addi $sp, $sp, -12 +- swi $lp, [$sp+(0)] +- ! end of prologue +- addi $r4, $sp, 4 +- bal __udivmoddi4 +- lwi $r0, [$sp+(4)] ! __udivmoddi4 return low when LE mode or return high when BE mode +- lwi $r1, [$sp+(8)] ! +-.L82: +- ! 
epilogue +- lwi $lp, [$sp+(0)] +- addi $sp, $sp, 12 +- ret +- .size __umoddi3, .-__umoddi3 +-#endif /* L_umoddi3 */ +- +- +- +-#ifdef L_muldi3 +- +-#ifdef __big_endian__ +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- +- #define V2H $r4 +- #define V2L $r5 +-#else +- #define P1H $r1 +- #define P1L $r0 +- #define P2H $r3 +- #define P2L $r2 +- +- #define V2H $r5 +- #define V2L $r4 +-#endif +- +- ! ==================================================================== +- .text +- .align 2 +- .globl __muldi3 +- .type __muldi3, @function +-__muldi3: +- ! parameter passing for libgcc functions normally involves 2 doubles +- !--------------------------------------- +-#ifdef __NDS32_ISA_V3M__ +- ! There is no mulr64 instruction in Andes ISA V3M. +- ! So we must provide a sequence of calculations to complete the job. +- smw.adm $r6, [$sp], $r9, 0x0 +- zeh33 $r4, P1L +- srli $r7, P1L, 16 +- zeh33 $r5, P2L +- mul $r6, $r5, $r4 +- mul33 $r5, $r7 +- srli $r8, P2L, 16 +- mov55 $r9, $r5 +- maddr32 $r9, $r8, $r4 +- srli $r4, $r6, 16 +- add $r4, $r9, $r4 +- slt45 $r4, $r5 +- slli $r5, $r15, 16 +- maddr32 $r5, $r8, $r7 +- mul P2L, P1H, P2L +- srli $r7, $r4, 16 +- maddr32 P2L, P2H, P1L +- add333 P1H, $r5, $r7 +- slli $r4, $r4, 16 +- zeh33 $r6, $r6 +- add333 P1L, $r4, $r6 +- add333 P1H, P2L, P1H +- lmw.bim $r6, [$sp], $r9, 0x0 +- ret +-#else /* not __NDS32_ISA_V3M__ */ +- mul $ta, P1L, P2H +- mulr64 $r4, P1L, P2L +- maddr32 $ta, P1H, P2L +- move P1L, V2L +- add P1H, $ta, V2H +- ret +-#endif /* not __NDS32_ISA_V3M__ */ +- .size __muldi3, .-__muldi3 +-#endif /* L_muldi3 */ +- +- +- +-#ifdef L_addsub_df +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +- #define P3L $r4 +- #define P3H $r5 +- #define O1L $r7 +- #define O1H $r8 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- #define P3H $r4 +- #define P3L $r5 +- #define O1H $r7 +- #define O1L $r8 +-#endif +- .text +- 
.align 2 +- .global __subdf3 +- .type __subdf3, @function +-__subdf3: +- push $lp +- pushm $r6, $r10 +- +- move $r4, #0x80000000 +- xor P2H, P2H, $r4 +- +- j .Lsdpadd +- +- .global __adddf3 +- .type __adddf3, @function +-__adddf3: +- push $lp +- pushm $r6, $r10 +-.Lsdpadd: +- slli $r6, P1H, #1 +- srli $r6, $r6, #21 +- slli P3H, P1H, #11 +- srli $r10, P1L, #21 +- or P3H, P3H, $r10 +- slli P3L, P1L, #11 +- move O1L, #0x80000000 +- or P3H, P3H, O1L +- slli $r9, P2H, #1 +- srli $r9, $r9, #21 +- slli O1H, P2H, #11 +- srli $r10, P2L, #21 +- or O1H, O1H, $r10 +- or O1H, O1H, O1L +- slli O1L, P2L, #11 +- +- addi $r10, $r6, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LEspecA +- +-.LElab1: +- addi $r10, $r9, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LEspecB +- +-.LElab2: +- #NORMd($r4, P2L, P1L) +- bnez P3H, .LL1 +- bnez P3L, .LL2 +- move $r6, #0 +- j .LL3 +-.LL2: +- move P3H, P3L +- move P3L, #0 +- move P2L, #32 +- sub $r6, $r6, P2L +-.LL1: +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r2, $r5 +-#else +- pushm $r0, $r1 +- pushm $r3, $r5 +- move $r0, $r5 +- bal __clzsi2 +- move $r2, $r0 +- popm $r3, $r5 +- popm $r0, $r1 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r3, $r4 +-#else +- pushm $r0, $r2 +- pushm $r4, $r5 +- move $r0, $r4 +- bal __clzsi2 +- move $r3, $r0 +- popm $r4, $r5 +- popm $r0, $r2 +-#endif +-#endif /* __big_endian__ */ +- beqz P2L, .LL3 +- sub $r6, $r6, P2L +- subri P1L, P2L, #32 +- srl P1L, P3L, P1L +- sll P3L, P3L, P2L +- sll P3H, P3H, P2L +- or P3H, P3H, P1L +-.LL3: +- #NORMd End +- +- #NORMd($r7, P2L, P1L) +- bnez O1H, .LL4 +- bnez O1L, .LL5 +- move $r9, #0 +- j .LL6 +-.LL5: +- move O1H, O1L +- move O1L, #0 +- move P2L, #32 +- sub $r9, $r9, P2L +-.LL4: +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r2, O1H +-#else +- pushm $r0, $r1 +- pushm $r3, $r5 +- move $r0, O1H +- bal __clzsi2 +- move $r2, $r0 +- popm $r3, $r5 +- popm $r0, $r1 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- 
clz $r3, O1H +-#else +- pushm $r0, $r2 +- pushm $r4, $r5 +- move $r0, O1H +- bal __clzsi2 +- move $r3, $r0 +- popm $r4, $r5 +- popm $r0, $r2 +-#endif +-#endif /* __big_endian__ */ +- beqz P2L, .LL6 +- sub $r9, $r9, P2L +- subri P1L, P2L, #32 +- srl P1L, O1L, P1L +- sll O1L, O1L, P2L +- sll O1H, O1H, P2L +- or O1H, O1H, P1L +-.LL6: +- #NORMd End +- +- move $r10, #0x80000000 +- and P1H, P1H, $r10 +- +- beq $r6, $r9, .LEadd3 +- slts $r15, $r9, $r6 +- beqzs8 .Li1 +- sub $r9, $r6, $r9 +- move P2L, #0 +-.LL7: +- move $r10, #0x20 +- slt $r15, $r9, $r10 +- bnezs8 .LL8 +- or P2L, P2L, O1L +- move O1L, O1H +- move O1H, #0 +- addi $r9, $r9, #0xffffffe0 +- bnez O1L, .LL7 +-.LL8: +- beqz $r9, .LEadd3 +- move P1L, O1H +- move $r10, O1L +- srl O1L, O1L, $r9 +- srl O1H, O1H, $r9 +- subri $r9, $r9, #0x20 +- sll P1L, P1L, $r9 +- or O1L, O1L, P1L +- sll $r10, $r10, $r9 +- or P2L, P2L, $r10 +- beqz P2L, .LEadd3 +- ori O1L, O1L, #1 +- j .LEadd3 +-.Li1: +- move $r15, $r6 +- move $r6, $r9 +- sub $r9, $r9, $r15 +- move P2L, #0 +-.LL10: +- move $r10, #0x20 +- slt $r15, $r9, $r10 +- bnezs8 .LL11 +- or P2L, P2L, P3L +- move P3L, P3H +- move P3H, #0 +- addi $r9, $r9, #0xffffffe0 +- bnez P3L, .LL10 +-.LL11: +- beqz $r9, .LEadd3 +- move P1L, P3H +- move $r10, P3L +- srl P3L, P3L, $r9 +- srl P3H, P3H, $r9 +- subri $r9, $r9, #0x20 +- sll P1L, P1L, $r9 +- or P3L, P3L, P1L +- sll $r10, $r10, $r9 +- or P2L, P2L, $r10 +- beqz P2L, .LEadd3 +- ori P3L, P3L, #1 +- +-.LEadd3: +- xor $r10, P1H, P2H +- sltsi $r15, $r10, #0 +- bnezs8 .LEsub1 +- +- #ADD(P3L, O1L) +- add P3L, P3L, O1L +- slt $r15, P3L, O1L +- +- #ADDCC(P3H, O1H) +- beqzs8 .LL13 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .LL14 +- addi P3H, P3H, #0x1 +- j .LL15 +-.LL14: +- move $r15, #1 +- add P3H, P3H, $r15 +- slt $r15, P3H, $r15 +- j .LL15 +-.LL13: +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +-.LL15: +- +- beqzs8 .LEres +- andi $r10, P3L, #1 +- beqz $r10, .Li3 +- ori P3L, P3L, #2 +-.Li3: +- srli P3L, P3L, #1 +- slli $r10, P3H, #31 +- 
or P3L, P3L, $r10 +- srli P3H, P3H, #1 +- move $r10, #0x80000000 +- or P3H, P3H, $r10 +- addi $r6, $r6, #1 +- subri $r15, $r6, #0x7ff +- bnezs8 .LEres +- move $r10, #0x7ff00000 +- or P1H, P1H, $r10 +- move P1L, #0 +- j .LEretA +- +-.LEsub1: +- #SUB(P3L, O1L) +- move $r15, P3L +- sub P3L, P3L, O1L +- slt $r15, $r15, P3L +- +- #SUBCC(P3H, O1H) +- beqzs8 .LL16 +- move $r15, P3H +- sub P3H, P3H, O1H +- slt $r15, $r15, P3H +- beqzs8 .LL17 +- subi333 P3H, P3H, #1 +- j .LL18 +-.LL17: +- move $r15, P3H +- subi333 P3H, P3H, #1 +- slt $r15, $r15, P3H +- j .LL18 +-.LL16: +- move $r15, P3H +- sub P3H, P3H, O1H +- slt $r15, $r15, P3H +-.LL18: +- +- beqzs8 .Li5 +- move $r10, #0x80000000 +- xor P1H, P1H, $r10 +- +- subri P3H, P3H, #0 +- beqz P3L, .LL19 +- subri P3L, P3L, #0 +- subi45 P3H, #1 +-.LL19: +- +-.Li5: +- #NORMd($r4, $r9, P1L) +- bnez P3H, .LL20 +- bnez P3L, .LL21 +- move $r6, #0 +- j .LL22 +-.LL21: +- move P3H, P3L +- move P3L, #0 +- move $r9, #32 +- sub $r6, $r6, $r9 +-.LL20: +-#ifdef __NDS32_PERF_EXT__ +- clz $r9, P3H +-#else +- pushm $r0, $r5 +- move $r0, P3H +- bal __clzsi2 +- move $r9, $r0 +- popm $r0, $r5 +-#endif +- beqz $r9, .LL22 +- sub $r6, $r6, $r9 +- subri P1L, $r9, #32 +- srl P1L, P3L, P1L +- sll P3L, P3L, $r9 +- sll P3H, P3H, $r9 +- or P3H, P3H, P1L +-.LL22: +- #NORMd End +- +- or $r10, P3H, P3L +- bnez $r10, .LEres +- move P1H, #0 +- +-.LEres: +- blez $r6, .LEund +- +-.LElab8: +- #ADD(P3L, $0x400) +- move $r15, #0x400 +- add P3L, P3L, $r15 +- slt $r15, P3L, $r15 +- +- #ADDCC(P3H, $0x0) +- beqzs8 .LL25 +- add P3H, P3H, $r15 +- slt $r15, P3H, $r15 +-.LL25: +- +- #ADDC($r6, $0x0) +- add $r6, $r6, $r15 +- srli $r10, P3L, #11 +- andi $r10, $r10, #1 +- sub P3L, P3L, $r10 +- srli P1L, P3L, #11 +- slli $r10, P3H, #21 +- or P1L, P1L, $r10 +- slli $r10, P3H, #1 +- srli $r10, $r10, #12 +- or P1H, P1H, $r10 +- slli $r10, $r6, #20 +- or P1H, P1H, $r10 +- +-.LEretA: +-.LE999: +- popm $r6, $r10 +- pop $lp +- ret5 $lp +- +-.LEspecA: +- #ADD(P3L, P3L) +- move $r15, P3L +- 
add P3L, P3L, P3L +- slt $r15, P3L, $r15 +- +- #ADDC(P3H, P3H) +- add P3H, P3H, P3H +- add P3H, P3H, $r15 +- bnez $r6, .Li7 +- or $r10, P3H, P3L +- beqz $r10, .Li8 +- j .LElab1 +-.Li8: +- subri $r15, $r9, #0x7ff +- beqzs8 .LEspecB +- add P3L, P2H, P2H +- or $r10, P3L, P2L +- bnez $r10, .LEretB +- sltsi $r15, P2H, #0 +- bnezs8 .LEretA +- +-.LEretB: +- move P1L, P2L +- move P1H, P2H +- j .LE999 +-.Li7: +- or $r10, P3H, P3L +- bnez $r10, .LEnan +- subri $r15, $r9, #0x7ff +- bnezs8 .LEretA +- xor $r10, P1H, P2H +- sltsi $r15, $r10, #0 +- bnezs8 .LEnan +- j .LEretB +- +-.LEspecB: +- #ADD(O1L, O1L) +- move $r15, O1L +- add O1L, O1L, O1L +- slt $r15, O1L, $r15 +- +- #ADDC(O1H, O1H) +- add O1H, O1H, O1H +- add O1H, O1H, $r15 +- bnez $r9, .Li11 +- or $r10, O1H, O1L +- beqz $r10, .LEretA +- j .LElab2 +-.Li11: +- or $r10, O1H, O1L +- beqz $r10, .LEretB +- +-.LEnan: +- move P1H, #0xfff80000 +- move P1L, #0 +- j .LEretA +- +-.LEund: +- subri $r9, $r6, #1 +- move P2L, #0 +-.LL26: +- move $r10, #0x20 +- slt $r15, $r9, $r10 +- bnezs8 .LL27 +- or P2L, P2L, P3L +- move P3L, P3H +- move P3H, #0 +- addi $r9, $r9, #0xffffffe0 +- bnez P3L, .LL26 +-.LL27: +- beqz $r9, .LL28 +- move P1L, P3H +- move $r10, P3L +- srl P3L, P3L, $r9 +- srl P3H, P3H, $r9 +- subri $r9, $r9, #0x20 +- sll P1L, P1L, $r9 +- or P3L, P3L, P1L +- sll $r10, $r10, $r9 +- or P2L, P2L, $r10 +- beqz P2L, .LL28 +- ori P3L, P3L, #1 +-.LL28: +- move $r6, #0 +- j .LElab8 +- .size __subdf3, .-__subdf3 +- .size __adddf3, .-__adddf3 +-#endif /* L_addsub_df */ +- +- +- +-#ifdef L_mul_sf +- +-#if !defined (__big_endian__) +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +-#endif +- .text +- .align 2 +- .global __mulsf3 +- .type __mulsf3, @function +-__mulsf3: +- push $lp +- pushm $r6, $r10 +- +- srli $r3, $r0, #23 +- andi $r3, $r3, #0xff +- srli $r5, $r1, #23 +- andi $r5, $r5, #0xff +- move $r6, #0x80000000 +- slli $r2, 
$r0, #8 +- or $r2, $r2, $r6 +- slli $r4, $r1, #8 +- or $r4, $r4, $r6 +- xor $r8, $r0, $r1 +- and $r6, $r6, $r8 +- +- addi $r8, $r3, #-1 +- slti $r15, $r8, #0xfe +- beqzs8 .LFspecA +- +-.LFlab1: +- addi $r8, $r5, #-1 +- slti $r15, $r8, #0xfe +- beqzs8 .LFspecB +- +-.LFlab2: +- move $r10, $r3 +-/* This is a 64-bit multiple. ($r2, $r7) is (high, low). */ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r2, $r2, $r4 +-#else +- pushm $r0, $r1 +- pushm $r4, $r5 +- move P1L, $r2 +- movi P1H, #0 +- move P2L, $r4 +- movi P2H, #0 +- bal __muldi3 +- movd44 $r2, $r0 +- popm $r4, $r5 +- popm $r0, $r1 +-#endif +-#ifndef __big_endian__ +- move $r7, $r2 +- move $r2, $r3 +-#else +- move $r7, $r3 +-#endif +- move $r3, $r10 +- +- beqz $r7, .Li17 +- ori $r2, $r2, #1 +- +-.Li17: +- sltsi $r15, $r2, #0 +- bnezs8 .Li18 +- slli $r2, $r2, #1 +- addi $r3, $r3, #-1 +-.Li18: +- addi $r8, $r5, #0xffffff82 +- add $r3, $r3, $r8 +- addi $r8, $r3, #-1 +- slti $r15, $r8, #0xfe +- beqzs8 .LFoveund +- +-.LFlab8: +- #ADD($r2, $0x80) +- move $r15, #0x80 +- add $r2, $r2, $r15 +- slt $r15, $r2, $r15 +- +- #ADDC($r3, $0x0) +- add $r3, $r3, $r15 +- srli $r8, $r2, #8 +- andi $r8, $r8, #1 +- sub $r2, $r2, $r8 +- slli $r2, $r2, #1 +- srli $r2, $r2, #9 +- slli $r8, $r3, #23 +- or $r2, $r2, $r8 +- or $r0, $r2, $r6 +- +-.LF999: +- popm $r6, $r10 +- pop $lp +- ret5 $lp +- +-.LFspecA: +- bnez $r3, .Li19 +- add $r2, $r2, $r2 +- beqz $r2, .Li20 +-#ifdef __NDS32_PERF_EXT__ +- clz $r7, $r2 +-#else +- pushm $r0, $r5 +- move $r0, $r2 +- bal __clzsi2 +- move $r7, $r0 +- popm $r0, $r5 +-#endif +- sub $r3, $r3, $r7 +- sll $r2, $r2, $r7 +- j .LFlab1 +-.Li20: +- subri $r15, $r5, #0xff +- beqzs8 .LFnan +- j .LFzer +-.Li19: +- add $r8, $r2, $r2 +- bnez $r8, .LFnan +- bnez $r5, .Li21 +- add $r8, $r4, $r4 +- beqz $r8, .LFnan +-.Li21: +- subri $r15, $r5, #0xff +- bnezs8 .LFinf +- +-.LFspecB: +- bnez $r5, .Li22 +- add $r4, $r4, $r4 +- beqz $r4, .LFzer +-#ifdef __NDS32_PERF_EXT__ +- clz $r7, $r4 +-#else +- pushm $r0, $r5 +- move $r0, $r4 +- 
bal __clzsi2 +- move $r7, $r0 +- popm $r0, $r5 +-#endif +- sub $r5, $r5, $r7 +- sll $r4, $r4, $r7 +- j .LFlab2 +- +-.LFzer: +- move $r0, $r6 +- j .LF999 +-.Li22: +- add $r8, $r4, $r4 +- bnez $r8, .LFnan +- +-.LFinf: +- move $r8, #0x7f800000 +- or $r0, $r6, $r8 +- j .LF999 +- +-.LFnan: +- move $r0, #0xffc00000 +- j .LF999 +- +-.LFoveund: +- bgtz $r3, .LFinf +- subri $r7, $r3, #1 +- slti $r15, $r7, #0x20 +- beqzs8 .LFzer +- subri $r8, $r7, #0x20 +- sll $r3, $r2, $r8 +- srl $r2, $r2, $r7 +- beqz $r3, .Li25 +- ori $r2, $r2, #2 +-.Li25: +- move $r3, #0 +- addi $r8, $r2, #0x80 +- sltsi $r15, $r8, #0 +- beqzs8 .LFlab8 +- move $r3, #1 +- j .LFlab8 +- .size __mulsf3, .-__mulsf3 +-#endif /* L_mul_sf */ +- +- +- +-#ifdef L_mul_df +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +- #define P3L $r4 +- #define P3H $r5 +- #define O1L $r7 +- #define O1H $r8 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- #define P3H $r4 +- #define P3L $r5 +- #define O1H $r7 +- #define O1L $r8 +-#endif +- .text +- .align 2 +- .global __muldf3 +- .type __muldf3, @function +-__muldf3: +- push $lp +- pushm $r6, $r10 +- +- slli $r6, P1H, #1 +- srli $r6, $r6, #21 +- slli P3H, P1H, #11 +- srli $r10, P1L, #21 +- or P3H, P3H, $r10 +- slli P3L, P1L, #11 +- move O1L, #0x80000000 +- or P3H, P3H, O1L +- slli $r9, P2H, #1 +- srli $r9, $r9, #21 +- slli O1H, P2H, #11 +- srli $r10, P2L, #21 +- or O1H, O1H, $r10 +- or O1H, O1H, O1L +- xor P1H, P1H, P2H +- and P1H, P1H, O1L +- slli O1L, P2L, #11 +- +- addi $r10, $r6, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LFspecA +- +-.LFlab1: +- addi $r10, $r9, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LFspecB +- +-.LFlab2: +- addi $r10, $r9, #0xfffffc02 +- add $r6, $r6, $r10 +- +- move $r10, $r8 +-/* This is a 64-bit multiple. */ +-#ifndef __big_endian__ +-/* For little endian: ($r9, $r3) is (high, low). 
*/ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r8, $r5, $r8 +-#else +- pushm $r0, $r5 +- move $r0, $r5 +- movi $r1, #0 +- move $r2, $r8 +- movi $r3, #0 +- bal __muldi3 +- movd44 $r8, $r0 +- popm $r0, $r5 +-#endif +- move $r3, $r8 +-#else /* __big_endian__ */ +-/* For big endain: ($r9, $r2) is (high, low). */ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r8, $r4, $r7 +-#else +- pushm $r0, $r5 +- move $r1, $r4 +- movi $r0, #0 +- move $r3, $r7 +- movi $r2, #0 +- bal __muldi3 +- movd44 $r8, $r0 +- popm $r0, $r5 +-#endif +- move $r2, $r9 +- move $r9, $r8 +-#endif /* __big_endian__ */ +- move $r8, $r10 +- +- move $r10, P1H +-/* This is a 64-bit multiple. */ +-#ifndef __big_endian__ +-/* For little endian: ($r0, $r2) is (high, low). */ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r4, $r8 +-#else +- pushm $r2, $r5 +- move $r0, $r4 +- movi $r1, #0 +- move $r2, $r8 +- movi $r3, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r2, $r0 +- move $r0, $r1 +-#else /* __big_endian__ */ +-/* For big endain: ($r1, $r3) is (high, low). */ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r5, $r7 +-#else +- pushm $r2, $r5 +- move $r1, $r5 +- movi $r0, #0 +- move $r3, $r7 +- movi $r2, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r3, $r1 +- move $r1, $r0 +-#endif /* __big_endian__ */ +- move P1H, $r10 +- +- #ADD(P2H, P1L) +- add P2H, P2H, P1L +- slt $r15, P2H, P1L +- +- #ADDC($r9, $0x0) +- add $r9, $r9, $r15 +- +- move $r10, P1H +-/* This is a 64-bit multiple. */ +-#ifndef __big_endian__ +-/* For little endian: ($r0, $r8) is (high, low). */ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r5, $r7 +-#else +- pushm $r2, $r5 +- move $r0, $r5 +- movi $r1, #0 +- move $r2, $r7 +- movi $r3, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r8, $r0 +- move $r0, $r1 +-#else /* __big_endian__ */ +-/* For big endian: ($r1, $r7) is (high, low). 
*/ +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r4, $r8 +-#else +- pushm $r2, $r5 +- move $r1, $r4 +- movi $r0, #0 +- move $r3, $r8 +- movi $r2, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r7, $r1 +- move $r1, $r0 +-#endif /* __big_endian__ */ +- move P1H, $r10 +- +- #ADD(P2L, O1H) +- add P2L, P2L, O1H +- slt $r15, P2L, O1H +- +- +- #ADDCC(P2H, P1L) +- beqzs8 .LL29 +- add P2H, P2H, P1L +- slt $r15, P2H, P1L +- beqzs8 .LL30 +- addi P2H, P2H, #0x1 +- j .LL31 +-.LL30: +- move $r15, #1 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +- j .LL31 +-.LL29: +- add P2H, P2H, P1L +- slt $r15, P2H, P1L +-.LL31: +- +- #ADDC($r9, $0x0) +- add $r9, $r9, $r15 +- +-/* This is a 64-bit multiple. */ +-#ifndef __big_endian__ +-/* For little endian: ($r8, $r0) is (high, low). */ +- move $r10, $r9 +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r8, $r4, $r7 +-#else +- pushm $r0, $r5 +- move $r0, $r4 +- movi $r1, #0 +- move $r2, $r7 +- movi $r3, #0 +- bal __muldi3 +- movd44 $r8, $r0 +- popm $r0, $r5 +-#endif +- move $r0, $r8 +- move $r8, $r9 +- move $r9, $r10 +-#else /* __big_endian__ */ +-/* For big endian: ($r7, $r1) is (high, low). 
*/ +- move $r10, $r6 +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r6, $r5, $r8 +-#else +- pushm $r0, $r5 +- move $r1, $r5 +- movi $r0, #0 +- move $r3, $r8 +- movi $r2, #0 +- bal __muldi3 +- movd44 $r6, $r0 +- popm $r0, $r5 +-#endif +- move $r1, $r7 +- move $r7, $r6 +- move $r6, $r10 +-#endif /* __big_endian__ */ +- +- #ADD(P2L, O1H) +- add P2L, P2L, O1H +- slt $r15, P2L, O1H +- +- +- #ADDCC(P2H, $0x0) +- beqzs8 .LL34 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +-.LL34: +- +- #ADDC($r9, $0x0) +- add $r9, $r9, $r15 +- or $r10, P1L, P2L +- beqz $r10, .Li13 +- ori P2H, P2H, #1 +-.Li13: +- move P3H, $r9 +- move P3L, P2H +- sltsi $r15, P3H, #0 +- bnezs8 .Li14 +- +- move $r15, P3L +- add P3L, P3L, P3L +- slt $r15, P3L, $r15 +- add P3H, P3H, P3H +- add P3H, P3H, $r15 +- addi $r6, $r6, #-1 +-.Li14: +- addi $r10, $r6, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LFoveund +- +- #ADD(P3L, $0x400) +- move $r15, #0x400 +- add P3L, P3L, $r15 +- slt $r15, P3L, $r15 +- +- +- #ADDCC(P3H, $0x0) +- beqzs8 .LL37 +- add P3H, P3H, $r15 +- slt $r15, P3H, $r15 +-.LL37: +- +- #ADDC($r6, $0x0) +- add $r6, $r6, $r15 +- +-.LFlab8: +- srli $r10, P3L, #11 +- andi $r10, $r10, #1 +- sub P3L, P3L, $r10 +- srli P1L, P3L, #11 +- slli $r10, P3H, #21 +- or P1L, P1L, $r10 +- slli $r10, P3H, #1 +- srli $r10, $r10, #12 +- or P1H, P1H, $r10 +- slli $r10, $r6, #20 +- or P1H, P1H, $r10 +- +-.LFret: +-.LF999: +- popm $r6, $r10 +- pop $lp +- ret5 $lp +- +-.LFspecA: +- #ADD(P3L, P3L) +- move $r15, P3L +- add P3L, P3L, P3L +- slt $r15, P3L, $r15 +- +- #ADDC(P3H, P3H) +- add P3H, P3H, P3H +- add P3H, P3H, $r15 +- bnez $r6, .Li15 +- or $r10, P3H, P3L +- beqz $r10, .Li16 +- +- +- #NORMd($r4, P1L, P2H) +- bnez P3H, .LL38 +- bnez P3L, .LL39 +- move $r6, #0 +- j .LL40 +-.LL39: +- move P3H, P3L +- move P3L, #0 +- move P1L, #32 +- sub $r6, $r6, P1L +-.LL38: +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r0, P3H +-#else +- pushm $r1, P3H +- move $r0, P3H +- bal __clzsi2 +- popm $r1, $r5 +-#endif +-#else /* 
__big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r1, $r4 +-#else +- push $r0 +- pushm $r2, $r5 +- move $r0, $r4 +- bal __clzsi2 +- move $r1, $r0 +- popm $r2, $r5 +- pop $r0 +-#endif +-#endif /* __big_endian__ */ +- beqz P1L, .LL40 +- sub $r6, $r6, P1L +- subri P2H, P1L, #32 +- srl P2H, P3L, P2H +- sll P3L, P3L, P1L +- sll P3H, P3H, P1L +- or P3H, P3H, P2H +-.LL40: +- #NORMd End +- +- j .LFlab1 +-.Li16: +- subri $r15, $r9, #0x7ff +- beqzs8 .LFnan +- j .LFret +-.Li15: +- or $r10, P3H, P3L +- bnez $r10, .LFnan +- bnez $r9, .Li17 +- slli $r10, O1H, #1 +- or $r10, $r10, O1L +- beqz $r10, .LFnan +-.Li17: +- subri $r15, $r9, #0x7ff +- bnezs8 .LFinf +- +-.LFspecB: +- #ADD(O1L, O1L) +- move $r15, O1L +- add O1L, O1L, O1L +- slt $r15, O1L, $r15 +- +- #ADDC(O1H, O1H) +- add O1H, O1H, O1H +- add O1H, O1H, $r15 +- bnez $r9, .Li18 +- or $r10, O1H, O1L +- beqz $r10, .Li19 +- +- +- #NORMd($r7, P2L, P1L) +- bnez O1H, .LL41 +- bnez O1L, .LL42 +- move $r9, #0 +- j .LL43 +-.LL42: +- move O1H, O1L +- move O1L, #0 +- move P2L, #32 +- sub $r9, $r9, P2L +-.LL41: +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r2, $r8 +-#else +- pushm $r0, $r1 +- pushm $r3, $r5 +- move $r0, $r8 +- bal __clzsi2 +- move $r2, $r0 +- popm $r3, $r5 +- popm $r0, $r1 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r3, $r7 +-#else +- pushm $r0, $r2 +- pushm $r4, $r5 +- move $r0, $r7 +- bal __clzsi2 +- move $r3, $r0 +- popm $r4, $r5 +- popm $r0, $r2 +-#endif +-#endif /* __big_endian__ */ +- beqz P2L, .LL43 +- sub $r9, $r9, P2L +- subri P1L, P2L, #32 +- srl P1L, O1L, P1L +- sll O1L, O1L, P2L +- sll O1H, O1H, P2L +- or O1H, O1H, P1L +-.LL43: +- #NORMd End +- +- j .LFlab2 +-.Li19: +- move P1L, #0 +- j .LFret +-.Li18: +- or $r10, O1H, O1L +- bnez $r10, .LFnan +- +-.LFinf: +- move $r10, #0x7ff00000 +- or P1H, P1H, $r10 +- move P1L, #0 +- j .LFret +- +-.LFnan: +- move P1H, #0xfff80000 +- move P1L, #0 +- j .LFret +- +-.LFoveund: +- bgtz $r6, .LFinf +- subri P1L, $r6, #1 +- move 
P2L, #0 +-.LL44: +- move $r10, #0x20 +- slt $r15, P1L, $r10 +- bnezs8 .LL45 +- or P2L, P2L, P3L +- move P3L, P3H +- move P3H, #0 +- addi P1L, P1L, #0xffffffe0 +- bnez P3L, .LL44 +-.LL45: +- beqz P1L, .LL46 +- move P2H, P3H +- move $r10, P3L +- srl P3L, P3L, P1L +- srl P3H, P3H, P1L +- subri P1L, P1L, #0x20 +- sll P2H, P2H, P1L +- or P3L, P3L, P2H +- sll $r10, $r10, P1L +- or P2L, P2L, $r10 +- beqz P2L, .LL46 +- ori P3L, P3L, #1 +-.LL46: +- #ADD(P3L, $0x400) +- move $r15, #0x400 +- add P3L, P3L, $r15 +- slt $r15, P3L, $r15 +- +- #ADDC(P3H, $0x0) +- add P3H, P3H, $r15 +- srli $r6, P3H, #31 +- j .LFlab8 +- .size __muldf3, .-__muldf3 +-#endif /* L_mul_df */ +- +- +- +-#ifdef L_div_sf +- +- .text +- .align 2 +- .global __divsf3 +- .type __divsf3, @function +-__divsf3: +- push $lp +- pushm $r6, $r10 +- +- move $r7, #0x80000000 +- srli $r4, $r0, #23 +- andi $r4, $r4, #0xff +- srli $r6, $r1, #23 +- andi $r6, $r6, #0xff +- slli $r3, $r0, #8 +- or $r3, $r3, $r7 +- slli $r5, $r1, #8 +- or $r5, $r5, $r7 +- xor $r10, $r0, $r1 +- and $r7, $r7, $r10 +- +- addi $r10, $r4, #-1 +- slti $r15, $r10, #0xfe +- beqzs8 .LGspecA +- +-.LGlab1: +- addi $r10, $r6, #-1 +- slti $r15, $r10, #0xfe +- beqzs8 .LGspecB +- +-.LGlab2: +- slt $r15, $r3, $r5 +- bnezs8 .Li27 +- srli $r3, $r3, #1 +- addi $r4, $r4, #1 +-.Li27: +- srli $r8, $r5, #14 +- divr $r0, $r2, $r3, $r8 +- andi $r9, $r5, #0x3fff +- mul $r1, $r9, $r0 +- slli $r2, $r2, #14 +- +- #SUB($r2, $r1) +- move $r15, $r2 +- sub $r2, $r2, $r1 +- slt $r15, $r15, $r2 +- beqzs8 .Li28 +- addi $r0, $r0, #-1 +- +- #ADD($r2, $r5) +- add $r2, $r2, $r5 +- slt $r15, $r2, $r5 +-.Li28: +- divr $r3, $r2, $r2, $r8 +- mul $r1, $r9, $r3 +- slli $r2, $r2, #14 +- +- #SUB($r2, $r1) +- move $r15, $r2 +- sub $r2, $r2, $r1 +- slt $r15, $r15, $r2 +- beqzs8 .Li29 +- addi $r3, $r3, #-1 +- +- #ADD($r2, $r5) +- add $r2, $r2, $r5 +- slt $r15, $r2, $r5 +-.Li29: +- slli $r10, $r0, #14 +- add $r3, $r3, $r10 +- slli $r3, $r3, #4 +- beqz $r2, .Li30 +- ori $r3, $r3, #1 +-.Li30: +- 
subri $r10, $r6, #0x7e +- add $r4, $r4, $r10 +- addi $r10, $r4, #-1 +- slti $r15, $r10, #0xfe +- beqzs8 .LGoveund +- +-.LGlab8: +- #ADD($r3, $0x80) +- move $r15, #0x80 +- add $r3, $r3, $r15 +- slt $r15, $r3, $r15 +- +- #ADDC($r4, $0x0) +- add $r4, $r4, $r15 +- srli $r10, $r3, #8 +- andi $r10, $r10, #1 +- sub $r3, $r3, $r10 +- slli $r3, $r3, #1 +- srli $r3, $r3, #9 +- slli $r10, $r4, #23 +- or $r3, $r3, $r10 +- or $r0, $r3, $r7 +- +-.LG999: +- popm $r6, $r10 +- pop $lp +- ret5 $lp +- +-.LGspecA: +- bnez $r4, .Li31 +- add $r3, $r3, $r3 +- beqz $r3, .Li31 +-#ifdef __NDS32_PERF_EXT__ +- clz $r8, $r3 +-#else +- pushm $r0, $r5 +- move $r0, $r3 +- bal __clzsi2 +- move $r8, $r0 +- popm $r0, $r5 +-#endif +- sub $r4, $r4, $r8 +- sll $r3, $r3, $r8 +- j .LGlab1 +-.Li31: +- bne $r6, $r4, .Li33 +- add $r10, $r5, $r5 +- beqz $r10, .LGnan +-.Li33: +- subri $r15, $r6, #0xff +- beqzs8 .LGspecB +- beqz $r4, .LGzer +- add $r10, $r3, $r3 +- bnez $r10, .LGnan +- j .LGinf +- +-.LGspecB: +- bnez $r6, .Li34 +- add $r5, $r5, $r5 +- beqz $r5, .LGinf +-#ifdef __NDS32_PERF_EXT__ +- clz $r8, $r5 +-#else +- pushm $r0, $r5 +- move $r0, $r5 +- bal __clzsi2 +- move $r8, $r0 +- popm $r0, $r5 +-#endif +- sub $r6, $r6, $r8 +- sll $r5, $r5, $r8 +- j .LGlab2 +-.Li34: +- add $r10, $r5, $r5 +- bnez $r10, .LGnan +- +-.LGzer: +- move $r0, $r7 +- j .LG999 +- +-.LGoveund: +- bgtz $r4, .LGinf +- subri $r8, $r4, #1 +- slti $r15, $r8, #0x20 +- beqzs8 .LGzer +- subri $r10, $r8, #0x20 +- sll $r4, $r3, $r10 +- srl $r3, $r3, $r8 +- beqz $r4, .Li37 +- ori $r3, $r3, #2 +-.Li37: +- move $r4, #0 +- addi $r10, $r3, #0x80 +- sltsi $r15, $r10, #0 +- beqzs8 .LGlab8 +- move $r4, #1 +- j .LGlab8 +- +-.LGinf: +- move $r10, #0x7f800000 +- or $r0, $r7, $r10 +- j .LG999 +- +-.LGnan: +- move $r0, #0xffc00000 +- j .LG999 +- .size __divsf3, .-__divsf3 +-#endif /* L_div_sf */ +- +- +- +-#ifdef L_div_df +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +- #define P3L $r4 +- #define 
P3H $r5 +- #define O1L $r7 +- #define O1H $r8 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- #define P3H $r4 +- #define P3L $r5 +- #define O1H $r7 +- #define O1L $r8 +-#endif +- .text +- .align 2 +- .global __divdf3 +- .type __divdf3, @function +-__divdf3: +- push $lp +- pushm $r6, $r10 +- +- slli $r6, P1H, #1 +- srli $r6, $r6, #21 +- slli P3H, P1H, #11 +- srli $r10, P1L, #21 +- or P3H, P3H, $r10 +- slli P3L, P1L, #11 +- move O1L, #0x80000000 +- or P3H, P3H, O1L +- slli $r9, P2H, #1 +- srli $r9, $r9, #21 +- slli O1H, P2H, #11 +- srli $r10, P2L, #21 +- or O1H, O1H, $r10 +- or O1H, O1H, O1L +- xor P1H, P1H, P2H +- and P1H, P1H, O1L +- slli O1L, P2L, #11 +- +- addi $r10, $r6, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LGspecA +- +-.LGlab1: +- addi $r10, $r9, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LGspecB +- +-.LGlab2: +- sub $r6, $r6, $r9 +- addi $r6, $r6, #0x3ff +- srli P3L, P3L, #1 +- slli $r10, P3H, #31 +- or P3L, P3L, $r10 +- srli P3H, P3H, #1 +- srli $r9, O1H, #16 +- divr P2H, P3H, P3H, $r9 +- move $r10, #0xffff +- and P2L, O1H, $r10 +- mul P1L, P2L, P2H +- slli P3H, P3H, #16 +- srli $r10, P3L, #16 +- or P3H, P3H, $r10 +- +- #SUB(P3H, P1L) +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +- beqzs8 .Li20 +- +-.Lb21: +- addi P2H, P2H, #-1 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .Lb21 +-.Li20: +- divr $r9, P3H, P3H, $r9 +- mul P1L, P2L, $r9 +- slli P3H, P3H, #16 +- move $r15, #0xffff +- and $r10, P3L, $r15 +- or P3H, P3H, $r10 +- +- #SUB(P3H, P1L) +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +- beqzs8 .Li22 +- +-.Lb23: +- addi $r9, $r9, #-1 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .Lb23 +-.Li22: +- slli P2H, P2H, #16 +- add P2H, P2H, $r9 +- +-/* This is a 64-bit multiple. */ +-#ifndef __big_endian__ +-/* For little endian: ($r0, $r9) is (high, low). 
*/ +- move $r10, $r1 +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r3, $r7 +-#else +- pushm $r2, $r5 +- move $r0, $r3 +- movi $r1, #0 +- move $r2, $r7 +- movi $r3, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r9, $r0 +- move $r0, $r1 +- move $r1, $r10 +-#else /* __big_endian__ */ +-/* For big endian: ($r1, $r9) is (high, low). */ +- move $r10, $r0 +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r2, $r8 +-#else +- pushm $r2, $r5 +- move $r1, $r2 +- movi $r0, #0 +- move $r3, $r8 +- movi $r2, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r9, $r1 +- move $r1, $r0 +- move $r0, $r10 +-#endif /* __big_endian__ */ +- +- move P3L, #0 +- +- #SUB(P3L, $r9) +- move $r15, P3L +- sub P3L, P3L, $r9 +- slt $r15, $r15, P3L +- +- +- #SUBCC(P3H, P1L) +- beqzs8 .LL47 +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +- beqzs8 .LL48 +- subi333 P3H, P3H, #1 +- j .LL49 +-.LL48: +- move $r15, P3H +- subi333 P3H, P3H, #1 +- slt $r15, $r15, P3H +- j .LL49 +-.LL47: +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +-.LL49: +- +- beqzs8 .Li24 +- +-.LGlab3: +- addi P2H, P2H, #-1 +- +- #ADD(P3L, O1L) +- add P3L, P3L, O1L +- slt $r15, P3L, O1L +- +- +- #ADDCC(P3H, O1H) +- beqzs8 .LL50 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .LL51 +- addi P3H, P3H, #0x1 +- j .LL52 +-.LL51: +- move $r15, #1 +- add P3H, P3H, $r15 +- slt $r15, P3H, $r15 +- j .LL52 +-.LL50: +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +-.LL52: +- +- beqzs8 .LGlab3 +-.Li24: +- bne P3H, O1H, .Li25 +- move P1L, O1L +- move P3H, P3L +- move $r9, #0 +- move P2L, $r9 +- j .Le25 +-.Li25: +- srli P2L, O1H, #16 +- divr $r9, P3H, P3H, P2L +- move $r10, #0xffff +- and $r10, O1H, $r10 +- mul P1L, $r10, $r9 +- slli P3H, P3H, #16 +- srli $r15, P3L, #16 +- or P3H, P3H, $r15 +- +- #SUB(P3H, P1L) +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +- beqzs8 .Li26 +- +-.Lb27: +- addi $r9, $r9, #-1 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .Lb27 +-.Li26: +- divr P2L, P3H, P3H, P2L +- 
mul P1L, $r10, P2L +- slli P3H, P3H, #16 +- move $r10, #0xffff +- and $r10, P3L, $r10 +- or P3H, P3H, $r10 +- +- #SUB(P3H, P1L) +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +- beqzs8 .Li28 +- +-.Lb29: +- addi P2L, P2L, #-1 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .Lb29 +-.Li28: +- slli $r9, $r9, #16 +- add $r9, $r9, P2L +- +-/* This is a 64-bit multiple. */ +-#ifndef __big_endian__ +-/* For little endian: ($r0, $r2) is (high, low). */ +- move $r10, $r1 +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r9, $r7 +-#else +- pushm $r2, $r5 +- move $r0, $r9 +- movi $r1, #0 +- move $r2, $r7 +- movi $r3, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r2, $r0 +- move $r0, $r1 +- move $r1, $r10 +-#else /* __big_endian__ */ +-/* For big endian: ($r1, $r3) is (high, low). */ +- move $r10, $r0 +-#ifndef __NDS32_ISA_V3M__ +- mulr64 $r0, $r9, $r8 +-#else +- pushm $r2, $r5 +- move $r0, $r9 +- movi $r1, #0 +- move $r2, $r7 +- movi $r3, #0 +- bal __muldi3 +- popm $r2, $r5 +-#endif +- move $r3, $r1 +- move $r1, $r0 +- move $r0, $r10 +-#endif /* __big_endian__ */ +- +-.Le25: +- move P3L, #0 +- +- #SUB(P3L, P2L) +- move $r15, P3L +- sub P3L, P3L, P2L +- slt $r15, $r15, P3L +- +- +- #SUBCC(P3H, P1L) +- beqzs8 .LL53 +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +- beqzs8 .LL54 +- subi333 P3H, P3H, #1 +- j .LL55 +-.LL54: +- move $r15, P3H +- subi333 P3H, P3H, #1 +- slt $r15, $r15, P3H +- j .LL55 +-.LL53: +- move $r15, P3H +- sub P3H, P3H, P1L +- slt $r15, $r15, P3H +-.LL55: +- +- beqzs8 .Li30 +- +-.LGlab4: +- addi $r9, $r9, #-1 +- +- #ADD(P3L, O1L) +- add P3L, P3L, O1L +- slt $r15, P3L, O1L +- +- +- #ADDCC(P3H, O1H) +- beqzs8 .LL56 +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +- beqzs8 .LL57 +- addi P3H, P3H, #0x1 +- j .LL58 +-.LL57: +- move $r15, #1 +- add P3H, P3H, $r15 +- slt $r15, P3H, $r15 +- j .LL58 +-.LL56: +- add P3H, P3H, O1H +- slt $r15, P3H, O1H +-.LL58: +- +- beqzs8 .LGlab4 +-.Li30: +- sltsi $r15, P2H, #0 +- bnezs8 .Li31 +- +- 
#ADD($r9, $r9) +- move $r15, $r9 +- add $r9, $r9, $r9 +- slt $r15, $r9, $r15 +- +- #ADDC(P2H, P2H) +- add P2H, P2H, P2H +- add P2H, P2H, $r15 +- addi $r6, $r6, #-1 +-.Li31: +- or $r10, P3H, P3L +- beqz $r10, .Li32 +- ori $r9, $r9, #1 +-.Li32: +- move P3H, P2H +- move P3L, $r9 +- addi $r10, $r6, #-1 +- slti $r15, $r10, #0x7fe +- beqzs8 .LGoveund +- +- #ADD(P3L, $0x400) +- move $r15, #0x400 +- add P3L, P3L, $r15 +- slt $r15, P3L, $r15 +- +- +- #ADDCC(P3H, $0x0) +- beqzs8 .LL61 +- add P3H, P3H, $r15 +- slt $r15, P3H, $r15 +-.LL61: +- +- #ADDC($r6, $0x0) +- add $r6, $r6, $r15 +- +-.LGlab8: +- srli $r10, P3L, #11 +- andi $r10, $r10, #1 +- sub P3L, P3L, $r10 +- srli P1L, P3L, #11 +- slli $r10, P3H, #21 +- or P1L, P1L, $r10 +- slli $r10, P3H, #1 +- srli $r10, $r10, #12 +- or P1H, P1H, $r10 +- slli $r10, $r6, #20 +- or P1H, P1H, $r10 +- +-.LGret: +-.LG999: +- popm $r6, $r10 +- pop $lp +- ret5 $lp +- +-.LGoveund: +- bgtz $r6, .LGinf +- subri P2H, $r6, #1 +- move P1L, #0 +-.LL62: +- move $r10, #0x20 +- slt $r15, P2H, $r10 +- bnezs8 .LL63 +- or P1L, P1L, P3L +- move P3L, P3H +- move P3H, #0 +- addi P2H, P2H, #0xffffffe0 +- bnez P3L, .LL62 +-.LL63: +- beqz P2H, .LL64 +- move P2L, P3H +- move $r10, P3L +- srl P3L, P3L, P2H +- srl P3H, P3H, P2H +- subri P2H, P2H, #0x20 +- sll P2L, P2L, P2H +- or P3L, P3L, P2L +- sll $r10, $r10, P2H +- or P1L, P1L, $r10 +- beqz P1L, .LL64 +- ori P3L, P3L, #1 +-.LL64: +- #ADD(P3L, $0x400) +- move $r15, #0x400 +- add P3L, P3L, $r15 +- slt $r15, P3L, $r15 +- +- #ADDC(P3H, $0x0) +- add P3H, P3H, $r15 +- srli $r6, P3H, #31 +- j .LGlab8 +- +-.LGspecA: +- #ADD(P3L, P3L) +- move $r15, P3L +- add P3L, P3L, P3L +- slt $r15, P3L, $r15 +- +- #ADDC(P3H, P3H) +- add P3H, P3H, P3H +- add P3H, P3H, $r15 +- bnez $r6, .Li33 +- or $r10, P3H, P3L +- beqz $r10, .Li33 +- +- +- #NORMd($r4, P2H, P2L) +- bnez P3H, .LL65 +- bnez P3L, .LL66 +- move $r6, #0 +- j .LL67 +-.LL66: +- move P3H, P3L +- move P3L, #0 +- move P2H, #32 +- sub $r6, $r6, P2H +-.LL65: +-#ifndef 
__big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r3, $r5 +-#else +- pushm $r0, $r2 +- pushm $r4, $r5 +- move $r0, $r5 +- bal __clzsi2 +- move $r3, $r0 +- popm $r4, $r5 +- popm $r0, $r2 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r2, $r4 +-#else +- pushm $r0, $r1 +- pushm $r3, $r5 +- move $r0, $r4 +- bal __clzsi2 +- move $r2, $r0 +- popm $r3, $r5 +- popm $r0, $r1 +-#endif +-#endif /* __big_endian_ */ +- beqz P2H, .LL67 +- sub $r6, $r6, P2H +- subri P2L, P2H, #32 +- srl P2L, P3L, P2L +- sll P3L, P3L, P2H +- sll P3H, P3H, P2H +- or P3H, P3H, P2L +-.LL67: +- #NORMd End +- +- j .LGlab1 +-.Li33: +- bne $r6, $r9, .Li35 +- slli $r10, O1H, #1 +- or $r10, $r10, O1L +- beqz $r10, .LGnan +-.Li35: +- subri $r15, $r9, #0x7ff +- beqzs8 .LGspecB +- beqz $r6, .LGret +- or $r10, P3H, P3L +- bnez $r10, .LGnan +- +-.LGinf: +- move $r10, #0x7ff00000 +- or P1H, P1H, $r10 +- move P1L, #0 +- j .LGret +- +-.LGspecB: +- #ADD(O1L, O1L) +- move $r15, O1L +- add O1L, O1L, O1L +- slt $r15, O1L, $r15 +- +- #ADDC(O1H, O1H) +- add O1H, O1H, O1H +- add O1H, O1H, $r15 +- bnez $r9, .Li36 +- or $r10, O1H, O1L +- beqz $r10, .LGinf +- +- +- #NORMd($r7, P2H, P2L) +- bnez O1H, .LL68 +- bnez O1L, .LL69 +- move $r9, #0 +- j .LL70 +-.LL69: +- move O1H, O1L +- move O1L, #0 +- move P2H, #32 +- sub $r9, $r9, P2H +-.LL68: +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r3, $r8 +-#else +- pushm $r0, $r2 +- pushm $r4, $r5 +- move $r0, $r8 +- bal __clzsi2 +- move $r3, $r0 +- popm $r4, $r5 +- popm $r0, $r2 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r2, $r7 +-#else +- pushm $r0, $r1 +- pushm $r3, $r5 +- move $r0, $r7 +- bal __clzsi2 +- move $r2, $r0 +- popm $r3, $r5 +- popm $r0, $r1 +-#endif +-#endif /* __big_endian__ */ +- beqz P2H, .LL70 +- sub $r9, $r9, P2H +- subri P2L, P2H, #32 +- srl P2L, O1L, P2L +- sll O1L, O1L, P2H +- sll O1H, O1H, P2H +- or O1H, O1H, P2L +-.LL70: +- #NORMd End +- +- j .LGlab2 +-.Li36: +- or $r10, O1H, O1L +- beqz 
$r10, .Li38 +- +-.LGnan: +- move P1H, #0xfff80000 +-.Li38: +- move P1L, #0 +- j .LGret +- .size __divdf3, .-__divdf3 +-#endif /* L_div_df */ +- +- +- +-#ifdef L_negate_sf +- +- .text +- .align 2 +- .global __negsf2 +- .type __negsf2, @function +-__negsf2: +- push $lp +- +- move $r1, #0x80000000 +- xor $r0, $r0, $r1 +- +-.LN999: +- pop $lp +- ret5 $lp +- .size __negsf2, .-__negsf2 +-#endif /* L_negate_sf */ +- +- +- +-#ifdef L_negate_df +- +-#ifndef __big_endian__ +- #define P1H $r1 +-#else +- #define P1H $r0 +-#endif +- .text +- .align 2 +- .global __negdf2 +- .type __negdf2, @function +-__negdf2: +- push $lp +- +- move $r2, #0x80000000 +- xor P1H, P1H, $r2 +- +-.LP999: +- pop $lp +- ret5 $lp +- .size __negdf2, .-__negdf2 +-#endif /* L_negate_df */ +- +- +- +-#ifdef L_sf_to_df +- +-#ifndef __big_endian__ +- #define O1L $r1 +- #define O1H $r2 +-#else +- #define O1H $r1 +- #define O1L $r2 +-#endif +- .text +- .align 2 +- .global __extendsfdf2 +- .type __extendsfdf2, @function +-__extendsfdf2: +- push $lp +- +- srli $r3, $r0, #23 +- andi $r3, $r3, #0xff +- move $r5, #0x80000000 +- and O1H, $r0, $r5 +- addi $r5, $r3, #-1 +- slti $r15, $r5, #0xfe +- beqzs8 .LJspec +- +-.LJlab1: +- addi $r3, $r3, #0x380 +- slli $r5, $r0, #9 +- srli $r5, $r5, #12 +- or O1H, O1H, $r5 +- slli O1L, $r0, #29 +- +-.LJret: +- slli $r5, $r3, #20 +- or O1H, O1H, $r5 +- move $r0, $r1 +- move $r1, $r2 +- +-.LJ999: +- pop $lp +- ret5 $lp +- +-.LJspec: +- move O1L, #0 +- add $r0, $r0, $r0 +- beqz $r0, .LJret +- bnez $r3, .Li42 +- +-.Lb43: +- addi $r3, $r3, #-1 +- add $r0, $r0, $r0 +- move $r5, #0x800000 +- slt $r15, $r0, $r5 +- bnezs8 .Lb43 +- j .LJlab1 +-.Li42: +- move $r3, #0x7ff +- move $r5, #0xff000000 +- slt $r15, $r5, $r0 +- beqzs8 .LJret +- move O1H, #0xfff80000 +- j .LJret +- .size __extendsfdf2, .-__extendsfdf2 +-#endif /* L_sf_to_df */ +- +- +- +-#ifdef L_df_to_sf +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +-#else +- #define P1H 
$r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +-#endif +- .text +- .align 2 +- .global __truncdfsf2 +- .type __truncdfsf2, @function +-__truncdfsf2: +- push $lp +- pushm $r6, $r8 +- +- slli P2H, P1H, #11 +- srli $r7, P1L, #21 +- or P2H, P2H, $r7 +- slli P2L, P1L, #11 +- move $r7, #0x80000000 +- or P2H, P2H, $r7 +- and $r5, P1H, $r7 +- slli $r4, P1H, #1 +- srli $r4, $r4, #21 +- addi $r4, $r4, #0xfffffc80 +- addi $r7, $r4, #-1 +- slti $r15, $r7, #0xfe +- beqzs8 .LKspec +- +-.LKlab1: +- beqz P2L, .Li45 +- ori P2H, P2H, #1 +-.Li45: +- #ADD(P2H, $0x80) +- move $r15, #0x80 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +- +- #ADDC($r4, $0x0) +- add $r4, $r4, $r15 +- srli $r7, P2H, #8 +- andi $r7, $r7, #1 +- sub P2H, P2H, $r7 +- slli P2H, P2H, #1 +- srli P2H, P2H, #9 +- slli $r7, $r4, #23 +- or P2H, P2H, $r7 +- or $r0, P2H, $r5 +- +-.LK999: +- popm $r6, $r8 +- pop $lp +- ret5 $lp +- +-.LKspec: +- subri $r15, $r4, #0x47f +- bnezs8 .Li46 +- slli $r7, P2H, #1 +- or $r7, $r7, P2L +- beqz $r7, .Li46 +- move $r0, #0xffc00000 +- j .LK999 +-.Li46: +- sltsi $r15, $r4, #0xff +- bnezs8 .Li48 +- move $r7, #0x7f800000 +- or $r0, $r5, $r7 +- j .LK999 +-.Li48: +- subri $r6, $r4, #1 +- move $r7, #0x20 +- slt $r15, $r6, $r7 +- bnezs8 .Li49 +- move $r0, $r5 +- j .LK999 +-.Li49: +- subri $r8, $r6, #0x20 +- sll $r7, P2H, $r8 +- or P2L, P2L, $r7 +- srl P2H, P2H, $r6 +- move $r4, #0 +- move $r7, #0x80000000 +- or P2H, P2H, $r7 +- j .LKlab1 +- .size __truncdfsf2, .-__truncdfsf2 +-#endif /* L_df_to_sf */ +- +- +- +-#ifdef L_df_to_si +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +-#else +- #define P1H $r0 +- #define P1L $r1 +-#endif +- .global __fixdfsi +- .type __fixdfsi, @function +-__fixdfsi: +- push $lp +- pushm $r6, $r6 +- +- slli $r3, P1H, #11 +- srli $r6, P1L, #21 +- or $r3, $r3, $r6 +- move $r6, #0x80000000 +- or $r3, $r3, $r6 +- slli $r6, P1H, #1 +- srli $r6, $r6, #21 +- subri $r2, $r6, #0x41e +- blez $r2, .LLnaninf +- move $r6, #0x20 +- slt $r15, $r2, 
$r6 +- bnezs8 .LL72 +- move $r3, #0 +-.LL72: +- srl $r3, $r3, $r2 +- sltsi $r15, P1H, #0 +- beqzs8 .Li50 +- subri $r3, $r3, #0 +-.Li50: +- move $r0, $r3 +- +-.LL999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- +-.LLnaninf: +- beqz P1L, .Li51 +- ori P1H, P1H, #1 +-.Li51: +- move $r6, #0x7ff00000 +- slt $r15, $r6, P1H +- beqzs8 .Li52 +- move $r0, #0x80000000 +- j .LL999 +-.Li52: +- move $r0, #0x7fffffff +- j .LL999 +- .size __fixdfsi, .-__fixdfsi +-#endif /* L_df_to_si */ +- +- +- +-#ifdef L_fixsfdi +- +-#ifndef __big_endian__ +- #define O1L $r1 +- #define O1H $r2 +-#else +- #define O1H $r1 +- #define O1L $r2 +-#endif +- .text +- .align 2 +- .global __fixsfdi +- .type __fixsfdi, @function +-__fixsfdi: +- push $lp +- +- srli $r3, $r0, #23 +- andi $r3, $r3, #0xff +- slli O1H, $r0, #8 +- move $r5, #0x80000000 +- or O1H, O1H, $r5 +- move O1L, #0 +- sltsi $r15, $r3, #0xbe +- beqzs8 .LCinfnan +- subri $r3, $r3, #0xbe +-.LL8: +- move $r5, #0x20 +- slt $r15, $r3, $r5 +- bnezs8 .LL9 +- move O1L, O1H +- move O1H, #0 +- addi $r3, $r3, #0xffffffe0 +- bnez O1L, .LL8 +-.LL9: +- beqz $r3, .LL10 +- move $r4, O1H +- srl O1L, O1L, $r3 +- srl O1H, O1H, $r3 +- subri $r3, $r3, #0x20 +- sll $r4, $r4, $r3 +- or O1L, O1L, $r4 +-.LL10: +- sltsi $r15, $r0, #0 +- beqzs8 .LCret +- +- subri O1H, O1H, #0 +- beqz O1L, .LL11 +- subri O1L, O1L, #0 +- subi45 O1H, #1 +-.LL11: +- +-.LCret: +- move $r0, $r1 +- move $r1, $r2 +- +-.LC999: +- pop $lp +- ret5 $lp +- +-.LCinfnan: +- sltsi $r15, $r0, #0 +- bnezs8 .LCret3 +- subri $r15, $r3, #0xff +- bnezs8 .Li7 +- slli $r5, O1H, #1 +- beqz $r5, .Li7 +- +-.LCret3: +- move O1H, #0x80000000 +- j .LCret +-.Li7: +- move O1H, #0x7fffffff +- move O1L, #-1 +- j .LCret +- .size __fixsfdi, .-__fixsfdi +-#endif /* L_fixsfdi */ +- +- +- +-#ifdef L_fixdfdi +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define O1L $r3 +- #define O1H $r4 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define O1H $r3 +- #define O1L $r4 +-#endif +- .text +- .align 2 +- 
.global __fixdfdi +- .type __fixdfdi, @function +-__fixdfdi: +- push $lp +- pushm $r6, $r6 +- +- slli $r5, P1H, #1 +- srli $r5, $r5, #21 +- slli O1H, P1H, #11 +- srli $r6, P1L, #21 +- or O1H, O1H, $r6 +- slli O1L, P1L, #11 +- move $r6, #0x80000000 +- or O1H, O1H, $r6 +- slti $r15, $r5, #0x43e +- beqzs8 .LCnaninf +- subri $r2, $r5, #0x43e +-.LL14: +- move $r6, #0x20 +- slt $r15, $r2, $r6 +- bnezs8 .LL15 +- move O1L, O1H +- move O1H, #0 +- addi $r2, $r2, #0xffffffe0 +- bnez O1L, .LL14 +-.LL15: +- beqz $r2, .LL16 +- move P1L, O1H +- srl O1L, O1L, $r2 +- srl O1H, O1H, $r2 +- subri $r2, $r2, #0x20 +- sll P1L, P1L, $r2 +- or O1L, O1L, P1L +-.LL16: +- sltsi $r15, P1H, #0 +- beqzs8 .LCret +- +- subri O1H, O1H, #0 +- beqz O1L, .LL17 +- subri O1L, O1L, #0 +- subi45 O1H, #1 +-.LL17: +- +-.LCret: +- move P1L, O1L +- move P1H, O1H +- +-.LC999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- +-.LCnaninf: +- sltsi $r15, P1H, #0 +- bnezs8 .LCret3 +- subri $r15, $r5, #0x7ff +- bnezs8 .Li5 +- slli $r6, O1H, #1 +- or $r6, $r6, O1L +- beqz $r6, .Li5 +- +-.LCret3: +- move O1H, #0x80000000 +- move O1L, #0 +- j .LCret +-.Li5: +- move O1H, #0x7fffffff +- move O1L, #-1 +- j .LCret +- .size __fixdfdi, .-__fixdfdi +-#endif /* L_fixdfdi */ +- +- +- +-#ifdef L_fixunssfsi +- +- .global __fixunssfsi +- .type __fixunssfsi, @function +-__fixunssfsi: +- push $lp +- +- slli $r1, $r0, #8 +- move $r3, #0x80000000 +- or $r1, $r1, $r3 +- srli $r3, $r0, #23 +- andi $r3, $r3, #0xff +- subri $r2, $r3, #0x9e +- sltsi $r15, $r2, #0 +- bnezs8 .LLspec +- sltsi $r15, $r2, #0x20 +- bnezs8 .Li45 +- move $r0, #0 +- j .LL999 +-.Li45: +- srl $r1, $r1, $r2 +- sltsi $r15, $r0, #0 +- beqzs8 .Li46 +- subri $r1, $r1, #0 +-.Li46: +- move $r0, $r1 +- +-.LL999: +- pop $lp +- ret5 $lp +- +-.LLspec: +- move $r3, #0x7f800000 +- slt $r15, $r3, $r0 +- beqzs8 .Li47 +- move $r0, #0x80000000 +- j .LL999 +-.Li47: +- move $r0, #-1 +- j .LL999 +- .size __fixunssfsi, .-__fixunssfsi +-#endif /* L_fixunssfsi */ +- +- +- +-#ifdef L_fixunsdfsi 
+- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +-#else +- #define P1H $r0 +- #define P1L $r1 +-#endif +- .text +- .align 2 +- .global __fixunsdfsi +- .type __fixunsdfsi, @function +-__fixunsdfsi: +- push $lp +- pushm $r6, $r6 +- +- slli $r3, P1H, #11 +- srli $r6, P1L, #21 +- or $r3, $r3, $r6 +- move $r6, #0x80000000 +- or $r3, $r3, $r6 +- slli $r6, P1H, #1 +- srli $r6, $r6, #21 +- subri $r2, $r6, #0x41e +- sltsi $r15, $r2, #0 +- bnezs8 .LNnaninf +- move $r6, #0x20 +- slt $r15, $r2, $r6 +- bnezs8 .LL73 +- move $r3, #0 +-.LL73: +- srl $r3, $r3, $r2 +- sltsi $r15, P1H, #0 +- beqzs8 .Li53 +- subri $r3, $r3, #0 +-.Li53: +- move $r0, $r3 +- +-.LN999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- +-.LNnaninf: +- beqz P1L, .Li54 +- ori P1H, P1H, #1 +-.Li54: +- move $r6, #0x7ff00000 +- slt $r15, $r6, P1H +- beqzs8 .Li55 +- move $r0, #0x80000000 +- j .LN999 +-.Li55: +- move $r0, #-1 +- j .LN999 +- .size __fixunsdfsi, .-__fixunsdfsi +-#endif /* L_fixunsdfsi */ +- +- +- +-#ifdef L_fixunssfdi +- +-#ifndef __big_endian__ +- #define O1L $r1 +- #define O1H $r2 +-#else +- #define O1H $r1 +- #define O1L $r2 +-#endif +- .text +- .align 2 +- .global __fixunssfdi +- .type __fixunssfdi, @function +-__fixunssfdi: +- push $lp +- +- srli $r3, $r0, #23 +- andi $r3, $r3, #0xff +- slli O1H, $r0, #8 +- move $r5, #0x80000000 +- or O1H, O1H, $r5 +- move O1L, #0 +- sltsi $r15, $r3, #0xbe +- beqzs8 .LDinfnan +- subri $r3, $r3, #0xbe +-.LL12: +- move $r5, #0x20 +- slt $r15, $r3, $r5 +- bnezs8 .LL13 +- move O1L, O1H +- move O1H, #0 +- addi $r3, $r3, #0xffffffe0 +- bnez O1L, .LL12 +-.LL13: +- beqz $r3, .LL14 +- move $r4, O1H +- srl O1L, O1L, $r3 +- srl O1H, O1H, $r3 +- subri $r3, $r3, #0x20 +- sll $r4, $r4, $r3 +- or O1L, O1L, $r4 +-.LL14: +- sltsi $r15, $r0, #0 +- beqzs8 .LDret +- +- subri O1H, O1H, #0 +- beqz O1L, .LL15 +- subri O1L, O1L, #0 +- subi45 O1H, #1 +-.LL15: +- +-.LDret: +- move $r0, $r1 +- move $r1, $r2 +- +-.LD999: +- pop $lp +- ret5 $lp +- +-.LDinfnan: +- move O1H, 
#0x80000000 +- move O1L, #0 +- j .LDret +- .size __fixunssfdi, .-__fixunssfdi +-#endif /* L_fixunssfdi */ +- +- +- +-#ifdef L_fixunsdfdi +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define O1L $r3 +- #define O1H $r4 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define O1H $r3 +- #define O1L $r4 +-#endif +- .text +- .align 2 +- .global __fixunsdfdi +- .type __fixunsdfdi, @function +-__fixunsdfdi: +- push $lp +- pushm $r6, $r6 +- +- slli $r5, P1H, #1 +- srli $r5, $r5, #21 +- slli O1H, P1H, #11 +- srli $r6, P1L, #21 +- or O1H, O1H, $r6 +- slli O1L, P1L, #11 +- move $r6, #0x80000000 +- or O1H, O1H, $r6 +- slti $r15, $r5, #0x43e +- beqzs8 .LDnaninf +- subri $r2, $r5, #0x43e +-.LL18: +- move $r6, #0x20 +- slt $r15, $r2, $r6 +- bnezs8 .LL19 +- move O1L, O1H +- move O1H, #0 +- addi $r2, $r2, #0xffffffe0 +- bnez O1L, .LL18 +-.LL19: +- beqz $r2, .LL20 +- move P1L, O1H +- srl O1L, O1L, $r2 +- srl O1H, O1H, $r2 +- subri $r2, $r2, #0x20 +- sll P1L, P1L, $r2 +- or O1L, O1L, P1L +-.LL20: +- sltsi $r15, P1H, #0 +- beqzs8 .LDret +- +- subri O1H, O1H, #0 +- beqz O1L, .LL21 +- subri O1L, O1L, #0 +- subi45 O1H, #1 +-.LL21: +- +-.LDret: +- move P1L, O1L +- move P1H, O1H +- +-.LD999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- +-.LDnaninf: +- move O1H, #0x80000000 +- move O1L, #0 +- j .LDret +- .size __fixunsdfdi, .-__fixunsdfdi +-#endif /* L_fixunsdfdi */ +- +- +- +-#ifdef L_si_to_sf +- +- .text +- .align 2 +- .global __floatsisf +- .type __floatsisf, @function +-__floatsisf: +- push $lp +- +- move $r4, #0x80000000 +- and $r2, $r0, $r4 +- beqz $r0, .Li39 +- sltsi $r15, $r0, #0 +- beqzs8 .Li40 +- subri $r0, $r0, #0 +-.Li40: +- move $r1, #0x9e +-#ifdef __NDS32_PERF_EXT__ +- clz $r3, $r0 +-#else +- pushm $r0, $r2 +- pushm $r4, $r5 +- bal __clzsi2 +- move $r3, $r0 +- popm $r4, $r5 +- popm $r0, $r2 +-#endif +- sub $r1, $r1, $r3 +- sll $r0, $r0, $r3 +- +- #ADD($r0, $0x80) +- move $r15, #0x80 +- add $r0, $r0, $r15 +- slt $r15, $r0, $r15 +- +- #ADDC($r1, $0x0) +- 
add $r1, $r1, $r15 +- srai $r4, $r0, #8 +- andi $r4, $r4, #1 +- sub $r0, $r0, $r4 +- slli $r0, $r0, #1 +- srli $r0, $r0, #9 +- slli $r4, $r1, #23 +- or $r0, $r0, $r4 +-.Li39: +- or $r0, $r0, $r2 +- +-.LH999: +- pop $lp +- ret5 $lp +- .size __floatsisf, .-__floatsisf +-#endif /* L_si_to_sf */ +- +- +- +-#ifdef L_si_to_df +- +-#ifndef __big_endian__ +- #define O1L $r1 +- #define O1H $r2 +- #define O2L $r4 +- #define O2H $r5 +-#else +- #define O1H $r1 +- #define O1L $r2 +- #define O2H $r4 +- #define O2L $r5 +-#endif +- .text +- .align 2 +- .global __floatsidf +- .type __floatsidf, @function +-__floatsidf: +- push $lp +- pushm $r6, $r6 +- +- move O1L, #0 +- move O2H, O1L +- move $r3, O1L +- move O1H, $r0 +- beqz O1H, .Li39 +- sltsi $r15, O1H, #0 +- beqzs8 .Li40 +- move O2H, #0x80000000 +- +- subri O1H, O1H, #0 +- beqz O1L, .LL71 +- subri O1L, O1L, #0 +- subi45 O1H, #1 +-.LL71: +-.Li40: +- move $r3, #0x41e +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r4, $r2 +-#else +- pushm $r0, $r3 +- push $r5 +- move $r0, $r2 +- bal __clzsi2 +- move $r4, $r0 +- pop $r5 +- popm $r0, $r3 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r5, $r1 +-#else +- pushm $r0, $r4 +- move $r0, $r1 +- bal __clzsi2 +- move $r5, $r0 +- popm $r0, $r4 +-#endif +-#endif /* __big_endian__ */ +- sub $r3, $r3, O2L +- sll O1H, O1H, O2L +-.Li39: +- srli O2L, O1L, #11 +- slli $r6, O1H, #21 +- or O2L, O2L, $r6 +- slli $r6, O1H, #1 +- srli $r6, $r6, #12 +- or O2H, O2H, $r6 +- slli $r6, $r3, #20 +- or O2H, O2H, $r6 +- move $r0, $r4 +- move $r1, $r5 +- +-.LH999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- .size __floatsidf, .-__floatsidf +-#endif /* L_si_to_df */ +- +- +- +-#ifdef L_floatdisf +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +-#endif +- .text +- .align 2 +- .global __floatdisf +- .type __floatdisf, @function 
+-__floatdisf: +- push $lp +- pushm $r6, $r7 +- +- move $r7, #0x80000000 +- and $r5, P1H, $r7 +- move P2H, P1H +- move P2L, P1L +- or $r7, P1H, P1L +- beqz $r7, .Li1 +- sltsi $r15, P1H, #0 +- beqzs8 .Li2 +- +- subri P2H, P2H, #0 +- beqz P2L, .LL1 +- subri P2L, P2L, #0 +- subi45 P2H, #1 +-.LL1: +-.Li2: +- move $r4, #0xbe +- +- +- #NORMd($r2, $r6, P1L) +- bnez P2H, .LL2 +- bnez P2L, .LL3 +- move $r4, #0 +- j .LL4 +-.LL3: +- move P2H, P2L +- move P2L, #0 +- move $r6, #32 +- sub $r4, $r4, $r6 +-.LL2: +-#ifdef __NDS32_PERF_EXT__ +- clz $r6, P2H +-#else +- pushm $r0, $r5 +- move $r0, P2H +- bal __clzsi2 +- move $r6, $r0 +- popm $r0, $r5 +-#endif +- beqz $r6, .LL4 +- sub $r4, $r4, $r6 +- subri P1L, $r6, #32 +- srl P1L, P2L, P1L +- sll P2L, P2L, $r6 +- sll P2H, P2H, $r6 +- or P2H, P2H, P1L +-.LL4: +- #NORMd End +- +- beqz P2L, .Li3 +- ori P2H, P2H, #1 +-.Li3: +- #ADD(P2H, $0x80) +- move $r15, #0x80 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +- +- #ADDC($r4, $0x0) +- add $r4, $r4, $r15 +- srli $r7, P2H, #8 +- andi $r7, $r7, #1 +- sub P2H, P2H, $r7 +- slli P2H, P2H, #1 +- srli P2H, P2H, #9 +- slli $r7, $r4, #23 +- or P2H, P2H, $r7 +-.Li1: +- or $r0, P2H, $r5 +- +-.LA999: +- popm $r6, $r7 +- pop $lp +- ret5 $lp +- .size __floatdisf, .-__floatdisf +-#endif /* L_floatdisf */ +- +- +- +-#ifdef L_floatdidf +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +- #define O1L $r5 +- #define O1H $r6 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- #define O1H $r5 +- #define O1L $r6 +-#endif +- .text +- .align 2 +- .global __floatdidf +- .type __floatdidf, @function +-__floatdidf: +- push $lp +- pushm $r6, $r8 +- +- move $r4, #0 +- move $r7, $r4 +- move P2H, P1H +- move P2L, P1L +- or $r8, P1H, P1L +- beqz $r8, .Li1 +- move $r4, #0x43e +- sltsi $r15, P1H, #0 +- beqzs8 .Li2 +- move $r7, #0x80000000 +- +- subri P2H, P2H, #0 +- beqz P2L, .LL1 +- subri P2L, P2L, #0 +- subi45 P2H, #1 +-.LL1: +- 
+-.Li2: +- #NORMd($r2, O1H, O1L) +- bnez P2H, .LL2 +- bnez P2L, .LL3 +- move $r4, #0 +- j .LL4 +-.LL3: +- move P2H, P2L +- move P2L, #0 +- move O1H, #32 +- sub $r4, $r4, O1H +-.LL2: +-#ifdef __NDS32_PERF_EXT__ +- clz O1H, P2H +-#else /* not __NDS32_PERF_EXT__ */ +-/* +- Replace clz with function call. +- clz O1H, P2H +- EL: clz $r6, $r3 +- EB: clz $r5, $r2 +-*/ +-#ifndef __big_endian__ +- pushm $r0, $r5 +- move $r0, $r3 +- bal __clzsi2 +- move $r6, $r0 +- popm $r0, $r5 +-#else +- pushm $r0, $r4 +- move $r0, $r2 +- bal __clzsi2 +- move $r5, $r0 +- popm $r0, $r4 +-#endif +-#endif /* not __NDS32_PERF_EXT__ */ +- beqz O1H, .LL4 +- sub $r4, $r4, O1H +- subri O1L, O1H, #32 +- srl O1L, P2L, O1L +- sll P2L, P2L, O1H +- sll P2H, P2H, O1H +- or P2H, P2H, O1L +-.LL4: +- #NORMd End +- +- #ADD(P2L, $0x400) +- move $r15, #0x400 +- add P2L, P2L, $r15 +- slt $r15, P2L, $r15 +- +- +- #ADDCC(P2H, $0x0) +- beqzs8 .LL7 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +-.LL7: +- +- #ADDC($r4, $0x0) +- add $r4, $r4, $r15 +- srli $r8, P2L, #11 +- andi $r8, $r8, #1 +- sub P2L, P2L, $r8 +-.Li1: +- srli O1L, P2L, #11 +- slli $r8, P2H, #21 +- or O1L, O1L, $r8 +- slli O1H, P2H, #1 +- srli O1H, O1H, #12 +- slli $r8, $r4, #20 +- or O1H, O1H, $r8 +- or O1H, O1H, $r7 +- move P1L, O1L +- move P1H, O1H +- +-.LA999: +- popm $r6, $r8 +- pop $lp +- ret5 $lp +- .size __floatdidf, .-__floatdidf +-#endif /* L_floatdidf */ +- +- +- +-#ifdef L_floatunsisf +- +- .text +- .align 2 +- .global __floatunsisf +- .type __floatunsisf, @function +-__floatunsisf: +- push $lp +- +- beqz $r0, .Li41 +- move $r2, #0x9e +-#ifdef __NDS32_PERF_EXT__ +- clz $r1, $r0 +-#else +- push $r0 +- pushm $r2, $r5 +- bal __clzsi2 +- move $r1, $r0 +- popm $r2, $r5 +- pop $r0 +-#endif +- +- sub $r2, $r2, $r1 +- sll $r0, $r0, $r1 +- +- #ADD($r0, $0x80) +- move $r15, #0x80 +- add $r0, $r0, $r15 +- slt $r15, $r0, $r15 +- +- #ADDC($r2, $0x0) +- add $r2, $r2, $r15 +- srli $r3, $r0, #8 +- andi $r3, $r3, #1 +- sub $r0, $r0, $r3 +- slli $r0, $r0, 
#1 +- srli $r0, $r0, #9 +- slli $r3, $r2, #23 +- or $r0, $r0, $r3 +- +-.Li41: +-.LI999: +- pop $lp +- ret5 $lp +- .size __floatunsisf, .-__floatunsisf +-#endif /* L_floatunsisf */ +- +- +- +-#ifdef L_floatunsidf +- +-#ifndef __big_endian__ +- #define O1L $r1 +- #define O1H $r2 +- #define O2L $r4 +- #define O2H $r5 +-#else +- #define O1H $r1 +- #define O1L $r2 +- #define O2H $r4 +- #define O2L $r5 +-#endif +- .text +- .align 2 +- .global __floatunsidf +- .type __floatunsidf, @function +-__floatunsidf: +- push $lp +- pushm $r6, $r6 +- +- move O1L, #0 +- move $r3, O1L +- move O1H, $r0 +- beqz O1H, .Li41 +- move $r3, #0x41e +-#ifndef __big_endian__ +-#ifdef __NDS32_PERF_EXT__ +- clz $r5, $r2 +-#else +- pushm $r0, $r4 +- move $r0, $r2 +- bal __clzsi2 +- move $r5, $r0 +- popm $r0, $r4 +-#endif +-#else /* __big_endian__ */ +-#ifdef __NDS32_PERF_EXT__ +- clz $r4, $r1 +-#else +- pushm $r0, $r3 +- push $r5 +- move $r0, $r1 +- bal __clzsi2 +- move $r4, $r0 +- pop $r5 +- popm $r0, $r3 +-#endif +-#endif /* __big_endian__ */ +- sub $r3, $r3, O2H +- sll O1H, O1H, O2H +-.Li41: +- srli O2L, O1L, #11 +- slli $r6, O1H, #21 +- or O2L, O2L, $r6 +- slli O2H, O1H, #1 +- srli O2H, O2H, #12 +- slli $r6, $r3, #20 +- or O2H, O2H, $r6 +- move $r0, $r4 +- move $r1, $r5 +- +-.LI999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- .size __floatunsidf, .-__floatunsidf +-#endif /* L_floatunsidf */ +- +- +- +-#ifdef L_floatundisf +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +-#endif +- .text +- .align 2 +- .global __floatundisf +- .type __floatundisf, @function +-__floatundisf: +- push $lp +- pushm $r6, $r6 +- +- move P2H, P1H +- move P2L, P1L +- or $r6, P1H, P1L +- beqz $r6, .Li4 +- move $r4, #0xbe +- +- +- #NORMd($r2, $r5, P1L) +- bnez P2H, .LL5 +- bnez P2L, .LL6 +- move $r4, #0 +- j .LL7 +-.LL6: +- move P2H, P2L +- move P2L, #0 +- move $r5, #32 +- sub $r4, 
$r4, $r5 +-.LL5: +-#ifdef __NDS32_PERF_EXT__ +- clz $r5, P2H +-#else +- pushm $r0, $r4 +- move $r0, P2H +- bal __clzsi2 +- move $r5, $r0 +- popm $r0, $r4 +-#endif +- beqz $r5, .LL7 +- sub $r4, $r4, $r5 +- subri P1L, $r5, #32 +- srl P1L, P2L, P1L +- sll P2L, P2L, $r5 +- sll P2H, P2H, $r5 +- or P2H, P2H, P1L +-.LL7: +- #NORMd End +- +- beqz P2L, .Li5 +- ori P2H, P2H, #1 +-.Li5: +- #ADD(P2H, $0x80) +- move $r15, #0x80 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +- +- #ADDC($r4, $0x0) +- add $r4, $r4, $r15 +- srli $r6, P2H, #8 +- andi $r6, $r6, #1 +- sub P2H, P2H, $r6 +- slli P2H, P2H, #1 +- srli P2H, P2H, #9 +- slli $r6, $r4, #23 +- or P2H, P2H, $r6 +-.Li4: +- move $r0, P2H +- +-.LB999: +- popm $r6, $r6 +- pop $lp +- ret5 $lp +- .size __floatundisf, .-__floatundisf +-#endif /* L_floatundisf */ +- +- +- +-#ifdef L_floatundidf +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +- #define O1L $r5 +- #define O1H $r6 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +- #define O1H $r5 +- #define O1L $r6 +-#endif +- .text +- .align 2 +- .global __floatundidf +- .type __floatundidf, @function +-__floatundidf: +- push $lp +- pushm $r6, $r7 +- +- move $r4, #0 +- move P2H, P1H +- move P2L, P1L +- or $r7, P1H, P1L +- beqz $r7, .Li3 +- move $r4, #0x43e +- +- +- #NORMd($r2, O1H, O1L) +- bnez P2H, .LL8 +- bnez P2L, .LL9 +- move $r4, #0 +- j .LL10 +-.LL9: +- move P2H, P2L +- move P2L, #0 +- move O1H, #32 +- sub $r4, $r4, O1H +-.LL8: +-#ifdef __NDS32_PERF_EXT__ +- clz O1H, P2H +-#else /* not __NDS32_PERF_EXT__ */ +-/* +- Replace clz with function call. 
+- clz O1H, P2H +- EL: clz $r6, $r3 +- EB: clz $r5, $r2 +-*/ +-#ifndef __big_endian__ +- pushm $r0, $r5 +- move $r0, $r3 +- bal __clzsi2 +- move $r6, $r0 +- popm $r0, $r5 +-#else +- pushm $r0, $r4 +- move $r0, $r2 +- bal __clzsi2 +- move $r5, $r0 +- popm $r0, $r4 +-#endif +-#endif /* not __NDS32_PERF_EXT__ */ +- beqz O1H, .LL10 +- sub $r4, $r4, O1H +- subri O1L, O1H, #32 +- srl O1L, P2L, O1L +- sll P2L, P2L, O1H +- sll P2H, P2H, O1H +- or P2H, P2H, O1L +-.LL10: +- #NORMd End +- +- #ADD(P2L, $0x400) +- move $r15, #0x400 +- add P2L, P2L, $r15 +- slt $r15, P2L, $r15 +- +- +- #ADDCC(P2H, $0x0) +- beqzs8 .LL13 +- add P2H, P2H, $r15 +- slt $r15, P2H, $r15 +-.LL13: +- +- #ADDC($r4, $0x0) +- add $r4, $r4, $r15 +- srli $r7, P2L, #11 +- andi $r7, $r7, #1 +- sub P2L, P2L, $r7 +-.Li3: +- srli O1L, P2L, #11 +- slli $r7, P2H, #21 +- or O1L, O1L, $r7 +- slli O1H, P2H, #1 +- srli O1H, O1H, #12 +- slli $r7, $r4, #20 +- or O1H, O1H, $r7 +- move P1L, O1L +- move P1H, O1H +- +-.LB999: +- popm $r6, $r7 +- pop $lp +- ret5 $lp +- .size __floatundidf, .-__floatundidf +-#endif /* L_floatundidf */ +- +- +- +-#ifdef L_compare_sf +- +- .text +- .align 2 +- .global __cmpsf2 +- .type __cmpsf2, @function +-__cmpsf2: +- .global __eqsf2 +- .type __eqsf2, @function +-__eqsf2: +- .global __ltsf2 +- .type __ltsf2, @function +-__ltsf2: +- .global __lesf2 +- .type __lesf2, @function +-__lesf2: +- .global __nesf2 +- .type __nesf2, @function +-__nesf2: +- move $r4, #1 +- j .LA +- +- .global __gesf2 +- .type __gesf2, @function +-__gesf2: +- .global __gtsf2 +- .type __gtsf2, @function +-__gtsf2: +- move $r4, #-1 +-.LA: +- push $lp +- +- slli $r2, $r0, #1 +- slli $r3, $r1, #1 +- or $r5, $r2, $r3 +- beqz $r5, .LMequ +- move $r5, #0xff000000 +- slt $r15, $r5, $r2 +- bnezs8 .LMnan +- slt $r15, $r5, $r3 +- bnezs8 .LMnan +- srli $r2, $r2, #1 +- sltsi $r15, $r0, #0 +- beqzs8 .Li48 +- subri $r2, $r2, #0 +-.Li48: +- srli $r3, $r3, #1 +- sltsi $r15, $r1, #0 +- beqzs8 .Li49 +- subri $r3, $r3, #0 +-.Li49: +- slts 
$r15, $r2, $r3 +- beqzs8 .Li50 +- move $r0, #-1 +- j .LM999 +-.Li50: +- slts $r15, $r3, $r2 +- beqzs8 .LMequ +- move $r0, #1 +- j .LM999 +- +-.LMequ: +- move $r0, #0 +- +-.LM999: +- pop $lp +- ret5 $lp +- +-.LMnan: +- move $r0, $r4 +- j .LM999 +- .size __cmpsf2, .-__cmpsf2 +- .size __eqsf2, .-__eqsf2 +- .size __ltsf2, .-__ltsf2 +- .size __lesf2, .-__lesf2 +- .size __nesf2, .-__nesf2 +- .size __gesf2, .-__gesf2 +- .size __gtsf2, .-__gtsf2 +-#endif /* L_compare_sf */ +- +- +- +-#ifdef L_compare_df +- +-#ifdef __big_endian__ +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +-#else +- #define P1H $r1 +- #define P1L $r0 +- #define P2H $r3 +- #define P2L $r2 +-#endif +- .align 2 +- .globl __gtdf2 +- .globl __gedf2 +- .globl __ltdf2 +- .globl __ledf2 +- .globl __eqdf2 +- .globl __nedf2 +- .globl __cmpdf2 +- .type __gtdf2, @function +- .type __gedf2, @function +- .type __ltdf2, @function +- .type __ledf2, @function +- .type __eqdf2, @function +- .type __nedf2, @function +- .type __cmpdf2, @function +-__gtdf2: +-__gedf2: +- movi $r4, -1 +- b .L1 +- +-__ltdf2: +-__ledf2: +-__cmpdf2: +-__nedf2: +-__eqdf2: +- movi $r4, 1 +-.L1: +-#if defined (__NDS32_ISA_V3M__) +- push25 $r10, 0 +-#else +- smw.adm $r6, [$sp], $r9, 0 +-#endif +- +- sethi $r5, 0x7ff00 +- and $r6, P1H, $r5 ! r6=aExp +- and $r7, P2H, $r5 ! r7=bExp +- slli $r8, P1H, 12 ! r8=aSig0 +- slli $r9, P2H, 12 ! r9=bSig0 +- beq $r6, $r5, .L11 ! aExp==0x7ff +- beq $r7, $r5, .L12 ! bExp==0x7ff +-.L2: +- slli $ta, P1H, 1 ! ta=ahigh<<1 +- or $ta, P1L, $ta ! +- xor $r5, P1H, P2H ! r5=ahigh^bhigh +- beqz $ta, .L3 ! if(ahigh<<1)==0,go .L3 +- !------------------------------- +- ! (ahigh<<1)!=0 || (bhigh<<1)!=0 +- !------------------------------- +-.L4: +- beqz $r5, .L5 ! ahigh==bhigh, go .L5 +- !-------------------- +- ! a != b +- !-------------------- +-.L6: +- bltz $r5, .L7 ! if(aSign!=bSign), go .L7 +- !-------------------- +- ! aSign==bSign +- !-------------------- +- slt $ta, $r6, $r7 ! 
ta=(aExp<bExp) +- bne $r6, $r7, .L8 ! if(aExp!=bExp),go .L8 +- slt $ta, $r8, $r9 ! ta=(aSig0<bSig0) +- bne $r8, $r9, .L8 ! if(aSig0!=bSig0),go .L8 +- slt $ta, P1L, P2L ! ta=(aSig1<bSig1) +-.L8: +- beqz $ta, .L10 ! if(|a|>|b|), go .L10 +- nor $r0, P2H, P2H ! if(|a|<|b|),return (~yh) +-.L14: +-#if defined (__NDS32_ISA_V3M__) +- pop25 $r10, 0 +-#else +- lmw.bim $r6, [$sp], $r9, 0 +- ret +-#endif +-.L10: +- ori $r0, P2H, 1 ! return (yh|1) +- b .L14 +- !-------------------- +- ! (ahigh<<1)=0 +- !-------------------- +-.L3: +- slli $ta, P2H, 1 ! ta=bhigh<<1 +- or $ta, P2L, $ta ! +- bnez $ta, .L4 ! ta=(bhigh<<1)!=0,go .L4 +-.L5: +- xor $ta, P1L, P2L ! ta=alow^blow +- bnez $ta, .L6 ! alow!=blow,go .L6 +- movi $r0, 0 ! a==b, return 0 +- b .L14 +- !-------------------- +- ! aExp=0x7ff; +- !-------------------- +-.L11: +- or P1L, P1L, $r8 ! x1=(aSig0|aSig1) +- bnez P1L, .L13 ! if(a=nan), go.L13 +- xor $ta, $r7, $r5 ! ta=(bExp^0x7ff) +- bnez $ta, .L2 ! if(bExp!=0x7ff), go .L2 +- !-------------------- +- ! bExp=0x7ff; +- !-------------------- +-.L12: +- or $ta, P2L, $r9 ! ta=(bSig0|bSig1) +- beqz $ta, .L2 ! if(b!=nan), go .L2 +-.L13: +- move $r0, $r4 +- b .L14 +- !-------------------- +- ! aSign!=bSign +- !-------------------- +-.L7: +- ori $r0, P1H, 1 ! 
if(aSign!=bSign), return (ahigh|1) +- b .L14 +- +- .size __gtdf2, .-__gtdf2 +- .size __gedf2, .-__gedf2 +- .size __ltdf2, .-__ltdf2 +- .size __ledf2, .-__ledf2 +- .size __eqdf2, .-__eqdf2 +- .size __nedf2, .-__nedf2 +- .size __cmpdf2, .-__cmpdf2 +-#endif /* L_compare_df */ +- +- +- +-#ifdef L_unord_sf +- +- .text +- .align 2 +- .global __unordsf2 +- .type __unordsf2, @function +-__unordsf2: +- push $lp +- +- slli $r2, $r0, #1 +- move $r3, #0xff000000 +- slt $r15, $r3, $r2 +- beqzs8 .Li52 +- move $r0, #1 +- j .LP999 +-.Li52: +- slli $r2, $r1, #1 +- move $r3, #0xff000000 +- slt $r15, $r3, $r2 +- beqzs8 .Li53 +- move $r0, #1 +- j .LP999 +-.Li53: +- move $r0, #0 +- +-.LP999: +- pop $lp +- ret5 $lp +- .size __unordsf2, .-__unordsf2 +-#endif /* L_unord_sf */ +- +- +- +-#ifdef L_unord_df +- +-#ifndef __big_endian__ +- #define P1L $r0 +- #define P1H $r1 +- #define P2L $r2 +- #define P2H $r3 +-#else +- #define P1H $r0 +- #define P1L $r1 +- #define P2H $r2 +- #define P2L $r3 +-#endif +- .text +- .align 2 +- .global __unorddf2 +- .type __unorddf2, @function +-__unorddf2: +- push $lp +- +- slli $r4, P1H, #1 +- beqz P1L, .Li66 +- addi $r4, $r4, #1 +-.Li66: +- move $r5, #0xffe00000 +- slt $r15, $r5, $r4 +- beqzs8 .Li67 +- move $r0, #1 +- j .LR999 +-.Li67: +- slli $r4, P2H, #1 +- beqz P2L, .Li68 +- addi $r4, $r4, #1 +-.Li68: +- move $r5, #0xffe00000 +- slt $r15, $r5, $r4 +- beqzs8 .Li69 +- move $r0, #1 +- j .LR999 +-.Li69: +- move $r0, #0 +- +-.LR999: +- pop $lp +- ret5 $lp +- .size __unorddf2, .-__unorddf2 +-#endif /* L_unord_df */ +-/* ------------------------------------------- */ +-/* DPBIT floating point operations for libgcc */ +-/* ------------------------------------------- */ +diff --git a/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c b/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c +deleted file mode 100644 +index 6afd6ab..0000000 +--- a/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c ++++ /dev/null +@@ -1,38 +0,0 @@ +-/* mculib libgcc routines of Andes NDS32 cpu for GNU 
compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. +- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +-extern int __clzsi2 (int val); +-int +-__clzdi2 (long long val) +-{ +- if (val >> 32) +- { +- return __clzsi2 (val >> 32); +- } +- else +- { +- return __clzsi2 (val) + 32; +- } +-} +diff --git a/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c b/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c +deleted file mode 100644 +index 407caaf..0000000 +--- a/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c ++++ /dev/null +@@ -1,49 +0,0 @@ +-/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler +- Copyright (C) 2012-2016 Free Software Foundation, Inc. +- Contributed by Andes Technology Corporation. +- +- This file is part of GCC. +- +- GCC is free software; you can redistribute it and/or modify it +- under the terms of the GNU General Public License as published +- by the Free Software Foundation; either version 3, or (at your +- option) any later version. 
+- +- GCC is distributed in the hope that it will be useful, but WITHOUT +- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +- License for more details. +- +- Under Section 7 of GPL version 3, you are granted additional +- permissions described in the GCC Runtime Library Exception, version +- 3.1, as published by the Free Software Foundation. +- +- You should have received a copy of the GNU General Public License and +- a copy of the GCC Runtime Library Exception along with this program; +- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +- <http://www.gnu.org/licenses/>. */ +- +-int +-__clzsi2 (int val) +-{ +- int i = 32; +- int j = 16; +- int temp; +- +- for (; j; j >>= 1) +- { +- if (temp = val >> j) +- { +- if (j == 1) +- { +- return (i - 2); +- } +- else +- { +- i -= j; +- val = temp; +- } +- } +- } +- return (i - val); +-} +diff --git a/libgcc/config/nds32/linux-atomic.c b/libgcc/config/nds32/linux-atomic.c +new file mode 100644 +index 0000000..69f589b +--- /dev/null ++++ b/libgcc/config/nds32/linux-atomic.c +@@ -0,0 +1,282 @@ ++/* Linux-specific atomic operations for NDS32 Linux. ++ Copyright (C) 2012-2016 Free Software Foundation, Inc. ++ ++This file is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++This file is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++General Public License for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. 
++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++/* We implement byte, short and int versions of each atomic operation ++ using the kernel helper defined below. There is no support for ++ 64-bit operations yet. */ ++ ++/* This function copy form NDS32 Linux-kernal. */ ++static inline int ++__kernel_cmpxchg (int oldval, int newval, int *mem) ++{ ++ int temp1, temp2, temp3, offset; ++ ++ asm volatile ("msync\tall\n" ++ "movi\t%0, #0\n" ++ "1:\n" ++ "\tllw\t%1, [%4+%0]\n" ++ "\tsub\t%3, %1, %6\n" ++ "\tcmovz\t%2, %5, %3\n" ++ "\tcmovn\t%2, %1, %3\n" ++ "\tscw\t%2, [%4+%0]\n" ++ "\tbeqz\t%2, 1b\n" ++ : "=&r" (offset), "=&r" (temp3), "=&r" (temp2), "=&r" (temp1) ++ : "r" (mem), "r" (newval), "r" (oldval) : "memory"); ++ ++ return temp1; ++} ++ ++#define HIDDEN __attribute__ ((visibility ("hidden"))) ++ ++#ifdef __NDS32_EL__ ++#define INVERT_MASK_1 0 ++#define INVERT_MASK_2 0 ++#else ++#define INVERT_MASK_1 24 ++#define INVERT_MASK_2 16 ++#endif ++ ++#define MASK_1 0xffu ++#define MASK_2 0xffffu ++ ++#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ ++ int HIDDEN \ ++ __sync_fetch_and_##OP##_4 (int *ptr, int val) \ ++ { \ ++ int failure, tmp; \ ++ \ ++ do { \ ++ tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ ++ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ ++ } while (failure != 0); \ ++ \ ++ return tmp; \ ++ } ++ ++FETCH_AND_OP_WORD (add, , +) ++FETCH_AND_OP_WORD (sub, , -) ++FETCH_AND_OP_WORD (or, , |) ++FETCH_AND_OP_WORD (and, , &) ++FETCH_AND_OP_WORD (xor, , ^) ++FETCH_AND_OP_WORD (nand, ~, &) ++ ++#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH ++#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH ++ ++/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for ++ subword-sized quantities. 
*/ ++ ++#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ ++ TYPE HIDDEN \ ++ NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ ++ { \ ++ int *wordptr = (int *) ((unsigned long) ptr & ~3); \ ++ unsigned int mask, shift, oldval, newval; \ ++ int failure; \ ++ \ ++ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ ++ mask = MASK_##WIDTH << shift; \ ++ \ ++ do { \ ++ oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ ++ newval = ((PFX_OP (((oldval & mask) >> shift) \ ++ INF_OP (unsigned int) val)) << shift) & mask; \ ++ newval |= oldval & ~mask; \ ++ failure = __kernel_cmpxchg (oldval, newval, wordptr); \ ++ } while (failure != 0); \ ++ \ ++ return (RETURN & mask) >> shift; \ ++ } ++ ++ ++SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) ++ ++SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) ++ ++#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ ++ int HIDDEN \ ++ __sync_##OP##_and_fetch_4 (int *ptr, int val) \ ++ { \ ++ int tmp, failure; \ ++ \ ++ do { \ ++ tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ ++ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ ++ } while (failure != 0); \ ++ \ ++ return PFX_OP (tmp INF_OP val); \ ++ } ++ ++OP_AND_FETCH_WORD (add, , +) ++OP_AND_FETCH_WORD (sub, , -) ++OP_AND_FETCH_WORD (or, , |) ++OP_AND_FETCH_WORD (and, , &) ++OP_AND_FETCH_WORD (xor, , ^) ++OP_AND_FETCH_WORD (nand, ~, &) ++ ++SUBWORD_SYNC_OP (add, , +, unsigned 
short, 2, newval) ++SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval) ++ ++SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) ++ ++int HIDDEN ++__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) ++{ ++ int actual_oldval, fail; ++ ++ while (1) ++ { ++ actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); ++ ++ if (oldval != actual_oldval) ++ return actual_oldval; ++ ++ fail = __kernel_cmpxchg (actual_oldval, newval, ptr); ++ ++ if (!fail) ++ return oldval; ++ } ++} ++ ++#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ ++ TYPE HIDDEN \ ++ __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ ++ TYPE newval) \ ++ { \ ++ int *wordptr = (int *)((unsigned long) ptr & ~3), fail; \ ++ unsigned int mask, shift, actual_oldval, actual_newval; \ ++ \ ++ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ ++ mask = MASK_##WIDTH << shift; \ ++ \ ++ while (1) \ ++ { \ ++ actual_oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ ++ \ ++ if (((actual_oldval & mask) >> shift) != (unsigned int) oldval) \ ++ return (actual_oldval & mask) >> shift; \ ++ \ ++ actual_newval = (actual_oldval & ~mask) \ ++ | (((unsigned int) newval << shift) & mask); \ ++ \ ++ fail = __kernel_cmpxchg (actual_oldval, actual_newval, \ ++ wordptr); \ ++ \ ++ if (!fail) \ ++ return oldval; \ ++ } \ ++ } ++ ++SUBWORD_VAL_CAS (unsigned short, 2) ++SUBWORD_VAL_CAS (unsigned char, 1) ++ ++typedef unsigned char bool; ++ ++bool HIDDEN ++__sync_bool_compare_and_swap_4 (int *ptr, int 
oldval, int newval) ++{ ++ int failure = __kernel_cmpxchg (oldval, newval, ptr); ++ return (failure == 0); ++} ++ ++#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \ ++ bool HIDDEN \ ++ __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ ++ TYPE newval) \ ++ { \ ++ TYPE actual_oldval \ ++ = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ ++ return (oldval == actual_oldval); \ ++ } ++ ++SUBWORD_BOOL_CAS (unsigned short, 2) ++SUBWORD_BOOL_CAS (unsigned char, 1) ++ ++int HIDDEN ++__sync_lock_test_and_set_4 (int *ptr, int val) ++{ ++ int failure, oldval; ++ ++ do { ++ oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); ++ failure = __kernel_cmpxchg (oldval, val, ptr); ++ } while (failure != 0); ++ ++ return oldval; ++} ++ ++#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ ++ TYPE HIDDEN \ ++ __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ ++ { \ ++ int failure; \ ++ unsigned int oldval, newval, shift, mask; \ ++ int *wordptr = (int *) ((unsigned long) ptr & ~3); \ ++ \ ++ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ ++ mask = MASK_##WIDTH << shift; \ ++ \ ++ do { \ ++ oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ ++ newval = (oldval & ~mask) \ ++ | (((unsigned int) val << shift) & mask); \ ++ failure = __kernel_cmpxchg (oldval, newval, wordptr); \ ++ } while (failure != 0); \ ++ \ ++ return (oldval & mask) >> shift; \ ++ } ++ ++SUBWORD_TEST_AND_SET (unsigned short, 2) ++SUBWORD_TEST_AND_SET (unsigned char, 1) ++ ++#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ ++ void HIDDEN \ ++ __sync_lock_release_##WIDTH (TYPE *ptr) \ ++ { \ ++ /* All writes before this point must be seen before we release \ ++ the lock itself. 
*/ \ ++ __builtin_nds32_msync_all (); \ ++ *ptr = 0; \ ++ } ++ ++SYNC_LOCK_RELEASE (int, 4) ++SYNC_LOCK_RELEASE (short, 2) ++SYNC_LOCK_RELEASE (char, 1) +diff --git a/libgcc/config/nds32/linux-unwind.h b/libgcc/config/nds32/linux-unwind.h +new file mode 100644 +index 0000000..921edf9 +--- /dev/null ++++ b/libgcc/config/nds32/linux-unwind.h +@@ -0,0 +1,156 @@ ++/* DWARF2 EH unwinding support for NDS32 Linux signal frame. ++ Copyright (C) 2014-2015 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef inhibit_libc ++ ++/* Do code reading to identify a signal frame, and set the frame ++ state data appropriately. See unwind-dw2.c for the structs. ++ The corresponding bits in the Linux kernel are in ++ arch/nds32/kernel/signal.c. */ ++ ++#include <signal.h> ++#include <asm/unistd.h> ++ ++/* Exactly the same layout as the kernel structures, unique names. 
*/ ++ ++/* arch/nds32/kernel/signal.c */ ++struct _sigframe { ++ struct ucontext uc; ++ unsigned long retcode; ++}; ++ ++struct _rt_sigframe { ++ siginfo_t info; ++ struct _sigframe sig; ++}; ++#define SIGRETURN 0xeb0e0a64 ++#define RT_SIGRETURN 0xab150a64 ++ ++#define MD_FALLBACK_FRAME_STATE_FOR nds32_fallback_frame_state ++ ++/* This function is supposed to be invoked by uw_frame_state_for() ++ when there is no unwind data available. ++ ++ Generally, given the _Unwind_Context CONTEXT for a stack frame, ++ we need to look up its caller and decode information into FS. ++ However, if the exception handling happens within a signal handler, ++ the return address of signal handler is a special module, which ++ contains signal return syscall and has no FDE in the .eh_frame section. ++ We need to implement MD_FALLBACK_FRAME_STATE_FOR so that we can ++ unwind through signal frames. */ ++static _Unwind_Reason_Code ++nds32_fallback_frame_state (struct _Unwind_Context *context, ++ _Unwind_FrameState *fs) ++{ ++ u_int32_t *pc = (u_int32_t *) context->ra; ++ struct sigcontext *sc_; ++ _Unwind_Ptr new_cfa; ++ ++#ifdef __NDS32_EB__ ++#error "Signal handler is not supported for force unwind." ++#endif ++ ++ if ((_Unwind_Ptr) pc & 3) ++ return _URC_END_OF_STACK; ++ ++ /* Check if we are going through a signal handler. ++ See arch/nds32/kernel/signal.c implementation. ++ SWI_SYS_SIGRETURN -> (0xeb0e0a64) ++ SWI_SYS_RT_SIGRETURN -> (0xab150a64) ++ FIXME: Currently we only handle little endian (EL) case. */ ++ if (pc[0] == SIGRETURN) ++ { ++ /* Using '_sigframe' memory address to locate kernel's sigcontext. ++ The sigcontext structure is in arch/nds32/include/asm/sigcontext.h. */ ++ struct _sigframe *rt_; ++ rt_ = context->cfa; ++ sc_ = &rt_->uc.uc_mcontext; ++ } ++ else if (pc[0] == RT_SIGRETURN) ++ { ++ /* Using '_rt_sigframe' memory address to locate kernel's sigcontext. 
*/ ++ struct _rt_sigframe *rt_; ++ rt_ = context->cfa; ++ sc_ = &rt_->sig.uc.uc_mcontext; ++ } ++ else ++ return _URC_END_OF_STACK; ++ ++ /* Update cfa from sigcontext. */ ++ new_cfa = (_Unwind_Ptr) sc_; ++ fs->regs.cfa_how = CFA_REG_OFFSET; ++ fs->regs.cfa_reg = STACK_POINTER_REGNUM; ++ fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; ++ ++#define NDS32_PUT_FS_REG(NUM, NAME) \ ++ (fs->regs.reg[NUM].how = REG_SAVED_OFFSET, \ ++ fs->regs.reg[NUM].loc.offset = (_Unwind_Ptr) &(sc_->NAME) - new_cfa) ++ ++ /* Restore all registers value. */ ++ NDS32_PUT_FS_REG (0, nds32_r0); ++ NDS32_PUT_FS_REG (1, nds32_r1); ++ NDS32_PUT_FS_REG (2, nds32_r2); ++ NDS32_PUT_FS_REG (3, nds32_r3); ++ NDS32_PUT_FS_REG (4, nds32_r4); ++ NDS32_PUT_FS_REG (5, nds32_r5); ++ NDS32_PUT_FS_REG (6, nds32_r6); ++ NDS32_PUT_FS_REG (7, nds32_r7); ++ NDS32_PUT_FS_REG (8, nds32_r8); ++ NDS32_PUT_FS_REG (9, nds32_r9); ++ NDS32_PUT_FS_REG (10, nds32_r10); ++ NDS32_PUT_FS_REG (11, nds32_r11); ++ NDS32_PUT_FS_REG (12, nds32_r12); ++ NDS32_PUT_FS_REG (13, nds32_r13); ++ NDS32_PUT_FS_REG (14, nds32_r14); ++ NDS32_PUT_FS_REG (15, nds32_r15); ++ NDS32_PUT_FS_REG (16, nds32_r16); ++ NDS32_PUT_FS_REG (17, nds32_r17); ++ NDS32_PUT_FS_REG (18, nds32_r18); ++ NDS32_PUT_FS_REG (19, nds32_r19); ++ NDS32_PUT_FS_REG (20, nds32_r20); ++ NDS32_PUT_FS_REG (21, nds32_r21); ++ NDS32_PUT_FS_REG (22, nds32_r22); ++ NDS32_PUT_FS_REG (23, nds32_r23); ++ NDS32_PUT_FS_REG (24, nds32_r24); ++ NDS32_PUT_FS_REG (25, nds32_r25); ++ ++ NDS32_PUT_FS_REG (28, nds32_fp); ++ NDS32_PUT_FS_REG (29, nds32_gp); ++ NDS32_PUT_FS_REG (30, nds32_lp); ++ NDS32_PUT_FS_REG (31, nds32_sp); ++ ++ /* Restore PC, point to trigger signal instruction. */ ++ NDS32_PUT_FS_REG (32, nds32_ipc); ++ ++#undef NDS32_PUT_FS_REG ++ ++ /* The retaddr is PC, use PC to find FDE. 
*/ ++ fs->retaddr_column = 32; ++ fs->signal_frame = 1; ++ ++ return _URC_NO_REASON; ++} ++ ++#endif +diff --git a/libgcc/config/nds32/sfp-machine.h b/libgcc/config/nds32/sfp-machine.h +index d822898..930a32e 100644 +--- a/libgcc/config/nds32/sfp-machine.h ++++ b/libgcc/config/nds32/sfp-machine.h +@@ -76,6 +76,25 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); + R##_c = FP_CLS_NAN; \ + } while (0) + ++#ifdef NDS32_ABI_2FP_PLUS ++#define FP_RND_NEAREST 0x0 ++#define FP_RND_PINF 0x1 ++#define FP_RND_MINF 0x2 ++#define FP_RND_ZERO 0x3 ++#define FP_RND_MASK 0x3 ++ ++#define _FP_DECL_EX \ ++ unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST ++ ++#define FP_INIT_ROUNDMODE \ ++ do { \ ++ _fcsr = __builtin_nds32_fmfcsr (); \ ++ } while (0) ++ ++#define FP_ROUNDMODE (_fcsr & FP_RND_MASK) ++ ++#endif ++ + /* Not checked. */ + #define _FP_TININESS_AFTER_ROUNDING 0 + +diff --git a/libgcc/config/nds32/t-nds32 b/libgcc/config/nds32/t-nds32 +index 20c8a3f..4e58b1b 100644 +--- a/libgcc/config/nds32/t-nds32 ++++ b/libgcc/config/nds32/t-nds32 +@@ -26,33 +26,22 @@ + # Make sure the linker script include these two objects + # for building .ctors/.dtors sections. + +-# Use -DCRT_BEGIN to create beginning parts of .init and .fini content +-# Make sure you are building crtbegin1.o with -O0 optimization, +-# otherwise the static function will be optimized out ++# Use -DCRT_BEGIN to create beginning parts of .init and .fini content. 
+ crtbegin1.o: $(srcdir)/config/nds32/initfini.c $(GCC_PASSES) $(CONFIG_H) + $(GCC_FOR_TARGET) $(INCLUDES) \ + $(CFLAGS) \ + -DCRT_BEGIN \ + -finhibit-size-directive -fno-inline-functions \ +- -O0 -c $(srcdir)/config/nds32/initfini.c -o crtbegin1.o ++ -fno-toplevel-reorder \ ++ -Os -c $(srcdir)/config/nds32/initfini.c -o crtbegin1.o + +-# Use -DCRT_END to create ending parts of .init and .fini content +-# Make sure you are building crtend1.o with -O0 optimization, +-# otherwise the static function will be optimized out ++# Use -DCRT_END to create ending parts of .init and .fini content. + crtend1.o: $(srcdir)/config/nds32/initfini.c $(GCC_PASSES) $(CONFIG_H) + $(GCC_FOR_TARGET) $(INCLUDES) \ + $(CFLAGS) \ + -DCRT_END \ + -finhibit-size-directive -fno-inline-functions \ +- -O0 -c $(srcdir)/config/nds32/initfini.c -o crtend1.o +- +-# Use this rule if and only if your crt0.o does not come from library +-# Also, be sure to add 'crtzero.o' in extra_parts in libgcc/config.host +-# and change STARTFILE_SPEC in nds32.h +-# +-#crtzero.o: $(srcdir)/config/nds32/crtzero.S $(GCC_PASSES) $(CONFIG_H) +-# $(GCC_FOR_TARGET) $(INCLUDES) \ +-# -c $(srcdir)/config/nds32/crtzero.S -o crtzero.o +- ++ -fno-toplevel-reorder \ ++ -Os -c $(srcdir)/config/nds32/initfini.c -o crtend1.o + + # ------------------------------------------------------------------------ +diff --git a/libgcc/config/nds32/t-nds32-mculib b/libgcc/config/nds32/t-nds32-glibc +similarity index 50% +rename from libgcc/config/nds32/t-nds32-mculib +rename to libgcc/config/nds32/t-nds32-glibc +index b4f7b4c..385644b 100644 +--- a/libgcc/config/nds32/t-nds32-mculib ++++ b/libgcc/config/nds32/t-nds32-glibc +@@ -1,4 +1,4 @@ +-# Rules of mculib library makefile of Andes NDS32 cpu for GNU compiler ++# Rules of glibc library makefile of Andes NDS32 cpu for GNU compiler + # Copyright (C) 2012-2016 Free Software Foundation, Inc. + # Contributed by Andes Technology Corporation. 
+ # +@@ -19,59 +19,16 @@ + # <http://www.gnu.org/licenses/>. + + # Compiler flags to use when compiling 'libgcc2.c' +-HOST_LIBGCC2_CFLAGS = -Os ++HOST_LIBGCC2_CFLAGS = -O2 -fPIC -fwrapv ++LIB2ADD += $(srcdir)/config/nds32/linux-atomic.c + +- +-LIB1ASMSRC = nds32/lib1asmsrc-mculib.S +- +-LIB1ASMFUNCS = \ +- _addsub_sf \ +- _sf_to_si \ +- _divsi3 \ +- _divdi3 \ +- _modsi3 \ +- _moddi3 \ +- _mulsi3 \ +- _udivsi3 \ +- _udivdi3 \ +- _udivmoddi4 \ +- _umodsi3 \ +- _umoddi3 \ +- _muldi3 \ +- _addsub_df \ +- _mul_sf \ +- _mul_df \ +- _div_sf \ +- _div_df \ +- _negate_sf \ +- _negate_df \ +- _sf_to_df \ +- _df_to_sf \ +- _df_to_si \ +- _fixsfdi \ +- _fixdfdi \ +- _fixunssfsi \ +- _fixunsdfsi \ +- _fixunssfdi \ +- _fixunsdfdi \ +- _si_to_sf \ +- _si_to_df \ +- _floatdisf \ +- _floatdidf \ +- _floatunsisf \ +- _floatunsidf \ +- _floatundisf \ +- _floatundidf \ +- _compare_sf \ +- _compare_df \ +- _unord_sf \ +- _unord_df ++#LIB1ASMSRC = nds32/lib1asmsrc-newlib.S ++#LIB1ASMFUNCS = _divsi3 _modsi3 _udivsi3 _umodsi3 + + # List of functions not to build from libgcc2.c. +-LIB2FUNCS_EXCLUDE = _clzsi2 _clzdi2 ++#LIB2FUNCS_EXCLUDE = _clzsi2 + + # List of extra C and assembler files(*.S) to add to static libgcc2. 
+-LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-mculib/_clzsi2.c +-LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-mculib/_clzdi2.c ++#LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-newlib/_clzsi2.c + + # ------------------------------------------------------------------------ +diff --git a/libgcc/config/nds32/t-nds32-isr b/libgcc/config/nds32/t-nds32-isr +index 62b6867..6493838 100644 +--- a/libgcc/config/nds32/t-nds32-isr ++++ b/libgcc/config/nds32/t-nds32-isr +@@ -23,11 +23,15 @@ + # Makfile fragment rules for libnds32_isr.a to support ISR attribute extension + ############################################################################### + +-# basic flags setting +-ISR_CFLAGS = $(CFLAGS) -c +- +-# the object files we would like to create +-LIBNDS32_ISR_16B_OBJS = \ ++# Basic flags setting. ++ifneq ($(filter -mext-dsp,$(CFLAGS)),) ++ISR_CFLAGS = $(CFLAGS) -mno-force-no-ext-zol -mext-zol -c ++else ++ISR_CFLAGS = $(CFLAGS) -mno-force-no-ext-zol -c ++endif ++ ++# The object files we would like to create. 
++LIBNDS32_ISR_VEC_OBJS = \ + vec_vid00.o vec_vid01.o vec_vid02.o vec_vid03.o \ + vec_vid04.o vec_vid05.o vec_vid06.o vec_vid07.o \ + vec_vid08.o vec_vid09.o vec_vid10.o vec_vid11.o \ +@@ -46,40 +50,9 @@ LIBNDS32_ISR_16B_OBJS = \ + vec_vid60.o vec_vid61.o vec_vid62.o vec_vid63.o \ + vec_vid64.o vec_vid65.o vec_vid66.o vec_vid67.o \ + vec_vid68.o vec_vid69.o vec_vid70.o vec_vid71.o \ +- vec_vid72.o \ +- excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ +- excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ +- intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ +- intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ +- reset.o +- +-LIBNDS32_ISR_4B_OBJS = \ +- vec_vid00_4b.o vec_vid01_4b.o vec_vid02_4b.o vec_vid03_4b.o \ +- vec_vid04_4b.o vec_vid05_4b.o vec_vid06_4b.o vec_vid07_4b.o \ +- vec_vid08_4b.o vec_vid09_4b.o vec_vid10_4b.o vec_vid11_4b.o \ +- vec_vid12_4b.o vec_vid13_4b.o vec_vid14_4b.o vec_vid15_4b.o \ +- vec_vid16_4b.o vec_vid17_4b.o vec_vid18_4b.o vec_vid19_4b.o \ +- vec_vid20_4b.o vec_vid21_4b.o vec_vid22_4b.o vec_vid23_4b.o \ +- vec_vid24_4b.o vec_vid25_4b.o vec_vid26_4b.o vec_vid27_4b.o \ +- vec_vid28_4b.o vec_vid29_4b.o vec_vid30_4b.o vec_vid31_4b.o \ +- vec_vid32_4b.o vec_vid33_4b.o vec_vid34_4b.o vec_vid35_4b.o \ +- vec_vid36_4b.o vec_vid37_4b.o vec_vid38_4b.o vec_vid39_4b.o \ +- vec_vid40_4b.o vec_vid41_4b.o vec_vid42_4b.o vec_vid43_4b.o \ +- vec_vid44_4b.o vec_vid45_4b.o vec_vid46_4b.o vec_vid47_4b.o \ +- vec_vid48_4b.o vec_vid49_4b.o vec_vid50_4b.o vec_vid51_4b.o \ +- vec_vid52_4b.o vec_vid53_4b.o vec_vid54_4b.o vec_vid55_4b.o \ +- vec_vid56_4b.o vec_vid57_4b.o vec_vid58_4b.o vec_vid59_4b.o \ +- vec_vid60_4b.o vec_vid61_4b.o vec_vid62_4b.o vec_vid63_4b.o \ +- vec_vid64_4b.o vec_vid65_4b.o vec_vid66_4b.o vec_vid67_4b.o \ +- vec_vid68_4b.o vec_vid69_4b.o vec_vid70_4b.o vec_vid71_4b.o \ +- vec_vid72_4b.o \ +- excp_isr_ps_nn_4b.o excp_isr_ps_ns_4b.o excp_isr_ps_nr_4b.o \ +- excp_isr_sa_nn_4b.o excp_isr_sa_ns_4b.o excp_isr_sa_nr_4b.o \ +- 
intr_isr_ps_nn_4b.o intr_isr_ps_ns_4b.o intr_isr_ps_nr_4b.o \ +- intr_isr_sa_nn_4b.o intr_isr_sa_ns_4b.o intr_isr_sa_nr_4b.o \ +- reset_4b.o ++ vec_vid72.o + +-LIBNDS32_ISR_COMMON_OBJS = \ ++LIBNDS32_ISR_JMP_OBJS = \ + jmptbl_vid00.o jmptbl_vid01.o jmptbl_vid02.o jmptbl_vid03.o \ + jmptbl_vid04.o jmptbl_vid05.o jmptbl_vid06.o jmptbl_vid07.o \ + jmptbl_vid08.o jmptbl_vid09.o jmptbl_vid10.o jmptbl_vid11.o \ +@@ -98,29 +71,32 @@ LIBNDS32_ISR_COMMON_OBJS = \ + jmptbl_vid60.o jmptbl_vid61.o jmptbl_vid62.o jmptbl_vid63.o \ + jmptbl_vid64.o jmptbl_vid65.o jmptbl_vid66.o jmptbl_vid67.o \ + jmptbl_vid68.o jmptbl_vid69.o jmptbl_vid70.o jmptbl_vid71.o \ +- jmptbl_vid72.o \ ++ jmptbl_vid72.o ++ ++LIBNDS32_ISR_COMMON_OBJS = \ ++ excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ ++ excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ ++ intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ ++ intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ ++ reset.o \ + nmih.o \ + wrh.o + +-LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_16B_OBJS) $(LIBNDS32_ISR_4B_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) +- ++LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_VEC_OBJS) $(LIBNDS32_ISR_JMP_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) + +-# Build common objects for ISR library +-nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o + +-wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o + +-jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S ++# Build vector vid objects for ISR library. ++vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ + + +- +-# Build 16b version objects for ISR library. (no "_4b" postfix string) +-vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S ++# Build jump table objects for ISR library. 
++jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ + ++ ++# Build common objects for ISR library. + excp_isr_ps_nn.o: $(srcdir)/config/nds32/isr-library/excp_isr.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr.S -o excp_isr_ps_nn.o + +@@ -160,48 +136,12 @@ intr_isr_sa_nr.o: $(srcdir)/config/nds32/isr-library/intr_isr.S + reset.o: $(srcdir)/config/nds32/isr-library/reset.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset.S -o reset.o + +-# Build 4b version objects for ISR library. +-vec_vid%_4b.o: $(srcdir)/config/nds32/isr-library/vec_vid%_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ +- +-excp_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nn_4b.o +- +-excp_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_ns_4b.o +- +-excp_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nr_4b.o +- +-excp_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nn_4b.o +- +-excp_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_ns_4b.o +- +-excp_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nr_4b.o +- +-intr_isr_ps_nn_4b.o: 
$(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nn_4b.o +- +-intr_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_ns_4b.o +- +-intr_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nr_4b.o +- +-intr_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nn_4b.o +- +-intr_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_ns_4b.o ++nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S ++ $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o + +-intr_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nr_4b.o ++wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S ++ $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o + +-reset_4b.o: $(srcdir)/config/nds32/isr-library/reset_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset_4b.S -o reset_4b.o + + + # The rule to create libnds32_isr.a file +diff --git a/libgcc/config/nds32/t-nds32-newlib b/libgcc/config/nds32/t-nds32-newlib +index e4af03e..c356b60 100644 +--- a/libgcc/config/nds32/t-nds32-newlib ++++ b/libgcc/config/nds32/t-nds32-newlib +@@ -19,7 +19,7 @@ + # <http://www.gnu.org/licenses/>. 
+ + # Compiler flags to use when compiling 'libgcc2.c' +-HOST_LIBGCC2_CFLAGS = -O2 ++HOST_LIBGCC2_CFLAGS = -O2 -fwrapv + + + #LIB1ASMSRC = nds32/lib1asmsrc-newlib.S diff --git a/util/crossgcc/patches/gcc-6.3.0_riscv.patch b/util/crossgcc/patches/gcc-6.3.0_riscv.patch index ca9555de0b..a60511362a 100644 --- a/util/crossgcc/patches/gcc-6.3.0_riscv.patch +++ b/util/crossgcc/patches/gcc-6.3.0_riscv.patch @@ -9030,9 +9030,9 @@ index c9e43fb80e3..5359a4e6ee5 100755 # version to the per-target configury. case "$cpu_type" in aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ -- | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ +- | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ - | visium | xstormy16 | xtensa) -+ | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \ ++ | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \ + | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; @@ -9063,9 +9063,9 @@ index 33f9a0ecdc6..673fb1bb891 100644 # version to the per-target configury. case "$cpu_type" in aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ -- | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ +- | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ - | visium | xstormy16 | xtensa) -+ | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \ ++ | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \ + | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; |